mirror of
https://github.com/postgres/postgres.git
synced 2025-11-10 17:42:29 +03:00
From: t-ishii@sra.co.jp
Hi, here are patches I promised (against 6.3.2):
* character_length(), position(), substring() are now aware of
multi-byte characters
* add octet_length()
* add --with-mb option to configure
* new regression tests for EUC_KR
(contributed by "Soonmyung. Hong" <hong@lunaris.hanmesoft.co.kr>)
* add some test cases to the EUC_JP regression test
* fix problem in regress/regress.sh in case of System V
* fix toupper(), tolower() to handle 8bit chars
note that:
o Patches for both configure.in and configure are
included. The one for configure may not be necessary.
o pg_proc.h was modified to add octet_length(). I used OIDs
(1374-1379) for that. Please let me know if these numbers are not
appropriate.
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
/*
|
||||
* Edmund Mergl <E.Mergl@bawue.de>
|
||||
*
|
||||
* $Id: oracle_compat.c,v 1.12 1998/02/26 04:37:19 momjian Exp $
|
||||
* $Id: oracle_compat.c,v 1.13 1998/04/27 17:08:19 scrappy Exp $
|
||||
*
|
||||
*/
|
||||
|
||||
@@ -55,7 +55,7 @@ lower(text *string)
|
||||
|
||||
while (m--)
|
||||
{
|
||||
*ptr_ret++ = tolower(*ptr++);
|
||||
*ptr_ret++ = tolower((unsigned char)*ptr++);
|
||||
}
|
||||
|
||||
return ret;
|
||||
@@ -95,7 +95,7 @@ upper(text *string)
|
||||
|
||||
while (m--)
|
||||
{
|
||||
*ptr_ret++ = toupper(*ptr++);
|
||||
*ptr_ret++ = toupper((unsigned char)*ptr++);
|
||||
}
|
||||
|
||||
return ret;
|
||||
@@ -135,18 +135,18 @@ initcap(text *string)
|
||||
ptr = VARDATA(string);
|
||||
ptr_ret = VARDATA(ret);
|
||||
|
||||
*ptr_ret++ = toupper(*ptr++);
|
||||
*ptr_ret++ = toupper((unsigned char)*ptr++);
|
||||
--m;
|
||||
|
||||
while (m--)
|
||||
{
|
||||
if (*(ptr_ret - 1) == ' ' || *(ptr_ret - 1) == ' ')
|
||||
{
|
||||
*ptr_ret++ = toupper(*ptr++);
|
||||
*ptr_ret++ = toupper((unsigned char)*ptr++);
|
||||
}
|
||||
else
|
||||
{
|
||||
*ptr_ret++ = tolower(*ptr++);
|
||||
*ptr_ret++ = tolower((unsigned char)*ptr++);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/utils/adt/varchar.c,v 1.29 1998/02/26 04:37:24 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/utils/adt/varchar.c,v 1.30 1998/04/27 17:08:26 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -21,6 +21,8 @@ char *convertstr(char *, int, int);
|
||||
|
||||
#endif
|
||||
|
||||
#include "regex/pg_wchar.h"
|
||||
|
||||
/*
|
||||
* CHAR() and VARCHAR() types are part of the ANSI SQL standard. CHAR()
|
||||
* is for blank-padded string whose length is specified in CREATE TABLE.
|
||||
@@ -213,6 +215,31 @@ bcTruelen(char *arg)
|
||||
|
||||
int32
|
||||
bpcharlen(char *arg)
|
||||
{
|
||||
#ifdef MB
|
||||
unsigned char *s;
|
||||
int len, l, wl;
|
||||
#endif
|
||||
if (!PointerIsValid(arg))
|
||||
elog(ERROR, "Bad (null) char() external representation", NULL);
|
||||
#ifdef MB
|
||||
l = bcTruelen(arg);
|
||||
len = 0;
|
||||
s = VARDATA(arg);
|
||||
while (l > 0) {
|
||||
wl = pg_mblen(s);
|
||||
l -= wl;
|
||||
s += wl;
|
||||
len++;
|
||||
}
|
||||
return(len);
|
||||
#else
|
||||
return (bcTruelen(arg));
|
||||
#endif
|
||||
}
|
||||
|
||||
int32
|
||||
bpcharoctetlen(char *arg)
|
||||
{
|
||||
if (!PointerIsValid(arg))
|
||||
elog(ERROR, "Bad (null) char() external representation", NULL);
|
||||
@@ -354,9 +381,34 @@ bpcharcmp(char *arg1, char *arg2)
|
||||
int32
|
||||
varcharlen(char *arg)
|
||||
{
|
||||
#ifdef MB
|
||||
unsigned char *s;
|
||||
int len, l, wl;
|
||||
#endif
|
||||
if (!PointerIsValid(arg))
|
||||
elog(ERROR, "Bad (null) varchar() external representation", NULL);
|
||||
|
||||
#ifdef MB
|
||||
len = 0;
|
||||
s = VARDATA(arg);
|
||||
l = VARSIZE(arg) - VARHDRSZ;
|
||||
while (l > 0) {
|
||||
wl = pg_mblen(s);
|
||||
l -= wl;
|
||||
s += wl;
|
||||
len++;
|
||||
}
|
||||
return(len);
|
||||
#else
|
||||
return (VARSIZE(arg) - VARHDRSZ);
|
||||
#endif
|
||||
}
|
||||
|
||||
int32
|
||||
varcharoctetlen(char *arg)
|
||||
{
|
||||
if (!PointerIsValid(arg))
|
||||
elog(ERROR, "Bad (null) varchar() external representation", NULL);
|
||||
return (VARSIZE(arg) - VARHDRSZ);
|
||||
}
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.32 1998/03/15 08:07:01 scrappy Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.33 1998/04/27 17:08:28 scrappy Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -18,6 +18,8 @@
|
||||
#include "utils/palloc.h"
|
||||
#include "utils/builtins.h" /* where function declarations go */
|
||||
|
||||
#include "regex/pg_wchar.h"
|
||||
|
||||
/*****************************************************************************
|
||||
* USER I/O ROUTINES *
|
||||
*****************************************************************************/
|
||||
@@ -198,18 +200,52 @@ textout(text *vlena)
|
||||
|
||||
/*
|
||||
* textlen -
|
||||
* returns the actual length of a text*
|
||||
* returns the logical length of a text*
|
||||
* (which is less than the VARSIZE of the text*)
|
||||
*/
|
||||
int32
|
||||
textlen(text *t)
|
||||
{
|
||||
#ifdef MB
|
||||
unsigned char *s;
|
||||
int len, l, wl;
|
||||
#endif
|
||||
|
||||
if (!PointerIsValid(t))
|
||||
elog(ERROR, "Null input to textlen");
|
||||
|
||||
#ifdef MB
|
||||
len = 0;
|
||||
s = VARDATA(t);
|
||||
l = VARSIZE(t) - VARHDRSZ;
|
||||
while (l > 0) {
|
||||
wl = pg_mblen(s);
|
||||
l -= wl;
|
||||
s += wl;
|
||||
len++;
|
||||
}
|
||||
return(len);
|
||||
#else
|
||||
return (VARSIZE(t) - VARHDRSZ);
|
||||
#endif
|
||||
|
||||
} /* textlen() */
|
||||
|
||||
/*
|
||||
* textoctetlen -
|
||||
* returns the physical length of a text*
|
||||
* (which is less than the VARSIZE of the text*)
|
||||
*/
|
||||
int32
|
||||
textoctetlen(text *t)
|
||||
{
|
||||
if (!PointerIsValid(t))
|
||||
elog(ERROR, "Null input to textoctetlen");
|
||||
|
||||
return (VARSIZE(t) - VARHDRSZ);
|
||||
|
||||
} /* textoctetlen() */
|
||||
|
||||
/*
|
||||
* textcat -
|
||||
* takes two text* and returns a text* that is the concatenation of
|
||||
@@ -278,17 +314,27 @@ textcat(text *t1, text *t2)
|
||||
*
|
||||
* Note that the arguments operate on octet length,
|
||||
* so not aware of multi-byte character sets.
|
||||
*
|
||||
* Added multi-byte support.
|
||||
* - Tatsuo Ishii 1998-4-21
|
||||
*/
|
||||
text *
|
||||
text_substr(text *string, int32 m, int32 n)
|
||||
{
|
||||
text *ret;
|
||||
int len;
|
||||
#ifdef MB
|
||||
int i;
|
||||
char *p;
|
||||
#endif
|
||||
|
||||
if ((string == (text *) NULL) || (m <= 0))
|
||||
return string;
|
||||
|
||||
len = VARSIZE(string) - VARHDRSZ;
|
||||
#ifdef MB
|
||||
len = pg_mbstrlen_with_len(VARDATA(string),len);
|
||||
#endif
|
||||
|
||||
/* m will now become a zero-based starting position */
|
||||
if (m > len)
|
||||
@@ -303,6 +349,17 @@ text_substr(text *string, int32 m, int32 n)
|
||||
n = (len - m);
|
||||
}
|
||||
|
||||
#ifdef MB
|
||||
p = VARDATA(string);
|
||||
for (i=0;i<m;i++) {
|
||||
p += pg_mblen(p);
|
||||
}
|
||||
m = p - VARDATA(string);
|
||||
for (i=0;i<n;i++) {
|
||||
p += pg_mblen(p);
|
||||
}
|
||||
n = p - (VARDATA(string) + m);
|
||||
#endif
|
||||
ret = (text *) palloc(VARHDRSZ + n);
|
||||
VARSIZE(ret) = VARHDRSZ + n;
|
||||
|
||||
@@ -317,6 +374,9 @@ text_substr(text *string, int32 m, int32 n)
|
||||
* Implements the SQL92 POSITION() function.
|
||||
* Ref: A Guide To The SQL Standard, Date & Darwen, 1997
|
||||
* - thomas 1997-07-27
|
||||
*
|
||||
* Added multi-byte support.
|
||||
* - Tatsuo Ishii 1998-4-21
|
||||
*/
|
||||
int32
|
||||
textpos(text *t1, text *t2)
|
||||
@@ -326,8 +386,11 @@ textpos(text *t1, text *t2)
|
||||
p;
|
||||
int len1,
|
||||
len2;
|
||||
char *p1,
|
||||
pg_wchar *p1,
|
||||
*p2;
|
||||
#ifdef MB
|
||||
pg_wchar *ps1, *ps2;
|
||||
#endif
|
||||
|
||||
if (!PointerIsValid(t1) || !PointerIsValid(t2))
|
||||
return (0);
|
||||
@@ -337,19 +400,36 @@ textpos(text *t1, text *t2)
|
||||
|
||||
len1 = (VARSIZE(t1) - VARHDRSZ);
|
||||
len2 = (VARSIZE(t2) - VARHDRSZ);
|
||||
#ifdef MB
|
||||
ps1 = p1 = (pg_wchar *) palloc((len1 + 1)*sizeof(pg_wchar));
|
||||
(void)pg_mb2wchar_with_len((unsigned char *)VARDATA(t1),p1,len1);
|
||||
len1 = pg_wchar_strlen(p1);
|
||||
ps2 = p2 = (pg_wchar *) palloc((len2 + 1)*sizeof(pg_wchar));
|
||||
(void)pg_mb2wchar_with_len((unsigned char *)VARDATA(t2),p2,len2);
|
||||
len2 = pg_wchar_strlen(p2);
|
||||
#else
|
||||
p1 = VARDATA(t1);
|
||||
p2 = VARDATA(t2);
|
||||
#endif
|
||||
pos = 0;
|
||||
px = (len1 - len2);
|
||||
for (p = 0; p <= px; p++)
|
||||
{
|
||||
#ifdef MB
|
||||
if ((*p2 == *p1) && (pg_wchar_strncmp(p1, p2, len2) == 0))
|
||||
#else
|
||||
if ((*p2 == *p1) && (strncmp(p1, p2, len2) == 0))
|
||||
#endif
|
||||
{
|
||||
pos = p + 1;
|
||||
break;
|
||||
};
|
||||
p1++;
|
||||
};
|
||||
#ifdef MB
|
||||
pfree(ps1);
|
||||
pfree(ps2);
|
||||
#endif
|
||||
return (pos);
|
||||
} /* textpos() */
|
||||
|
||||
|
||||
Reference in New Issue
Block a user