1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-26 12:21:12 +03:00
Hi, here are patches I promised (against 6.3.2):

* character_length(), position(), substring() are now aware of
          multi-byte characters
* add octet_length()
* add --with-mb option to configure
* new regression tests for EUC_KR
  (contributed by "Soonmyung. Hong" <hong@lunaris.hanmesoft.co.kr>)
* add some test cases to the EUC_JP regression test
* fix problem in regress/regress.sh in case of System V
* fix toupper(), tolower() to handle 8bit chars

note that:

o  patches for both configure.in and configure are
included. Maybe the one for configure is not necessary.

o pg_proc.h was modified to add octet_length(). I used OIDs
(1374-1379) for that. Please let me know if these numbers are not
appropriate.
This commit is contained in:
Marc G. Fournier
1998-04-27 17:10:50 +00:00
parent 2cbcf46102
commit f554af0a9f
15 changed files with 749 additions and 372 deletions

View File

@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.32 1998/03/15 08:07:01 scrappy Exp $
* $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.33 1998/04/27 17:08:28 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
@ -18,6 +18,8 @@
#include "utils/palloc.h"
#include "utils/builtins.h" /* where function declarations go */
#include "regex/pg_wchar.h"
/*****************************************************************************
* USER I/O ROUTINES *
*****************************************************************************/
@ -198,18 +200,52 @@ textout(text *vlena)
/*
 * textlen -
 *	  returns the logical length of a text*
 *	  (which is less than the VARSIZE of the text*)
 *
 * When built with MB defined, the length is counted in characters: the
 * data area is walked one character at a time, asking pg_mblen() how many
 * bytes to step over for each.  Without MB the result is the number of
 * data bytes, i.e. VARSIZE minus the VARHDRSZ header.
 *
 * Reports an ERROR through elog() when t is not a valid pointer.
 */
int32
textlen(text *t)
{
#ifdef MB
	unsigned char *s;
	int			len,
				l,
				wl;
#endif

	if (!PointerIsValid(t))
		elog(ERROR, "Null input to textlen");

#ifdef MB
	len = 0;

	/*
	 * Cast explicitly to unsigned char *, matching the cast textpos()
	 * applies to VARDATA() before handing it to the multi-byte routines.
	 */
	s = (unsigned char *) VARDATA(t);

	/* l is the number of data bytes not yet consumed */
	l = VARSIZE(t) - VARHDRSZ;
	while (l > 0)
	{
		wl = pg_mblen(s);
		l -= wl;
		s += wl;
		len++;
	}
	return (len);
#else
	return (VARSIZE(t) - VARHDRSZ);
#endif
}	/* textlen() */
/*
 * textoctetlen -
 *	  returns the physical length of a text*, i.e. the number of data
 *	  bytes it holds (its VARSIZE minus the VARHDRSZ header), without
 *	  regard to how many characters those bytes encode.
 *
 * Reports an ERROR through elog() when t is not a valid pointer.
 */
int32
textoctetlen(text *t)
{
	int32		nbytes;

	if (!PointerIsValid(t))
		elog(ERROR, "Null input to textoctetlen");

	/* everything after the length header counts as payload */
	nbytes = VARSIZE(t) - VARHDRSZ;

	return nbytes;
}	/* textoctetlen() */
/*
* textcat -
* takes two text* and returns a text* that is the concatenation of
@ -278,17 +314,27 @@ textcat(text *t1, text *t2)
*
* Note that the arguments operate on octet length,
* so not aware of multi-byte character sets.
*
* Added multi-byte support.
* - Tatsuo Ishii 1998-4-21
*/
text *
text_substr(text *string, int32 m, int32 n)
{
text *ret;
int len;
#ifdef MB
int i;
char *p;
#endif
if ((string == (text *) NULL) || (m <= 0))
return string;
len = VARSIZE(string) - VARHDRSZ;
#ifdef MB
len = pg_mbstrlen_with_len(VARDATA(string),len);
#endif
/* m will now become a zero-based starting position */
if (m > len)
@ -303,6 +349,17 @@ text_substr(text *string, int32 m, int32 n)
n = (len - m);
}
#ifdef MB
p = VARDATA(string);
for (i=0;i<m;i++) {
p += pg_mblen(p);
}
m = p - VARDATA(string);
for (i=0;i<n;i++) {
p += pg_mblen(p);
}
n = p - (VARDATA(string) + m);
#endif
ret = (text *) palloc(VARHDRSZ + n);
VARSIZE(ret) = VARHDRSZ + n;
@ -317,6 +374,9 @@ text_substr(text *string, int32 m, int32 n)
* Implements the SQL92 POSITION() function.
* Ref: A Guide To The SQL Standard, Date & Darwen, 1997
* - thomas 1997-07-27
*
* Added multi-byte support.
* - Tatsuo Ishii 1998-4-21
*/
int32
textpos(text *t1, text *t2)
@ -326,8 +386,11 @@ textpos(text *t1, text *t2)
p;
int len1,
len2;
char *p1,
pg_wchar *p1,
*p2;
#ifdef MB
pg_wchar *ps1, *ps2;
#endif
if (!PointerIsValid(t1) || !PointerIsValid(t2))
return (0);
@ -337,19 +400,36 @@ textpos(text *t1, text *t2)
len1 = (VARSIZE(t1) - VARHDRSZ);
len2 = (VARSIZE(t2) - VARHDRSZ);
#ifdef MB
ps1 = p1 = (pg_wchar *) palloc((len1 + 1)*sizeof(pg_wchar));
(void)pg_mb2wchar_with_len((unsigned char *)VARDATA(t1),p1,len1);
len1 = pg_wchar_strlen(p1);
ps2 = p2 = (pg_wchar *) palloc((len2 + 1)*sizeof(pg_wchar));
(void)pg_mb2wchar_with_len((unsigned char *)VARDATA(t2),p2,len2);
len2 = pg_wchar_strlen(p2);
#else
p1 = VARDATA(t1);
p2 = VARDATA(t2);
#endif
pos = 0;
px = (len1 - len2);
for (p = 0; p <= px; p++)
{
#ifdef MB
if ((*p2 == *p1) && (pg_wchar_strncmp(p1, p2, len2) == 0))
#else
if ((*p2 == *p1) && (strncmp(p1, p2, len2) == 0))
#endif
{
pos = p + 1;
break;
};
p1++;
};
#ifdef MB
pfree(ps1);
pfree(ps2);
#endif
return (pos);
} /* textpos() */