mirror of
https://github.com/postgres/postgres.git
synced 2025-08-06 18:42:54 +03:00
Fix usage of char2wchar/wchar2char. Changes:
- pg_wchar and wchar_t can have different sizes, so char2wchar no longer calls pg_mb2wchar_with_len, to prevent an out-of-bounds memory bug.
- Make char2wchar/wchar2char symmetric: they should no longer be called with the C locale, because mbstowcs/wcstombs often do not work correctly with the C locale.
- The text parser uses pg_mb2wchar_with_len directly in the case of the C locale and a multibyte encoding.
Per bug report by Hiroshi Inoue <inoue@tpf.co.jp> and the following discussion. Backpatch to 8.2, when multibyte support was implemented in tsearch.
This commit is contained in:
@@ -64,15 +64,8 @@ char2wchar(wchar_t *to, const char *from, size_t len)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif /* WIN32 */
|
#endif /* WIN32 */
|
||||||
if ( lc_ctype_is_c() )
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* pg_mb2wchar_with_len always adds trailing '\0', so
|
|
||||||
* 'to' should be allocated with sufficient space
|
|
||||||
*/
|
|
||||||
return pg_mb2wchar_with_len(from, (pg_wchar *)to, len);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
Assert( !lc_ctype_is_c() );
|
||||||
return mbstowcs(to, from, len);
|
return mbstowcs(to, from, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/parser.c,v 1.11.2.2 2007/03/22 15:59:09 teodor Exp $ */
|
/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/parser.c,v 1.11.2.3 2009/03/02 15:13:17 teodor Exp $ */
|
||||||
|
|
||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
@@ -46,12 +46,24 @@ TParserInit(char *str, int len)
|
|||||||
if (prs->charmaxlen > 1)
|
if (prs->charmaxlen > 1)
|
||||||
{
|
{
|
||||||
prs->usewide = true;
|
prs->usewide = true;
|
||||||
|
if ( lc_ctype_is_c() )
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* char2wchar doesn't work for C-locale and
|
||||||
|
* sizeof(pg_wchar) could be not equal to sizeof(wchar_t)
|
||||||
|
*/
|
||||||
|
prs->pgwstr = (pg_wchar*) palloc(sizeof(pg_wchar) * (prs->lenstr + 1));
|
||||||
|
pg_mb2wchar_with_len(prs->str, prs->pgwstr, prs->lenstr);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * (prs->lenstr+1));
|
prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * (prs->lenstr+1));
|
||||||
prs->lenwstr = char2wchar(prs->wstr, prs->str, prs->lenstr);
|
prs->lenwstr = char2wchar(prs->wstr, prs->str, prs->lenstr);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
else
|
else
|
||||||
#endif
|
|
||||||
prs->usewide = false;
|
prs->usewide = false;
|
||||||
|
#endif
|
||||||
|
|
||||||
prs->state = newTParserPosition(NULL);
|
prs->state = newTParserPosition(NULL);
|
||||||
prs->state->state = TPS_Base;
|
prs->state->state = TPS_Base;
|
||||||
@@ -73,17 +85,21 @@ TParserClose(TParser * prs)
|
|||||||
#ifdef TS_USE_WIDE
|
#ifdef TS_USE_WIDE
|
||||||
if (prs->wstr)
|
if (prs->wstr)
|
||||||
pfree(prs->wstr);
|
pfree(prs->wstr);
|
||||||
|
if (prs->pgwstr)
|
||||||
|
pfree(prs->pgwstr);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
pfree(prs);
|
pfree(prs);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* defining support function, equvalent is* macroses, but
|
* Character-type support functions, equivalent to is* macros, but
|
||||||
* working with any possible encodings and locales. Note,
|
* working with any possible encodings and locales. Notes:
|
||||||
* that with multibyte encoding and C-locale isw* function may fail
|
* - with multibyte encoding and C-locale isw* function may fail
|
||||||
* or give wrong result. Note 2: multibyte encoding and C-locale
|
* or give wrong result.
|
||||||
* often are used for Asian languages.
|
* - multibyte encoding and C-locale often are used for
|
||||||
|
* Asian languages.
|
||||||
|
* - if locale is C then we use pgwstr instead of wstr
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifdef TS_USE_WIDE
|
#ifdef TS_USE_WIDE
|
||||||
@@ -94,8 +110,8 @@ p_is##type(TParser *prs) { \
|
|||||||
Assert( prs->state ); \
|
Assert( prs->state ); \
|
||||||
if ( prs->usewide ) \
|
if ( prs->usewide ) \
|
||||||
{ \
|
{ \
|
||||||
if ( lc_ctype_is_c() ) \
|
if ( prs->pgwstr ) \
|
||||||
return is##type( 0xff & *( prs->wstr + prs->state->poschar) ); \
|
return is##type( 0xff & *( prs->pgwstr + prs->state->poschar) );\
|
||||||
\
|
\
|
||||||
return isw##type( *(wint_t*)( prs->wstr + prs->state->poschar ) ); \
|
return isw##type( *(wint_t*)( prs->wstr + prs->state->poschar ) ); \
|
||||||
} \
|
} \
|
||||||
@@ -115,9 +131,9 @@ p_isalnum(TParser *prs)
|
|||||||
|
|
||||||
if (prs->usewide)
|
if (prs->usewide)
|
||||||
{
|
{
|
||||||
if (lc_ctype_is_c())
|
if (prs->pgwstr)
|
||||||
{
|
{
|
||||||
unsigned int c = *(prs->wstr + prs->state->poschar);
|
unsigned int c = *(prs->pgwstr + prs->state->poschar);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* any non-ascii symbol with multibyte encoding
|
* any non-ascii symbol with multibyte encoding
|
||||||
@@ -148,9 +164,9 @@ p_isalpha(TParser *prs)
|
|||||||
|
|
||||||
if (prs->usewide)
|
if (prs->usewide)
|
||||||
{
|
{
|
||||||
if (lc_ctype_is_c())
|
if (prs->pgwstr)
|
||||||
{
|
{
|
||||||
unsigned int c = *(prs->wstr + prs->state->poschar);
|
unsigned int c = *(prs->pgwstr + prs->state->poschar);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* any non-ascii symbol with multibyte encoding
|
* any non-ascii symbol with multibyte encoding
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/parser.h,v 1.11 2006/03/11 04:38:30 momjian Exp $ */
|
/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/parser.h,v 1.11.2.1 2009/03/02 15:13:17 teodor Exp $ */
|
||||||
|
|
||||||
#ifndef __PARSER_H__
|
#ifndef __PARSER_H__
|
||||||
#define __PARSER_H__
|
#define __PARSER_H__
|
||||||
@@ -138,12 +138,13 @@ typedef struct TParser
|
|||||||
int lenstr; /* length of mbstring */
|
int lenstr; /* length of mbstring */
|
||||||
#ifdef TS_USE_WIDE
|
#ifdef TS_USE_WIDE
|
||||||
wchar_t *wstr; /* wide character string */
|
wchar_t *wstr; /* wide character string */
|
||||||
|
pg_wchar *pgwstr; /* wide character string for C-locale */
|
||||||
int lenwstr; /* length of wsting */
|
int lenwstr; /* length of wsting */
|
||||||
|
bool usewide;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* State of parse */
|
/* State of parse */
|
||||||
int charmaxlen;
|
int charmaxlen;
|
||||||
bool usewide;
|
|
||||||
TParserPosition *state;
|
TParserPosition *state;
|
||||||
bool ignore;
|
bool ignore;
|
||||||
bool wanthost;
|
bool wanthost;
|
||||||
|
Reference in New Issue
Block a user