Fix usage of char2wchar/wchar2char. Changes:

- pg_wchar and wchar_t can have different sizes, so char2wchar no longer calls pg_mb2wchar_with_len, which prevents an out-of-bounds memory write
- make char2wchar/wchar2char symmetric; they must not be called in the C locale, because mbstowcs/wcstombs often do not work correctly in the C locale
- the text parser now uses pg_mb2wchar_with_len directly in the case of the C locale with a multibyte encoding

Per bug report by Hiroshi Inoue <inoue@tpf.co.jp> and following discussion. Backpatch up to 8.2, when multibyte support was implemented in tsearch.
2025-08-06 18:42:54 +03:00 · 2009-03-02 15:13:17 +00:00
parent 08a0989014
commit 8fcdac9987
3 changed files with 35 additions and 25 deletions
--- a/contrib/tsearch2/ts_locale.c
+++ b/contrib/tsearch2/ts_locale.c
@@ -64,15 +64,8 @@ char2wchar(wchar_t *to, const char *from, size_t len)
 	}
 	else 
 #endif /* WIN32 */
 	if ( lc_ctype_is_c() )
 	{
 		/*
 		 * pg_mb2wchar_with_len always adds trailing '\0', so 
 		 * 'to' should be allocated with sufficient space 
 		 */
 		return pg_mb2wchar_with_len(from, (pg_wchar *)to, len);
 	}
 	Assert( !lc_ctype_is_c() );
 	return mbstowcs(to, from, len);
 }
--- a/contrib/tsearch2/wordparser/parser.c
+++ b/contrib/tsearch2/wordparser/parser.c
@@ -1,4 +1,4 @@
-/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/parser.c,v 1.11.2.2 2007/03/22 15:59:09 teodor Exp $ */
+/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/parser.c,v 1.11.2.3 2009/03/02 15:13:17 teodor Exp $ */
 #include "postgres.h"
@@ -46,12 +46,24 @@ TParserInit(char *str, int len)
 	if (prs->charmaxlen > 1)
 	{
 		prs->usewide = true;
 		if ( lc_ctype_is_c() )
 		{
 			/*
 			 * char2wchar doesn't work for C-locale and
 			 * sizeof(pg_wchar) could be not equal to sizeof(wchar_t)
 			 */
 			prs->pgwstr = (pg_wchar*) palloc(sizeof(pg_wchar) * (prs->lenstr + 1));
 			pg_mb2wchar_with_len(prs->str, prs->pgwstr, prs->lenstr);
 		}
 		else
 		{
 			prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * (prs->lenstr+1));
 			prs->lenwstr = char2wchar(prs->wstr, prs->str, prs->lenstr);
 		}
 	}
 	else
 #endif
 		prs->usewide = false;
 #endif
 	prs->state = newTParserPosition(NULL);
 	prs->state->state = TPS_Base;
@@ -73,17 +85,21 @@ TParserClose(TParser * prs)
 #ifdef TS_USE_WIDE
 	if (prs->wstr)
 		pfree(prs->wstr);
 	if (prs->pgwstr)
 		pfree(prs->pgwstr);
 #endif
 	pfree(prs);
 }
 /*
- * defining support function, equvalent is* macroses, but
+ * Character-type support functions, equivalent to is* macros, but
- * working with any possible encodings and locales. Note,
+ * working with any possible encodings and locales. Notes:
- * that with multibyte encoding and C-locale isw* function may fail
+ *  - with multibyte encoding and C-locale isw* function may fail
- * or give wrong result. Note 2: multibyte encoding and C-locale 
+ *    or give wrong result. 
- * often are used for Asian languages.
+ *  - multibyte encoding and C-locale often are used for 
 *    Asian languages.
 *  - if locale is C then we use pgwstr instead of wstr
 */
 #ifdef TS_USE_WIDE
@@ -94,8 +110,8 @@ p_is##type(TParser *prs) {													\
 	Assert( prs->state );													\
 	if ( prs->usewide )														\
 	{																		\
-		if ( lc_ctype_is_c() )												\
+		if ( prs->pgwstr )													\
-			return is##type( 0xff & *( prs->wstr + prs->state->poschar) );	\
+			return is##type( 0xff & *( prs->pgwstr + prs->state->poschar) );\
 																			\
 		return isw##type( *(wint_t*)( prs->wstr + prs->state->poschar ) );	\
 	}																		\
@@ -115,9 +131,9 @@ p_isalnum(TParser *prs)
 	if (prs->usewide)
 	{
-		if (lc_ctype_is_c())
+		if (prs->pgwstr)
 		{
-			unsigned int c = *(prs->wstr + prs->state->poschar);
+			unsigned int c = *(prs->pgwstr + prs->state->poschar);
 			/*
 			 * any non-ascii symbol with multibyte encoding
@@ -148,9 +164,9 @@ p_isalpha(TParser *prs)
 	if (prs->usewide)
 	{
-		if (lc_ctype_is_c())
+		if (prs->pgwstr)
 		{
-			unsigned int c = *(prs->wstr + prs->state->poschar);
+			unsigned int c = *(prs->pgwstr + prs->state->poschar);
 			/*
 			 * any non-ascii symbol with multibyte encoding
--- a/contrib/tsearch2/wordparser/parser.h
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -1,4 +1,4 @@
-/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/parser.h,v 1.11 2006/03/11 04:38:30 momjian Exp $ */
+/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/parser.h,v 1.11.2.1 2009/03/02 15:13:17 teodor Exp $ */
 #ifndef __PARSER_H__
 #define __PARSER_H__
@@ -138,12 +138,13 @@ typedef struct TParser
 	int			lenstr;			/* length of mbstring */
 #ifdef TS_USE_WIDE
 	wchar_t    *wstr;			/* wide character string */
 	pg_wchar   *pgwstr;			/* wide character string for C-locale */
 	int			lenwstr;		/* length of wsting */
 	bool		usewide;
 #endif
 	/* State of parse */
 	int			charmaxlen;
 	bool		usewide;
 	TParserPosition *state;
 	bool		ignore;
 	bool		wanthost;