Hi, here are the patches to enhance existing MB handling. This time

I have implemented a framework for encoding translation between the backend and the frontend. I have also added a new variable-setting command: SET CLIENT_ENCODING TO 'encoding'; Other features include Latin1 support and improved 8-bit cleanness. See doc/README.mb for more details. Note that the patches are against the May 30 snapshot. Tatsuo Ishii
2025-11-03 09:13:20 +03:00 · 1998-06-16 07:29:54 +00:00
parent 0d8e7f6381
commit cb7cbc16fa
37 changed files with 1115 additions and 341 deletions
--- a/src/include/regex/pg_wchar.h
+++ b/src/include/regex/pg_wchar.h
@@ -1,4 +1,4 @@
-/* $Id: pg_wchar.h,v 1.2 1998/04/27 17:09:12 scrappy Exp $ */
+/* $Id: pg_wchar.h,v 1.3 1998/06/16 07:29:43 momjian Exp $ */

 #ifndef PG_WCHAR_H
 #define PG_WCHAR_H
@@ -11,9 +11,20 @@
 #define EUC_TW 3	/* EUC for Taiwan */
 #define UNICODE 4	/* Unicode UTF-8 */
 #define MULE_INTERNAL 5	/* Mule internal code */
+#define LATIN1 6	/* ISO-8859 Latin 1 */
+#define LATIN2 7	/* ISO-8859 Latin 2 */
+#define LATIN3 8	/* ISO-8859 Latin 3 */
+#define LATIN4 9	/* ISO-8859 Latin 4 */
+#define LATIN5 10	/* ISO-8859 Latin 5 */
+/* followings are for client encoding only */
+#define SJIS 16		/* Shift JIS */

 #ifdef MB
+# if LATIN1 <= MB && MB <= LATIN5
+typedef unsigned char pg_wchar;
+# else
 typedef unsigned int pg_wchar;
+# endif
 #else
 #define pg_wchar char
 #endif
@@ -32,6 +43,28 @@ typedef unsigned int pg_wchar;
 #define IS_LC2(c)	((unsigned char)(c) >= 0x90 && (unsigned char)(c) <= 0x99)
 #define IS_LCPRV2(c)	((unsigned char)(c) == 0x9c || (unsigned char)(c) == 0x9d)

+/*
+ * leading characters
+ */
+#define	LC_ISO8859_1	0x81	/* ISO8859 Latin 1 */
+#define	LC_ISO8859_2	0x82	/* ISO8859 Latin 2 */
+#define	LC_ISO8859_3	0x83	/* ISO8859 Latin 3 */
+#define	LC_ISO8859_4	0x84	/* ISO8859 Latin 4 */
+#define	LC_ISO8859_5	0x8d	/* ISO8859 Latin 5 */
+#define	LC_JISX0201K	0x89	/* Japanese 1 byte kana */
+#define	LC_JISX0201R	0x90	/* Japanese 1 byte Roman */
+#define	LC_GB2312_80	0x91	/* Chinese */
+#define	LC_JISX0208	0x92	/* Japanese Kanji */
+#define	LC_KS5601	0x93	/* Korean */
+#define	LC_JISX0212	0x94	/* Japanese Kanji (JISX0212) */
+#define	LC_CNS11643_1	0x95	/* CNS 11643-1992 Plane 1 */
+#define	LC_CNS11643_2	0x96	/* CNS 11643-1992 Plane 2 */
+#define	LC_CNS11643_3	0xf6	/* CNS 11643-1992 Plane 3 */
+#define	LC_CNS11643_4	0xf7	/* CNS 11643-1992 Plane 4 */
+#define	LC_CNS11643_5	0xf8	/* CNS 11643-1992 Plane 5 */
+#define	LC_CNS11643_6	0xf9	/* CNS 11643-1992 Plane 6 */
+#define	LC_CNS11643_7	0xfa	/* CNS 11643-1992 Plane 7 */
+
 #ifdef MB
 extern void pg_mb2wchar(const unsigned char *, pg_wchar *);
 extern void pg_mb2wchar_with_len(const unsigned char *, pg_wchar *, int);
@@ -40,6 +73,8 @@ extern int pg_wchar_strncmp(const pg_wchar *, const pg_wchar *, size_t);
 extern int pg_char_and_wchar_strncmp(const char *, const pg_wchar *, size_t);
 extern size_t pg_wchar_strlen(const pg_wchar *);
 extern int pg_mblen(const unsigned char *);
+extern int pg_encoding_mblen(int, const unsigned char *);
+extern int pg_mic_mblen(const unsigned char *);
 extern int pg_mbstrlen(const unsigned char *);
 extern int pg_mbstrlen_with_len(const unsigned char *, int);
 #endif
--- a/src/include/regex/regex2.h
+++ b/src/include/regex/regex2.h
@@ -203,6 +203,8 @@ struct re_guts
 #    define OUT		(USHRT_MAX+1)	/* 2 bytes */
 #  elif MB == UNICODE
 #    define OUT		(USHRT_MAX+1)	/* 2 bytes. assuming UCS-2 */
+#  else
+#    define OUT		(UCHAR_MAX+1)	/* other codes. assuming 1 byte */
 #  endif
 #else
 #  define OUT		(CHAR_MAX+1)	/* a non-character value */