/*--------
 * Module :			multibyte.c
 *
 * Description:		Multibyte related additional functions.
 *
 *					Create 2001-03-03 Eiji Tokuya
 *					New Create 2001-09-16 Eiji Tokuya
 *--------
 */

#include "multibyte.h"
#include "connection.h"
#include "pgapifunc.h"
/*
 * NOTE(review): the standard header list below covers what this file calls
 * (strstr/strlen/strdup, sscanf, free) -- confirm against the build.
 */
#include <string.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>

#ifndef	TRUE
#define	TRUE	1
#endif

/*
 * Mapping between client-encoding names and their character-set codes.
 * The entry whose code is OTHER terminates the table; every lookup below
 * stops scanning when it reaches that entry.
 */
pg_CS CS_Table[] =
{
	{ "SQL_ASCII",	SQL_ASCII },
	{ "EUC_JP",	EUC_JP },
	{ "EUC_CN",	EUC_CN },
	{ "EUC_KR",	EUC_KR },
	{ "EUC_TW",	EUC_TW },
	{ "JOHAB",	JOHAB },
	{ "UNICODE",	UTF8 },
	{ "MULE_INTERNAL",MULE_INTERNAL },
	{ "LATIN1",	LATIN1 },
	{ "LATIN2",	LATIN2 },
	{ "LATIN3",	LATIN3 },
	{ "LATIN4",	LATIN4 },
	{ "LATIN5",	LATIN5 },
	{ "LATIN6",	LATIN6 },
	{ "LATIN7",	LATIN7 },
	{ "LATIN8",	LATIN8 },
	{ "LATIN9",	LATIN9 },
	{ "LATIN10",	LATIN10 },
	{ "WIN1256",	WIN1256 },
	{ "TCVN",	TCVN },
	{ "WIN874",	WIN874 },
	{ "KOI8",	KOI8R },
	{ "WIN",	WIN1251 },
	{ "ALT",	ALT },
	{ "ISO_8859_5",	ISO_8859_5 },
	{ "ISO_8859_6",	ISO_8859_6 },
	{ "ISO_8859_7",	ISO_8859_7 },
	{ "ISO_8859_8",	ISO_8859_8 },
	{ "SJIS",	SJIS },
	{ "BIG5",	BIG5 },
	{ "GBK",	GBK },
	{ "UHC",	UHC },
	{ "WIN1250",	WIN1250 },
	{ "OTHER",	OTHER }
};

/*
 * Report whether a character-set code denotes a multibyte encoding.
 *
 * Returns characterset_code itself when it is one of the multibyte
 * encodings listed below, otherwise OTHER.  Callers should compare the
 * result against OTHER rather than test it for truth.
 *
 * The previous loop condition joined its two tests with "||" and the list
 * had no OTHER terminator, so the scan always ran past the end of the
 * array; the search is now bounded by the array size.
 */
int
pg_ismb(int characterset_code)
{
	static const int mb_charsets[] =
	{
		EUC_JP, EUC_CN, EUC_KR, EUC_TW, UTF8, MULE_INTERNAL,
		SJIS, BIG5, GBK, UHC, JOHAB
	};
	const unsigned int count = sizeof(mb_charsets) / sizeof(mb_charsets[0]);
	unsigned int idx;

	for (idx = 0; idx < count; idx++)
	{
		if (mb_charsets[idx] == characterset_code)
			return characterset_code;
	}
	return OTHER;
}

/*
 * Translate an encoding name into its character-set code.
 *
 * Every CS_Table name that occurs as a substring of characterset_string
 * is a candidate; the longest one wins (on equal length the later table
 * entry wins), so that e.g. "LATIN10" is not mistaken for "LATIN1".
 * The comparison is case-sensitive.
 *
 * Returns the matching code, or -- when nothing matches -- the table
 * index of the terminating OTHER entry (not the OTHER constant itself).
 */
int
pg_CS_code(const unsigned char *characterset_string)
{
	int		i = 0,
			c = -1;
	size_t	best_len = 0;

	for (i = 0; CS_Table[i].code != OTHER; i++)
	{
		const char *name = (const char *) CS_Table[i].name;

		if (strstr((const char *) characterset_string, name))
		{
			size_t	name_len = strlen(name);

			if (name_len >= best_len)
			{
				best_len = name_len;
				c = CS_Table[i].code;
			}
		}
	}
	if (c < 0)
		c = i;			/* index of the OTHER terminator */
	return (c);
}

/*
 * Translate a character-set code into its encoding name.
 *
 * Returns the CS_Table name for the code, or "OTHER" when the code is not
 * in the table.  The returned string is not owned by the caller.
 */
unsigned char *
pg_CS_name(int characterset_code)
{
	int		i;

	for (i = 0; CS_Table[i].code != OTHER; i++)
	{
		if (CS_Table[i].code == characterset_code)
			return (unsigned char *) CS_Table[i].name;
	}
	return ((unsigned char *) "OTHER");
}

/*
 * Advance the multibyte scanning state by one byte.
 *
 *	stat				state returned for the previous byte (0 at start)
 *	character			the current byte value
 *	characterset_code	encoding of the string being scanned
 *
 * The returned state is 0 for a single-byte character, and for a
 * multibyte character it counts down the bytes still belonging to it:
 * the lead byte yields the largest value and the final byte yields 1.
 * A NUL byte always yields 0.  Encodings not handled here always yield 0.
 */
int
pg_CS_stat(int stat, unsigned int character, int characterset_code)
{
	if (character == 0)
		stat = 0;

	switch (characterset_code)
	{
		case UTF8:
			{
				if (stat < 2 && character >= 0x80)
				{
					/* Lead byte: its value gives the sequence length. */
					if (character >= 0xfc)
						stat = 6;
					else if (character >= 0xf8)
						stat = 5;
					else if (character >= 0xf0)
						stat = 4;
					else if (character >= 0xe0)
						stat = 3;
					else if (character >= 0xc0)
						stat = 2;
					/* 0x80..0xbf outside a sequence: state unchanged */
				}
				/*
				 * ">= 2" (was "> 2") so that the last byte of a sequence
				 * reports 1, consistent with the other encodings, instead
				 * of looking like a single-byte character.
				 */
				else if (stat >= 2 && character > 0x7f)
					stat--;
				else
					stat = 0;
			}
			break;

		/* Shift-JIS Support. */
		case SJIS:
			{
				if (stat < 2 &&
					character > 0x80 &&
					!(character > 0x9f && character < 0xe0))
					stat = 2;
				else if (stat == 2)
					stat = 1;
				else
					stat = 0;
			}
			break;

		/* Chinese Big5 Support. */
		case BIG5:
			{
				if (stat < 2 && character > 0xA0)
					stat = 2;
				else if (stat == 2)
					stat = 1;
				else
					stat = 0;
			}
			break;

		/* Chinese GBK Support. */
		case GBK:
			{
				if (stat < 2 && character > 0x7F)
					stat = 2;
				else if (stat == 2)
					stat = 1;
				else
					stat = 0;
			}
			break;

		/* Korean UHC Support. */
		case UHC:
			{
				if (stat < 2 && character > 0x7F)
					stat = 2;
				else if (stat == 2)
					stat = 1;
				else
					stat = 0;
			}
			break;

		/* EUC_JP Support */
		case EUC_JP:
			{
				if (stat < 3 && character == 0x8f)	/* JIS X 0212 */
					stat = 3;
				else if (stat != 2 &&
						 (character == 0x8e || character > 0xa0))
					/* Half Katakana HighByte & Kanji HighByte */
					stat = 2;
				else if (stat == 2)
					stat = 1;
				else
					stat = 0;
			}
			break;

		/* EUC_CN, EUC_KR, JOHAB Support */
		case EUC_CN:
		case EUC_KR:
		case JOHAB:
			{
				if (stat < 2 && character > 0xa0)
					stat = 2;
				else if (stat == 2)
					stat = 1;
				else
					stat = 0;
			}
			break;

		case EUC_TW:
			{
				if (stat < 4 && character == 0x8e)
					stat = 4;
				else if (stat == 4 && character > 0xa0)
					stat = 3;
				else if (stat == 3 ||
						 (stat < 2 && character > 0xa0))
					stat = 2;
				else if (stat == 2)
					stat = 1;
				else
					stat = 0;
			}
			break;

		default:
			{
				stat = 0;
			}
			break;
	}
	return stat;
}

/*
 * Multibyte-aware strchr().
 *
 * Scans string in encoding csc and returns a pointer to the first byte
 * equal to character that is a single-byte character (state 0).  When
 * there is no such byte the result points at the terminating NUL; it is
 * never NULL.
 */
unsigned char *
pg_mbschr(int csc, const unsigned char *string, unsigned int character)
{
	int		mb_st = 0;
	unsigned char *s = (unsigned char *) string;

	for (;;)
	{
		mb_st = pg_CS_stat(mb_st, (unsigned char) *s, csc);
		if (mb_st == 0 && (*s == character || *s == 0))
			break;
		s++;
	}
	return (s);
}

/*
 * Count the characters (not bytes) of a NUL-terminated string in
 * encoding csc.  A byte is counted when its scanning state is below 2,
 * i.e. once per character.
 */
int
pg_mbslen(int csc, const unsigned char *string)
{
	const unsigned char *s;
	int		len = 0,
			cs_stat = 0;

	for (s = string; *s != 0; s++)
	{
		cs_stat = pg_CS_stat(cs_stat, (unsigned int) *s, csc);
		if (cs_stat < 2)
			len++;
	}
	return len;
}

/*
 * Step to the character following the one that starts at current.
 *
 * The byte at current is classified as if it began a character; the
 * resulting state is taken as that character's byte length (a state of 0
 * counts as one byte).  Returns NULL when current is at the terminating
 * NUL.
 */
unsigned char *
pg_mbsinc(int csc, const unsigned char *current)
{
	int		mb_stat = 0;

	if (*current == 0)
		return NULL;

	mb_stat = (int) pg_CS_stat(mb_stat, *current, csc);
	if (mb_stat == 0)
		mb_stat = 1;
	return ((unsigned char *) current + mb_stat);
}

/*
 * Ask the backend for the client encoding with pg_client_encoding().
 *
 * Returns a strdup()ed copy of the value the caller must free(), or NULL
 * when the query produced no result or no value.
 */
static char *
CC_lookup_cs_new(ConnectionClass *self)
{
	char	   *encstr = NULL;
	QResultClass *res;

	res = CC_send_query(self, "select pg_client_encoding()", NULL, TRUE);
	if (res)
	{
		char	   *enc = QR_get_value_backend_row(res, 0, 0);

		if (enc)
			encstr = strdup(enc);
		QR_Destructor(res);
	}
	return encstr;
}

/*
 * Obtain the client encoding by issuing "Show Client_Encoding" and taking
 * the sixth whitespace-separated word of the message reported through
 * PGAPI_Error().
 *
 * Returns a strdup()ed copy the caller must free(), or NULL when the
 * statement could not be allocated or no encoding word was found.
 */
static char *
CC_lookup_cs_old(ConnectionClass *self)
{
	char	   *encstr = NULL;
	HSTMT		hstmt;
	RETCODE		result;

	result = PGAPI_AllocStmt(self, &hstmt);
	if ((result != SQL_SUCCESS) && (result != SQL_SUCCESS_WITH_INFO))
		return encstr;

	result = PGAPI_ExecDirect(hstmt, "Show Client_Encoding", SQL_NTS);
	if (result == SQL_SUCCESS_WITH_INFO)
	{
		char		sqlState[8],
					errormsg[128],
					enc[32];

		/*
		 * The width limit keeps an over-long word from overrunning
		 * enc[32] (31 characters plus the terminating NUL).
		 */
		if (PGAPI_Error(NULL, NULL, hstmt, sqlState, NULL, errormsg,
						sizeof(errormsg), NULL) == SQL_SUCCESS &&
			sscanf(errormsg, "%*s %*s %*s %*s %*s %31s", enc) > 0)
			encstr = strdup(enc);
	}
	PGAPI_FreeStmt(hstmt, SQL_DROP);
	return encstr;
}

/*
 * Determine the connection's client encoding and store both its name
 * (self->client_encoding, heap-allocated) and its character-set code
 * (self->ccsc).
 *
 * Any previously stored name is freed.  When the lookup fails the
 * connection falls back to SQL_ASCII with a NULL name.  When the table
 * name for the resolved code differs (case-insensitively) from the
 * reported name, the connection's error number and message are set.
 */
void
CC_lookup_characterset(ConnectionClass *self)
{
	char	   *encstr;
	static char *func = "CC_lookup_characterset";

	mylog("%s: entering...\n", func);
	if (PG_VERSION_LT(self, 7.2))
		encstr = CC_lookup_cs_old(self);
	else
		encstr = CC_lookup_cs_new(self);

	free(self->client_encoding);	/* free(NULL) is a no-op */

	if (!encstr)
	{
		self->ccsc = SQL_ASCII;
		self->client_encoding = NULL;
		return;
	}

	self->client_encoding = encstr;
	self->ccsc = pg_CS_code(encstr);
	qlog("    [ Client encoding = '%s' (code = %d) ]\n", self->client_encoding, self->ccsc);
	if (stricmp(pg_CS_name(self->ccsc), encstr))
	{
		qlog(" Client encoding = '%s' and %s\n", self->client_encoding, pg_CS_name(self->ccsc));
		self->errornumber = CONN_VALUE_OUT_OF_RANGE;
		self->errormsg = "client encoding mismatch";
	}
}

/*
 * Initialise an encoded-string cursor over str in encoding ccsc.
 * The position starts at -1 so that the first encoded_nextchar() call
 * reads byte 0; the scanning state starts at 0.  str is referenced, not
 * copied.
 */
void
encoded_str_constr(encoded_str *encstr, int ccsc, const char *str)
{
	encstr->ccsc = ccsc;
	encstr->encstr = str;
	encstr->pos = -1;
	encstr->ccst = 0;
}

/*
 * Advance the cursor by one byte, update the scanning state for it and
 * return the byte read.
 */
int
encoded_nextchar(encoded_str *encstr)
{
	int		chr;

	chr = encstr->encstr[++encstr->pos];
	/*
	 * Classify the raw byte value (0..255); converting a negative char
	 * straight to unsigned int would sign-extend and defeat the range
	 * tests in pg_CS_stat().
	 */
	encstr->ccst = pg_CS_stat(encstr->ccst, (unsigned char) chr, encstr->ccsc);
	return chr;
}

/*
 * Move the cursor to byte offset abspos, update the scanning state for
 * the byte found there (continuing from the current state) and return
 * that byte.
 */
int
encoded_byte_check(encoded_str *encstr, int abspos)
{
	int		chr;

	chr = encstr->encstr[encstr->pos = abspos];
	/* Same raw-byte conversion as in encoded_nextchar(). */
	encstr->ccst = pg_CS_stat(encstr->ccst, (unsigned char) chr, encstr->ccsc);
	return chr;
}