mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-25 18:38:00 +03:00 
			
		
		
		
	Dot character was not considered as a valid identifier body character. mysql-test/r/xml.result: Adding test case mysql-test/t/xml.test: Adding test case sql/item_xmlfunc.cc: Treat dot character as a valid identifier body part. strings/ctype.c: Fixing to use '/' instead of '.' as a delimiter in charset file parser. strings/xml.c: Fixing to use '/' instead of '.' as a delimiter.
		
			
				
	
	
		
			310 lines
		
	
	
		
			8.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			310 lines
		
	
	
		
			8.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /* Copyright (C) 2000 MySQL AB
 | |
| 
 | |
|    This program is free software; you can redistribute it and/or modify
 | |
|    it under the terms of the GNU General Public License as published by
 | |
|    the Free Software Foundation; either version 2 of the License, or
 | |
|    (at your option) any later version.
 | |
| 
 | |
|    This program is distributed in the hope that it will be useful,
 | |
|    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|    GNU General Public License for more details.
 | |
| 
 | |
|    You should have received a copy of the GNU General Public License
 | |
|    along with this program; if not, write to the Free Software
 | |
|    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
 | |
| 
 | |
| #include <my_global.h>
 | |
| #include <m_ctype.h>
 | |
| #include <my_xml.h>
 | |
| #ifndef SCO
 | |
| #include <m_string.h>
 | |
| #endif
 | |
| 
 | |
| 
 | |
| /*
 | |
| 
 | |
|   This file implements routines which parse XML-based
 | |
|   character set and collation description files.
 | |
|   
 | |
|   Unicode collations are encoded according to
 | |
|   
 | |
|     Unicode Technical Standard #35
 | |
|     Locale Data Markup Language (LDML)
 | |
|     http://www.unicode.org/reports/tr35/
 | |
|   
 | |
|   and converted into ICU string according to
 | |
|   
 | |
|     Collation Customization
 | |
|     http://oss.software.ibm.com/icu/userguide/Collate_Customization.html
 | |
|   
 | |
| */
 | |
| 
 | |
| static char *mstr(char *str,const char *src,uint l1,uint l2)
 | |
| {
 | |
|   l1= l1<l2 ? l1 : l2;
 | |
|   memcpy(str,src,l1);
 | |
|   str[l1]='\0';
 | |
|   return str;
 | |
| }
 | |
| 
 | |
/*
  One row of the section lookup table: associates the path of an XML
  element or attribute with the parser state used while handling it.
*/
struct my_cs_file_section_st
{
  int        state;     /* one of the _CS_* codes; 0 in the end marker   */
  const char *str;      /* '/'-separated path; NULL in the end marker    */
};
 | |
| 
 | |
/*
  Parser states stored in the section table and dispatched on by the
  XML handlers. 0 is not used as a state: the handlers use it to mean
  "path not found in the table".
*/
#define _CS_MISC        1
#define _CS_ID          2
#define _CS_CSNAME      3
#define _CS_FAMILY      4
#define _CS_ORDER       5
#define _CS_COLNAME     6
#define _CS_FLAG        7
#define _CS_CHARSET     8
#define _CS_COLLATION   9
#define _CS_UPPERMAP    10
#define _CS_LOWERMAP    11
#define _CS_UNIMAP      12
#define _CS_COLLMAP     13
#define _CS_CTYPEMAP    14
#define _CS_PRIMARY_ID  15
#define _CS_BINARY_ID   16
#define _CS_CSDESCRIPT  17

/*
  Tailoring rule states. These four must stay consecutive and in this
  order: cs_value() uses (state - _CS_RESET) as an array index.
*/
#define _CS_RESET       18
#define _CS_DIFF1       19
#define _CS_DIFF2       20
#define _CS_DIFF3       21
 | |
| 
 | |
| 
 | |
| static struct my_cs_file_section_st sec[] =
 | |
| {
 | |
|   {_CS_MISC,		"xml"},
 | |
|   {_CS_MISC,		"xml/version"},
 | |
|   {_CS_MISC,		"xml/encoding"},
 | |
|   {_CS_MISC,		"charsets"},
 | |
|   {_CS_MISC,		"charsets/max-id"},
 | |
|   {_CS_CHARSET,		"charsets/charset"},
 | |
|   {_CS_PRIMARY_ID,	"charsets/charset/primary-id"},
 | |
|   {_CS_BINARY_ID,	"charsets/charset/binary-id"},
 | |
|   {_CS_CSNAME,		"charsets/charset/name"},
 | |
|   {_CS_FAMILY,		"charsets/charset/family"},
 | |
|   {_CS_CSDESCRIPT,	"charsets/charset/description"},
 | |
|   {_CS_MISC,		"charsets/charset/alias"},
 | |
|   {_CS_MISC,		"charsets/charset/ctype"},
 | |
|   {_CS_CTYPEMAP,	"charsets/charset/ctype/map"},
 | |
|   {_CS_MISC,		"charsets/charset/upper"},
 | |
|   {_CS_UPPERMAP,	"charsets/charset/upper/map"},
 | |
|   {_CS_MISC,		"charsets/charset/lower"},
 | |
|   {_CS_LOWERMAP,	"charsets/charset/lower/map"},
 | |
|   {_CS_MISC,		"charsets/charset/unicode"},
 | |
|   {_CS_UNIMAP,		"charsets/charset/unicode/map"},
 | |
|   {_CS_COLLATION,	"charsets/charset/collation"},
 | |
|   {_CS_COLNAME,		"charsets/charset/collation/name"},
 | |
|   {_CS_ID,		"charsets/charset/collation/id"},
 | |
|   {_CS_ORDER,		"charsets/charset/collation/order"},
 | |
|   {_CS_FLAG,		"charsets/charset/collation/flag"},
 | |
|   {_CS_COLLMAP,		"charsets/charset/collation/map"},
 | |
|   {_CS_RESET,		"charsets/charset/collation/rules/reset"},
 | |
|   {_CS_DIFF1,		"charsets/charset/collation/rules/p"},
 | |
|   {_CS_DIFF2,		"charsets/charset/collation/rules/s"},
 | |
|   {_CS_DIFF3,		"charsets/charset/collation/rules/t"},
 | |
|   {0,	NULL}
 | |
| };
 | |
| 
 | |
| static struct my_cs_file_section_st * cs_file_sec(const char *attr, uint len)
 | |
| {
 | |
|   struct my_cs_file_section_st *s;
 | |
|   for (s=sec; s->str; s++)
 | |
|   {
 | |
|     if (!strncmp(attr,s->str,len))
 | |
|       return s;
 | |
|   }
 | |
|   return NULL;
 | |
| }
 | |
| 
 | |
/* Sizes in bytes, including the terminating NUL, of the loader's text buffers */
#define MY_CS_CSDESCR_SIZE      64      /* character set description */
#define MY_CS_TAILORING_SIZE    128     /* accumulated tailoring rules */
 | |
| 
 | |
| typedef struct my_cs_file_info
 | |
| {
 | |
|   char   csname[MY_CS_NAME_SIZE];
 | |
|   char   name[MY_CS_NAME_SIZE];
 | |
|   uchar  ctype[MY_CS_CTYPE_TABLE_SIZE];
 | |
|   uchar  to_lower[MY_CS_TO_LOWER_TABLE_SIZE];
 | |
|   uchar  to_upper[MY_CS_TO_UPPER_TABLE_SIZE];
 | |
|   uchar  sort_order[MY_CS_SORT_ORDER_TABLE_SIZE];
 | |
|   uint16 tab_to_uni[MY_CS_TO_UNI_TABLE_SIZE];
 | |
|   char   comment[MY_CS_CSDESCR_SIZE];
 | |
|   char   tailoring[MY_CS_TAILORING_SIZE];
 | |
|   size_t tailoring_length;
 | |
|   CHARSET_INFO cs;
 | |
|   int (*add_collation)(CHARSET_INFO *cs);
 | |
| } MY_CHARSET_LOADER;
 | |
| 
 | |
| 
 | |
| 
 | |
| static int fill_uchar(uchar *a,uint size,const char *str, uint len)
 | |
| {
 | |
|   uint i= 0;
 | |
|   const char *s, *b, *e=str+len;
 | |
|   
 | |
|   for (s=str ; s < e ; i++)
 | |
|   { 
 | |
|     for ( ; (s < e) && strchr(" \t\r\n",s[0]); s++) ;
 | |
|     b=s;
 | |
|     for ( ; (s < e) && !strchr(" \t\r\n",s[0]); s++) ;
 | |
|     if (s == b || i > size)
 | |
|       break;
 | |
|     a[i]= (uchar) strtoul(b,NULL,16);
 | |
|   }
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| static int fill_uint16(uint16 *a,uint size,const char *str, uint len)
 | |
| {
 | |
|   uint i= 0;
 | |
|   
 | |
|   const char *s, *b, *e=str+len;
 | |
|   for (s=str ; s < e ; i++)
 | |
|   { 
 | |
|     for ( ; (s < e) && strchr(" \t\r\n",s[0]); s++) ;
 | |
|     b=s;
 | |
|     for ( ; (s < e) && !strchr(" \t\r\n",s[0]); s++) ;
 | |
|     if (s == b || i > size)
 | |
|       break;
 | |
|     a[i]= (uint16) strtol(b,NULL,16);
 | |
|   }
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int cs_enter(MY_XML_PARSER *st,const char *attr, uint len)
 | |
| {
 | |
|   struct my_cs_file_info *i= (struct my_cs_file_info *)st->user_data;
 | |
|   struct my_cs_file_section_st *s= cs_file_sec(attr,len);
 | |
|   
 | |
|   if ( s && (s->state == _CS_CHARSET))
 | |
|     bzero(&i->cs,sizeof(i->cs));
 | |
|   
 | |
|   if (s && (s->state == _CS_COLLATION))
 | |
|     i->tailoring_length= 0;
 | |
| 
 | |
|   return MY_XML_OK;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int cs_leave(MY_XML_PARSER *st,const char *attr, uint len)
 | |
| {
 | |
|   struct my_cs_file_info *i= (struct my_cs_file_info *)st->user_data;
 | |
|   struct my_cs_file_section_st *s= cs_file_sec(attr,len);
 | |
|   int    state= s ? s->state : 0;
 | |
|   int    rc;
 | |
|   
 | |
|   switch(state){
 | |
|   case _CS_COLLATION:
 | |
|     rc= i->add_collation ? i->add_collation(&i->cs) : MY_XML_OK;
 | |
|     break;
 | |
|   default:
 | |
|     rc=MY_XML_OK;
 | |
|   }
 | |
|   return rc;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int cs_value(MY_XML_PARSER *st,const char *attr, uint len)
 | |
| {
 | |
|   struct my_cs_file_info *i= (struct my_cs_file_info *)st->user_data;
 | |
|   struct my_cs_file_section_st *s;
 | |
|   int    state= (int)((s=cs_file_sec(st->attr, (int) strlen(st->attr))) ? s->state : 0);
 | |
|   
 | |
|   switch (state) {
 | |
|   case _CS_ID:
 | |
|     i->cs.number= strtol(attr,(char**)NULL,10);
 | |
|     break;
 | |
|   case _CS_BINARY_ID:
 | |
|     i->cs.binary_number= strtol(attr,(char**)NULL,10);
 | |
|     break;
 | |
|   case _CS_PRIMARY_ID:
 | |
|     i->cs.primary_number= strtol(attr,(char**)NULL,10);
 | |
|     break;
 | |
|   case _CS_COLNAME:
 | |
|     i->cs.name=mstr(i->name,attr,len,MY_CS_NAME_SIZE-1);
 | |
|     break;
 | |
|   case _CS_CSNAME:
 | |
|     i->cs.csname=mstr(i->csname,attr,len,MY_CS_NAME_SIZE-1);
 | |
|     break;
 | |
|   case _CS_CSDESCRIPT:
 | |
|     i->cs.comment=mstr(i->comment,attr,len,MY_CS_CSDESCR_SIZE-1);
 | |
|     break;
 | |
|   case _CS_FLAG:
 | |
|     if (!strncmp("primary",attr,len))
 | |
|       i->cs.state|= MY_CS_PRIMARY;
 | |
|     else if (!strncmp("binary",attr,len))
 | |
|       i->cs.state|= MY_CS_BINSORT;
 | |
|     else if (!strncmp("compiled",attr,len))
 | |
|       i->cs.state|= MY_CS_COMPILED;
 | |
|     break;
 | |
|   case _CS_UPPERMAP:
 | |
|     fill_uchar(i->to_upper,MY_CS_TO_UPPER_TABLE_SIZE,attr,len);
 | |
|     i->cs.to_upper=i->to_upper;
 | |
|     break;
 | |
|   case _CS_LOWERMAP:
 | |
|     fill_uchar(i->to_lower,MY_CS_TO_LOWER_TABLE_SIZE,attr,len);
 | |
|     i->cs.to_lower=i->to_lower;
 | |
|     break;
 | |
|   case _CS_UNIMAP:
 | |
|     fill_uint16(i->tab_to_uni,MY_CS_TO_UNI_TABLE_SIZE,attr,len);
 | |
|     i->cs.tab_to_uni=i->tab_to_uni;
 | |
|     break;
 | |
|   case _CS_COLLMAP:
 | |
|     fill_uchar(i->sort_order,MY_CS_SORT_ORDER_TABLE_SIZE,attr,len);
 | |
|     i->cs.sort_order=i->sort_order;
 | |
|     break;
 | |
|   case _CS_CTYPEMAP:
 | |
|     fill_uchar(i->ctype,MY_CS_CTYPE_TABLE_SIZE,attr,len);
 | |
|     i->cs.ctype=i->ctype;
 | |
|     break;
 | |
|   case _CS_RESET:
 | |
|   case _CS_DIFF1:
 | |
|   case _CS_DIFF2:
 | |
|   case _CS_DIFF3:
 | |
|     {
 | |
|       /*
 | |
|         Convert collation description from
 | |
|         Locale Data Markup Language (LDML)
 | |
|         into ICU Collation Customization expression.
 | |
|       */
 | |
|       char arg[16];
 | |
|       const char *cmd[]= {"&","<","<<","<<<"};
 | |
|       i->cs.tailoring= i->tailoring;
 | |
|       mstr(arg,attr,len,sizeof(arg)-1);
 | |
|       if (i->tailoring_length + 20 < sizeof(i->tailoring))
 | |
|       {
 | |
|         char *dst= i->tailoring_length + i->tailoring;
 | |
|         i->tailoring_length+= sprintf(dst," %s %s",cmd[state-_CS_RESET],arg);
 | |
|       }
 | |
|     }
 | |
|   }
 | |
|   return MY_XML_OK;
 | |
| }
 | |
| 
 | |
| 
 | |
| my_bool my_parse_charset_xml(const char *buf, uint len, 
 | |
| 				    int (*add_collation)(CHARSET_INFO *cs))
 | |
| {
 | |
|   MY_XML_PARSER p;
 | |
|   struct my_cs_file_info i;
 | |
|   my_bool rc;
 | |
|   
 | |
|   my_xml_parser_create(&p);
 | |
|   my_xml_set_enter_handler(&p,cs_enter);
 | |
|   my_xml_set_value_handler(&p,cs_value);
 | |
|   my_xml_set_leave_handler(&p,cs_leave);
 | |
|   i.add_collation= add_collation;
 | |
|   my_xml_set_user_data(&p,(void*)&i);
 | |
|   rc= (my_xml_parse(&p,buf,len) == MY_XML_OK) ? FALSE : TRUE;
 | |
|   my_xml_parser_free(&p);
 | |
|   return rc;
 | |
| }
 |