mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-10-24 13:33:01 +03:00
new files for a different method for doing range validation of character
* genChRange.py, chvalid.def, chvalid.c, include/libxml/chvalid.h: new files for a different method for doing range validation of character data. * Makefile.am, parserInternals.c, include/libxml/Makefile.am, include/libxml/parserInternals.h: modified for new range method. * catalog.c: small enhance for warning message (using one of the new range routines)
This commit is contained in:
10
ChangeLog
10
ChangeLog
@@ -1,3 +1,13 @@
|
|||||||
|
Sat Oct 11 23:11:22 HKT 2003 William Brack <wbrack@mmm.com.hk>
|
||||||
|
|
||||||
|
* genChRanges.py, chvalid.def, chvalid.c, include/libxml/chvalid.h:
|
||||||
|
new files for a different method for doing range validation
|
||||||
|
of character data.
|
||||||
|
* Makefile.am, parserInternals.c, include/libxml/Makefile.am,
|
||||||
|
include/libxml/parserInternals.h: modified for new range method.
|
||||||
|
* catalog.c: small enhance for warning message (using one
|
||||||
|
of the new range routines)
|
||||||
|
|
||||||
Sat Oct 11 13:24:57 CEST 2003 Daniel Veillard <daniel@veillard.com>
|
Sat Oct 11 13:24:57 CEST 2003 Daniel Veillard <daniel@veillard.com>
|
||||||
|
|
||||||
* valid.c include/libxml/valid.h: adding an serror field to
|
* valid.c include/libxml/valid.h: adding an serror field to
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ libxml2_la_SOURCES = SAX.c entities.c encoding.c error.c parserInternals.c \
|
|||||||
catalog.c globals.c threads.c c14n.c \
|
catalog.c globals.c threads.c c14n.c \
|
||||||
xmlregexp.c xmlschemas.c xmlschemastypes.c xmlunicode.c \
|
xmlregexp.c xmlschemas.c xmlschemastypes.c xmlunicode.c \
|
||||||
triostr.c trio.c xmlreader.c relaxng.c dict.c SAX2.c \
|
triostr.c trio.c xmlreader.c relaxng.c dict.c SAX2.c \
|
||||||
legacy.c walker.c
|
legacy.c walker.c chvalid.c
|
||||||
else
|
else
|
||||||
libxml2_la_SOURCES = SAX.c entities.c encoding.c error.c parserInternals.c \
|
libxml2_la_SOURCES = SAX.c entities.c encoding.c error.c parserInternals.c \
|
||||||
parser.c tree.c hash.c list.c xmlIO.c xmlmemory.c uri.c \
|
parser.c tree.c hash.c list.c xmlIO.c xmlmemory.c uri.c \
|
||||||
@@ -36,7 +36,7 @@ libxml2_la_SOURCES = SAX.c entities.c encoding.c error.c parserInternals.c \
|
|||||||
catalog.c globals.c threads.c c14n.c \
|
catalog.c globals.c threads.c c14n.c \
|
||||||
xmlregexp.c xmlschemas.c xmlschemastypes.c xmlunicode.c \
|
xmlregexp.c xmlschemas.c xmlschemastypes.c xmlunicode.c \
|
||||||
xmlreader.c relaxng.c dict.c SAX2.c \
|
xmlreader.c relaxng.c dict.c SAX2.c \
|
||||||
legacy.c xmldwalk.c
|
legacy.c xmldwalk.c chvalid.c
|
||||||
endif
|
endif
|
||||||
|
|
||||||
DEPS = $(top_builddir)/libxml2.la
|
DEPS = $(top_builddir)/libxml2.la
|
||||||
|
|||||||
@@ -2932,11 +2932,11 @@ xmlInitializeCatalog(void) {
|
|||||||
cur = catalogs;
|
cur = catalogs;
|
||||||
nextent = &catal->xml;
|
nextent = &catal->xml;
|
||||||
while (*cur != '\0') {
|
while (*cur != '\0') {
|
||||||
while (IS_BLANK(*cur))
|
while (xmlIsBlank_ch(*cur))
|
||||||
cur++;
|
cur++;
|
||||||
if (*cur != 0) {
|
if (*cur != 0) {
|
||||||
paths = cur;
|
paths = cur;
|
||||||
while ((*cur != 0) && (!IS_BLANK(*cur)))
|
while ((*cur != 0) && (!xmlIsBlank_ch(*cur)))
|
||||||
cur++;
|
cur++;
|
||||||
path = (char *) xmlStrndup((const xmlChar *)paths, cur - paths);
|
path = (char *) xmlStrndup((const xmlChar *)paths, cur - paths);
|
||||||
if (path != NULL) {
|
if (path != NULL) {
|
||||||
@@ -3015,10 +3015,10 @@ xmlLoadCatalogs(const char *pathss) {
|
|||||||
|
|
||||||
cur = pathss;
|
cur = pathss;
|
||||||
while ((cur != NULL) && (*cur != 0)) {
|
while ((cur != NULL) && (*cur != 0)) {
|
||||||
while (IS_BLANK(*cur)) cur++;
|
while (xmlIsBlank_ch(*cur)) cur++;
|
||||||
if (*cur != 0) {
|
if (*cur != 0) {
|
||||||
paths = cur;
|
paths = cur;
|
||||||
while ((*cur != 0) && (*cur != ':') && (!IS_BLANK(*cur)))
|
while ((*cur != 0) && (*cur != ':') && (!xmlIsBlank_ch(*cur)))
|
||||||
cur++;
|
cur++;
|
||||||
path = xmlStrndup((const xmlChar *)paths, cur - paths);
|
path = xmlStrndup((const xmlChar *)paths, cur - paths);
|
||||||
if (path != NULL) {
|
if (path != NULL) {
|
||||||
|
|||||||
186
chvalid.c
Executable file
186
chvalid.c
Executable file
@@ -0,0 +1,186 @@
|
|||||||
|
/*
|
||||||
|
* chvalid.c: this module implements the character range
|
||||||
|
* validation APIs
|
||||||
|
*
|
||||||
|
* This file is automatically generated from the cvs source
|
||||||
|
* definition files using the genChRanges.py Python script
|
||||||
|
*
|
||||||
|
* Generation date: Sat Oct 11 20:57:37 2003
|
||||||
|
* Sources: chvalid.def
|
||||||
|
* William Brack <wbrack@mmm.com.hk>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "chvalid.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The initial tables ({func_name}_tab) are used to validate whether a
|
||||||
|
* single-byte character is within the specified group. Each table
|
||||||
|
* contains 256 bytes, with each byte representing one of the 256
|
||||||
|
* possible characters. If the table byte is set, the character is
|
||||||
|
* allowed.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
/*
 * xmlIsPubidChar_tab:
 *
 * Lookup table for the XML 1.0 PubidChar production:
 *   #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 * Entry N is 0x01 when byte value N is a legal public-identifier
 * character and 0x00 otherwise. Each row below holds 12 entries.
 *
 * The apostrophe (0x27) is accepted and the backslash (0x5c) is rejected,
 * as the production requires.
 *
 * NOTE: this file is generated from chvalid.def; the definition of
 * xmlIsPubidChar there must list 0x27 rather than '\' or regenerating
 * the file will bring the incorrect entries back.
 */
unsigned char xmlIsPubidChar_tab[256] = {
    /* 0x00 - 0x0b: only 0x0a (LF) */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00,
    /* 0x0c - 0x17: only 0x0d (CR) */
    0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 - 0x23: space, '!', '#' (not '"') */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x01,
    /* 0x24 - 0x2f: '$' '%' (not '&') '\'' '(' ')' '*' '+' ',' '-' '.' '/' */
    0x01, 0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    /* 0x30 - 0x3b: '0'..'9' ':' ';' */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    /* 0x3c - 0x47: '=' '?' '@' 'A'..'G' (not '<' or '>') */
    0x00, 0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    /* 0x48 - 0x53: 'H'..'S' */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    /* 0x54 - 0x5f: 'T'..'Z' and '_' (not '[' '\\' ']' '^') */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01,
    /* 0x60 - 0x6b: 'a'..'k' (not '`') */
    0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    /* 0x6c - 0x77: 'l'..'w' */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    /* 0x78 - 0x83: 'x' 'y' 'z' */
    0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x84 - 0xff: nothing allowed */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00 };
|
||||||
|
|
||||||
|
static xmlChSRange xmlIsBaseChar_srng[] = { {0x100, 0x131}, {0x134, 0x13e},
|
||||||
|
{0x141, 0x148}, {0x14a, 0x17e}, {0x180, 0x1c3}, {0x1cd, 0x1f0},
|
||||||
|
{0x1f4, 0x1f5}, {0x1fa, 0x217}, {0x250, 0x2a8}, {0x2bb, 0x2c1},
|
||||||
|
{0x386, 0x386}, {0x388, 0x38a}, {0x38c, 0x38c}, {0x38e, 0x3a1},
|
||||||
|
{0x3a3, 0x3ce}, {0x3d0, 0x3d6}, {0x3da, 0x3da}, {0x3dc, 0x3dc},
|
||||||
|
{0x3de, 0x3de}, {0x3e0, 0x3e0}, {0x3e2, 0x3f3}, {0x401, 0x40c},
|
||||||
|
{0x40e, 0x44f}, {0x451, 0x45c}, {0x45e, 0x481}, {0x490, 0x4c4},
|
||||||
|
{0x4c7, 0x4c8}, {0x4cb, 0x4cc}, {0x4d0, 0x4eb}, {0x4ee, 0x4f5},
|
||||||
|
{0x4f8, 0x4f9}, {0x531, 0x556}, {0x559, 0x559}, {0x561, 0x586},
|
||||||
|
{0x5d0, 0x5ea}, {0x5f0, 0x5f2}, {0x621, 0x63a}, {0x641, 0x64a},
|
||||||
|
{0x671, 0x6b7}, {0x6ba, 0x6be}, {0x6c0, 0x6ce}, {0x6d0, 0x6d3},
|
||||||
|
{0x6d5, 0x6d5}, {0x6e5, 0x6e6}, {0x905, 0x939}, {0x93d, 0x93d},
|
||||||
|
{0x958, 0x961}, {0x985, 0x98c}, {0x98f, 0x990}, {0x993, 0x9a8},
|
||||||
|
{0x9aa, 0x9b0}, {0x9b2, 0x9b2}, {0x9b6, 0x9b9}, {0x9dc, 0x9dd},
|
||||||
|
{0x9df, 0x9e1}, {0x9f0, 0x9f1}, {0xa05, 0xa0a}, {0xa0f, 0xa10},
|
||||||
|
{0xa13, 0xa28}, {0xa2a, 0xa30}, {0xa32, 0xa33}, {0xa35, 0xa36},
|
||||||
|
{0xa38, 0xa39}, {0xa59, 0xa5c}, {0xa5e, 0xa5e}, {0xa72, 0xa74},
|
||||||
|
{0xa85, 0xa8b}, {0xa8d, 0xa8d}, {0xa8f, 0xa91}, {0xa93, 0xaa8},
|
||||||
|
{0xaaa, 0xab0}, {0xab2, 0xab3}, {0xab5, 0xab9}, {0xabd, 0xabd},
|
||||||
|
{0xae0, 0xae0}, {0xb05, 0xb0c}, {0xb0f, 0xb10}, {0xb13, 0xb28},
|
||||||
|
{0xb2a, 0xb30}, {0xb32, 0xb33}, {0xb36, 0xb39}, {0xb3d, 0xb3d},
|
||||||
|
{0xb5c, 0xb5d}, {0xb5f, 0xb61}, {0xb85, 0xb8a}, {0xb8e, 0xb90},
|
||||||
|
{0xb92, 0xb95}, {0xb99, 0xb9a}, {0xb9c, 0xb9c}, {0xb9e, 0xb9f},
|
||||||
|
{0xba3, 0xba4}, {0xba8, 0xbaa}, {0xbae, 0xbb5}, {0xbb7, 0xbb9},
|
||||||
|
{0xc05, 0xc0c}, {0xc0e, 0xc10}, {0xc12, 0xc28}, {0xc2a, 0xc33},
|
||||||
|
{0xc35, 0xc39}, {0xc60, 0xc61}, {0xc85, 0xc8c}, {0xc8e, 0xc90},
|
||||||
|
{0xc92, 0xca8}, {0xcaa, 0xcb3}, {0xcb5, 0xcb9}, {0xcde, 0xcde},
|
||||||
|
{0xce0, 0xce1}, {0xd05, 0xd0c}, {0xd0e, 0xd10}, {0xd12, 0xd28},
|
||||||
|
{0xd2a, 0xd39}, {0xd60, 0xd61}, {0xe01, 0xe2e}, {0xe30, 0xe30},
|
||||||
|
{0xe32, 0xe33}, {0xe40, 0xe45}, {0xe81, 0xe82}, {0xe84, 0xe84},
|
||||||
|
{0xe87, 0xe88}, {0xe8a, 0xe8a}, {0xe8d, 0xe8d}, {0xe94, 0xe97},
|
||||||
|
{0xe99, 0xe9f}, {0xea1, 0xea3}, {0xea5, 0xea5}, {0xea7, 0xea7},
|
||||||
|
{0xeaa, 0xeab}, {0xead, 0xeae}, {0xeb0, 0xeb0}, {0xeb2, 0xeb3},
|
||||||
|
{0xebd, 0xebd}, {0xec0, 0xec4}, {0xf40, 0xf47}, {0xf49, 0xf69},
|
||||||
|
{0x10a0, 0x10c5}, {0x10d0, 0x10f6}, {0x1100, 0x1100}, {0x1102, 0x1103},
|
||||||
|
{0x1105, 0x1107}, {0x1109, 0x1109}, {0x110b, 0x110c}, {0x110e, 0x1112},
|
||||||
|
{0x113c, 0x113c}, {0x113e, 0x113e}, {0x1140, 0x1140}, {0x114c, 0x114c},
|
||||||
|
{0x114e, 0x114e}, {0x1150, 0x1150}, {0x1154, 0x1155}, {0x1159, 0x1159},
|
||||||
|
{0x115f, 0x1161}, {0x1163, 0x1163}, {0x1165, 0x1165}, {0x1167, 0x1167},
|
||||||
|
{0x1169, 0x1169}, {0x116d, 0x116e}, {0x1172, 0x1173}, {0x1175, 0x1175},
|
||||||
|
{0x119e, 0x119e}, {0x11a8, 0x11a8}, {0x11ab, 0x11ab}, {0x11ae, 0x11af},
|
||||||
|
{0x11b7, 0x11b8}, {0x11ba, 0x11ba}, {0x11bc, 0x11c2}, {0x11eb, 0x11eb},
|
||||||
|
{0x11f0, 0x11f0}, {0x11f9, 0x11f9}, {0x1e00, 0x1e9b}, {0x1ea0, 0x1ef9},
|
||||||
|
{0x1f00, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f20, 0x1f45}, {0x1f48, 0x1f4d},
|
||||||
|
{0x1f50, 0x1f57}, {0x1f59, 0x1f59}, {0x1f5b, 0x1f5b}, {0x1f5d, 0x1f5d},
|
||||||
|
{0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, {0x1fb6, 0x1fbc}, {0x1fbe, 0x1fbe},
|
||||||
|
{0x1fc2, 0x1fc4}, {0x1fc6, 0x1fcc}, {0x1fd0, 0x1fd3}, {0x1fd6, 0x1fdb},
|
||||||
|
{0x1fe0, 0x1fec}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffc}, {0x2126, 0x2126},
|
||||||
|
{0x212a, 0x212b}, {0x212e, 0x212e}, {0x2180, 0x2182}, {0x3041, 0x3094},
|
||||||
|
{0x30a1, 0x30fa}, {0x3105, 0x312c}, {0xac00, 0xd7a3}};
|
||||||
|
xmlChRangeGroup xmlIsBaseCharGroup = {197, 0, xmlIsBaseChar_srng};
|
||||||
|
|
||||||
|
static xmlChSRange xmlIsChar_srng[] = { {0x100, 0xd7ff}, {0xe000, 0xfffd}};
|
||||||
|
static xmlChLRange xmlIsChar_lrng[] = { {0x10000, 0x10ffff}};
|
||||||
|
xmlChRangeGroup xmlIsCharGroup = {2, 1, xmlIsChar_srng, xmlIsChar_lrng};
|
||||||
|
|
||||||
|
static xmlChSRange xmlIsCombining_srng[] = { {0x300, 0x345},
|
||||||
|
{0x360, 0x361}, {0x483, 0x486}, {0x591, 0x5a1}, {0x5a3, 0x5b9},
|
||||||
|
{0x5bb, 0x5bd}, {0x5bf, 0x5bf}, {0x5c1, 0x5c2}, {0x5c4, 0x5c4},
|
||||||
|
{0x64b, 0x652}, {0x670, 0x670}, {0x6d6, 0x6dc}, {0x6dd, 0x6df},
|
||||||
|
{0x6e0, 0x6e4}, {0x6e7, 0x6e8}, {0x6ea, 0x6ed}, {0x901, 0x903},
|
||||||
|
{0x93c, 0x93c}, {0x93e, 0x94c}, {0x94d, 0x94d}, {0x951, 0x954},
|
||||||
|
{0x962, 0x963}, {0x981, 0x983}, {0x9bc, 0x9bc}, {0x9be, 0x9be},
|
||||||
|
{0x9bf, 0x9bf}, {0x9c0, 0x9c4}, {0x9c7, 0x9c8}, {0x9cb, 0x9cd},
|
||||||
|
{0x9d7, 0x9d7}, {0x9e2, 0x9e3}, {0xa02, 0xa02}, {0xa3c, 0xa3c},
|
||||||
|
{0xa3e, 0xa3e}, {0xa3f, 0xa3f}, {0xa40, 0xa42}, {0xa47, 0xa48},
|
||||||
|
{0xa4b, 0xa4d}, {0xa70, 0xa71}, {0xa81, 0xa83}, {0xabc, 0xabc},
|
||||||
|
{0xabe, 0xac5}, {0xac7, 0xac9}, {0xacb, 0xacd}, {0xb01, 0xb03},
|
||||||
|
{0xb3c, 0xb3c}, {0xb3e, 0xb43}, {0xb47, 0xb48}, {0xb4b, 0xb4d},
|
||||||
|
{0xb56, 0xb57}, {0xb82, 0xb83}, {0xbbe, 0xbc2}, {0xbc6, 0xbc8},
|
||||||
|
{0xbca, 0xbcd}, {0xbd7, 0xbd7}, {0xc01, 0xc03}, {0xc3e, 0xc44},
|
||||||
|
{0xc46, 0xc48}, {0xc4a, 0xc4d}, {0xc55, 0xc56}, {0xc82, 0xc83},
|
||||||
|
{0xcbe, 0xcc4}, {0xcc6, 0xcc8}, {0xcca, 0xccd}, {0xcd5, 0xcd6},
|
||||||
|
{0xd02, 0xd03}, {0xd3e, 0xd43}, {0xd46, 0xd48}, {0xd4a, 0xd4d},
|
||||||
|
{0xd57, 0xd57}, {0xe31, 0xe31}, {0xe34, 0xe3a}, {0xe47, 0xe4e},
|
||||||
|
{0xeb1, 0xeb1}, {0xeb4, 0xeb9}, {0xebb, 0xebc}, {0xec8, 0xecd},
|
||||||
|
{0xf18, 0xf19}, {0xf35, 0xf35}, {0xf37, 0xf37}, {0xf39, 0xf39},
|
||||||
|
{0xf3e, 0xf3e}, {0xf3f, 0xf3f}, {0xf71, 0xf84}, {0xf86, 0xf8b},
|
||||||
|
{0xf90, 0xf95}, {0xf97, 0xf97}, {0xf99, 0xfad}, {0xfb1, 0xfb7},
|
||||||
|
{0xfb9, 0xfb9}, {0x20d0, 0x20dc}, {0x20e1, 0x20e1}, {0x302a, 0x302f},
|
||||||
|
{0x3099, 0x3099}, {0x309a, 0x309a}};
|
||||||
|
xmlChRangeGroup xmlIsCombiningGroup = {95, 0, xmlIsCombining_srng};
|
||||||
|
|
||||||
|
static xmlChSRange xmlIsDigit_srng[] = { {0x660, 0x669}, {0x6f0, 0x6f9},
|
||||||
|
{0x966, 0x96f}, {0x9e6, 0x9ef}, {0xa66, 0xa6f}, {0xae6, 0xaef},
|
||||||
|
{0xb66, 0xb6f}, {0xbe7, 0xbef}, {0xc66, 0xc6f}, {0xce6, 0xcef},
|
||||||
|
{0xd66, 0xd6f}, {0xe50, 0xe59}, {0xed0, 0xed9}, {0xf20, 0xf29}};
|
||||||
|
xmlChRangeGroup xmlIsDigitGroup = {14, 0, xmlIsDigit_srng};
|
||||||
|
|
||||||
|
static xmlChSRange xmlIsExtender_srng[] = { {0x2d0, 0x2d0}, {0x2d1, 0x2d1},
|
||||||
|
{0x387, 0x387}, {0x640, 0x640}, {0xe46, 0xe46}, {0xec6, 0xec6},
|
||||||
|
{0x3005, 0x3005}, {0x3031, 0x3031}, {0x3032, 0x3032}, {0x3033, 0x3033},
|
||||||
|
{0x3034, 0x3034}, {0x3035, 0x3035}, {0x309d, 0x309d}, {0x309e, 0x309e},
|
||||||
|
{0x30fc, 0x30fc}, {0x30fd, 0x30fd}, {0x30fe, 0x30fe}};
|
||||||
|
xmlChRangeGroup xmlIsExtenderGroup = {17, 0, xmlIsExtender_srng};
|
||||||
|
|
||||||
|
static xmlChSRange xmlIsIdeographic_srng[] = { {0x3007, 0x3007},
|
||||||
|
{0x3021, 0x3029}, {0x4300, 0x9fa5}, {0xf900, 0xfa2d}};
|
||||||
|
xmlChRangeGroup xmlIsIdeographicGroup = {4, 0, xmlIsIdeographic_srng};
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
xmlCharInRange (unsigned int val, xmlChRangeGroupPtr rptr) {
|
||||||
|
int low, high, mid;
|
||||||
|
xmlChSRangePtr sptr;
|
||||||
|
xmlChLRangePtr lptr;
|
||||||
|
if (val < 0x10000) { /* is val in 'short' or 'long' array? */
|
||||||
|
if (rptr->nbShortRange == 0)
|
||||||
|
return 0;
|
||||||
|
low = 0;
|
||||||
|
high = rptr->nbShortRange;
|
||||||
|
sptr = rptr->shortRange;
|
||||||
|
while (low <= high) {
|
||||||
|
mid = (low + high) / 2;
|
||||||
|
if ((unsigned short) val < sptr[mid].low)
|
||||||
|
high = mid - 1;
|
||||||
|
else if ((unsigned short) val > sptr[mid].high)
|
||||||
|
low = mid + 1;
|
||||||
|
else
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (rptr->nbLongRange == 0)
|
||||||
|
return 0;
|
||||||
|
low = 0;
|
||||||
|
high = rptr->nbLongRange;
|
||||||
|
lptr = rptr->longRange;
|
||||||
|
while (low <= high) {
|
||||||
|
mid = (low + high) / 2;
|
||||||
|
if (val < lptr[mid].low)
|
||||||
|
high = mid - 1;
|
||||||
|
else if (val > lptr[mid].high)
|
||||||
|
low = mid + 1;
|
||||||
|
else
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
342
chvalid.def
Executable file
342
chvalid.def
Executable file
@@ -0,0 +1,342 @@
|
|||||||
|
name xmlIsChar
|
||||||
|
ur 0x09 0x0a 0x0d 0x20..0xff
|
||||||
|
ur 0x000100..0x00d7ff
|
||||||
|
ur 0x00e000..0x00fffd
|
||||||
|
ur 0x010000..0x10ffff
|
||||||
|
end xmlIsChar
|
||||||
|
|
||||||
|
name xmlIsPubidChar
|
||||||
|
ur 0x20 0x0d 0x0a 'a'..'z' 'A'..'Z' '0'..'9'
|
||||||
|
ur '-' 0x27 '(' ')' '+' ',' '.' '/'
|
||||||
|
ur ':' '=' '?' ';' '!' '*' '#' '@'
|
||||||
|
ur '$' '_' '%'
|
||||||
|
end
|
||||||
|
|
||||||
|
name xmlIsBlank
|
||||||
|
ur 0x09 0x0a 0x0d 0x20
|
||||||
|
end xmlIsBlank
|
||||||
|
|
||||||
|
name xmlIsBaseChar
|
||||||
|
ur 0x41..0x5a 0x61..0x7a 0xc0..0xd6 0xd8..0xf6 0xf8..0xff
|
||||||
|
ur 0x000100..0x000131
|
||||||
|
ur 0x000134..0x00013e
|
||||||
|
ur 0x000141..0x000148
|
||||||
|
ur 0x00014a..0x00017e
|
||||||
|
ur 0x000180..0x0001c3
|
||||||
|
ur 0x0001cd..0x0001f0
|
||||||
|
ur 0x0001f4..0x0001f5
|
||||||
|
ur 0x0001fa..0x000217
|
||||||
|
ur 0x000250..0x0002a8
|
||||||
|
ur 0x0002bb..0x0002c1
|
||||||
|
ur 0x000386..0x000386
|
||||||
|
ur 0x000388..0x00038a
|
||||||
|
ur 0x00038c
|
||||||
|
ur 0x00038e..0x0003a1
|
||||||
|
ur 0x0003a3..0x0003ce
|
||||||
|
ur 0x0003d0..0x0003d6
|
||||||
|
ur 0x0003da
|
||||||
|
ur 0x0003dc
|
||||||
|
ur 0x0003de
|
||||||
|
ur 0x0003e0
|
||||||
|
ur 0x0003e2..0x0003f3
|
||||||
|
ur 0x000401..0x00040c
|
||||||
|
ur 0x00040e..0x00044f
|
||||||
|
ur 0x000451..0x00045c
|
||||||
|
ur 0x00045e..0x000481
|
||||||
|
ur 0x000490..0x0004c4
|
||||||
|
ur 0x0004c7..0x0004c8
|
||||||
|
ur 0x0004cb..0x0004cc
|
||||||
|
ur 0x0004d0..0x0004eb
|
||||||
|
ur 0x0004ee..0x0004f5
|
||||||
|
ur 0x0004f8..0x0004f9
|
||||||
|
ur 0x000531..0x000556
|
||||||
|
ur 0x000559
|
||||||
|
ur 0x000561..0x000586
|
||||||
|
ur 0x0005d0..0x0005ea
|
||||||
|
ur 0x0005f0..0x0005f2
|
||||||
|
ur 0x000621..0x00063a
|
||||||
|
ur 0x000641..0x00064a
|
||||||
|
ur 0x000671..0x0006b7
|
||||||
|
ur 0x0006ba..0x0006be
|
||||||
|
ur 0x0006c0..0x0006ce
|
||||||
|
ur 0x0006d0..0x0006d3
|
||||||
|
ur 0x0006d5
|
||||||
|
ur 0x0006e5..0x0006e6
|
||||||
|
ur 0x000905..0x000939
|
||||||
|
ur 0x00093d
|
||||||
|
ur 0x000958..0x000961
|
||||||
|
ur 0x000985..0x00098c
|
||||||
|
ur 0x00098f..0x000990
|
||||||
|
ur 0x000993..0x0009a8
|
||||||
|
ur 0x0009aa..0x0009b0
|
||||||
|
ur 0x0009b2
|
||||||
|
ur 0x0009b6..0x0009b9
|
||||||
|
ur 0x0009dc..0x0009dd
|
||||||
|
ur 0x0009df..0x0009e1
|
||||||
|
ur 0x0009f0..0x0009f1
|
||||||
|
ur 0x000a05..0x000a0a
|
||||||
|
ur 0x000a0f..0x000a10
|
||||||
|
ur 0x000a13..0x000a28
|
||||||
|
ur 0x000a2a..0x000a30
|
||||||
|
ur 0x000a32..0x000a33
|
||||||
|
ur 0x000a35..0x000a36
|
||||||
|
ur 0x000a38..0x000a39
|
||||||
|
ur 0x000a59..0x000a5c
|
||||||
|
ur 0x000a5e
|
||||||
|
ur 0x000a72..0x000a74
|
||||||
|
ur 0x000a85..0x000a8b
|
||||||
|
ur 0x000a8d
|
||||||
|
ur 0x000a8f..0x000a91
|
||||||
|
ur 0x000a93..0x000aa8
|
||||||
|
ur 0x000aaa..0x000ab0
|
||||||
|
ur 0x000ab2..0x000ab3
|
||||||
|
ur 0x000ab5..0x000ab9
|
||||||
|
ur 0x000abd
|
||||||
|
ur 0x000ae0
|
||||||
|
ur 0x000b05..0x000b0c
|
||||||
|
ur 0x000b0f..0x000b10
|
||||||
|
ur 0x000b13..0x000b28
|
||||||
|
ur 0x000b2a..0x000b30
|
||||||
|
ur 0x000b32..0x000b33
|
||||||
|
ur 0x000b36..0x000b39
|
||||||
|
ur 0x000b3d
|
||||||
|
ur 0x000b5c..0x000b5d
|
||||||
|
ur 0x000b5f..0x000b61
|
||||||
|
ur 0x000b85..0x000b8a
|
||||||
|
ur 0x000b8e..0x000b90
|
||||||
|
ur 0x000b92..0x000b95
|
||||||
|
ur 0x000b99..0x000b9a
|
||||||
|
ur 0x000b9c
|
||||||
|
ur 0x000b9e..0x000b9f
|
||||||
|
ur 0x000ba3..0x000ba4
|
||||||
|
ur 0x000ba8..0x000baa
|
||||||
|
ur 0x000bae..0x000bb5
|
||||||
|
ur 0x000bb7..0x000bb9
|
||||||
|
ur 0x000c05..0x000c0c
|
||||||
|
ur 0x000c0e..0x000c10
|
||||||
|
ur 0x000c12..0x000c28
|
||||||
|
ur 0x000c2a..0x000c33
|
||||||
|
ur 0x000c35..0x000c39
|
||||||
|
ur 0x000c60..0x000c61
|
||||||
|
ur 0x000c85..0x000c8c
|
||||||
|
ur 0x000c8e..0x000c90
|
||||||
|
ur 0x000c92..0x000ca8
|
||||||
|
ur 0x000caa..0x000cb3
|
||||||
|
ur 0x000cb5..0x000cb9
|
||||||
|
ur 0x000cde
|
||||||
|
ur 0x000ce0..0x000ce1
|
||||||
|
ur 0x000d05..0x000d0c
|
||||||
|
ur 0x000d0e..0x000d10
|
||||||
|
ur 0x000d12..0x000d28
|
||||||
|
ur 0x000d2a..0x000d39
|
||||||
|
ur 0x000d60..0x000d61
|
||||||
|
ur 0x000e01..0x000e2e
|
||||||
|
ur 0x000e30
|
||||||
|
ur 0x000e32..0x000e33
|
||||||
|
ur 0x000e40..0x000e45
|
||||||
|
ur 0x000e81..0x000e82
|
||||||
|
ur 0x000e84..0x000e84
|
||||||
|
ur 0x000e87..0x000e88
|
||||||
|
ur 0x000e8a
|
||||||
|
ur 0x000e8d
|
||||||
|
ur 0x000e94..0x000e97
|
||||||
|
ur 0x000e99..0x000e9f
|
||||||
|
ur 0x000ea1..0x000ea3
|
||||||
|
ur 0x000ea5
|
||||||
|
ur 0x000ea7
|
||||||
|
ur 0x000eaa..0x000eab
|
||||||
|
ur 0x000ead..0x000eae
|
||||||
|
ur 0x000eb0
|
||||||
|
ur 0x000eb2..0x000eb3
|
||||||
|
ur 0x000ebd
|
||||||
|
ur 0x000ec0..0x000ec4
|
||||||
|
ur 0x000f40..0x000f47
|
||||||
|
ur 0x000f49..0x000f69
|
||||||
|
ur 0x0010a0..0x0010c5
|
||||||
|
ur 0x0010d0..0x0010f6
|
||||||
|
ur 0x001100
|
||||||
|
ur 0x001102..0x001103
|
||||||
|
ur 0x001105..0x001107
|
||||||
|
ur 0x001109
|
||||||
|
ur 0x00110b..0x00110c
|
||||||
|
ur 0x00110e..0x001112
|
||||||
|
ur 0x00113c
|
||||||
|
ur 0x00113e
|
||||||
|
ur 0x001140
|
||||||
|
ur 0x00114c
|
||||||
|
ur 0x00114e
|
||||||
|
ur 0x001150
|
||||||
|
ur 0x001154..0x001155
|
||||||
|
ur 0x001159
|
||||||
|
ur 0x00115f..0x001161
|
||||||
|
ur 0x001163
|
||||||
|
ur 0x001165
|
||||||
|
ur 0x001167
|
||||||
|
ur 0x001169
|
||||||
|
ur 0x00116d..0x00116e
|
||||||
|
ur 0x001172..0x001173
|
||||||
|
ur 0x001175
|
||||||
|
ur 0x00119e
|
||||||
|
ur 0x0011a8
|
||||||
|
ur 0x0011ab
|
||||||
|
ur 0x0011ae..0x0011af
|
||||||
|
ur 0x0011b7..0x0011b8
|
||||||
|
ur 0x0011ba
|
||||||
|
ur 0x0011bc..0x0011c2
|
||||||
|
ur 0x0011eb
|
||||||
|
ur 0x0011f0
|
||||||
|
ur 0x0011f9
|
||||||
|
ur 0x001e00..0x001e9b
|
||||||
|
ur 0x001ea0..0x001ef9
|
||||||
|
ur 0x001f00..0x001f15
|
||||||
|
ur 0x001f18..0x001f1d
|
||||||
|
ur 0x001f20..0x001f45
|
||||||
|
ur 0x001f48..0x001f4d
|
||||||
|
ur 0x001f50..0x001f57
|
||||||
|
ur 0x001f59
|
||||||
|
ur 0x001f5b
|
||||||
|
ur 0x001f5d
|
||||||
|
ur 0x001f5f..0x001f7d
|
||||||
|
ur 0x001f80..0x001fb4
|
||||||
|
ur 0x001fb6..0x001fbc
|
||||||
|
ur 0x001fbe
|
||||||
|
ur 0x001fc2..0x001fc4
|
||||||
|
ur 0x001fc6..0x001fcc
|
||||||
|
ur 0x001fd0..0x001fd3
|
||||||
|
ur 0x001fd6..0x001fdb
|
||||||
|
ur 0x001fe0..0x001fec
|
||||||
|
ur 0x001ff2..0x001ff4
|
||||||
|
ur 0x001ff6..0x001ffc
|
||||||
|
ur 0x002126
|
||||||
|
ur 0x00212a..0x00212b
|
||||||
|
ur 0x00212e
|
||||||
|
ur 0x002180..0x002182
|
||||||
|
ur 0x003041..0x003094
|
||||||
|
ur 0x0030a1..0x0030fa
|
||||||
|
ur 0x003105..0x00312c
|
||||||
|
ur 0x00ac00..0x00d7a3
|
||||||
|
end
|
||||||
|
name xmlIsDigit
|
||||||
|
ur 0x30..0x39
|
||||||
|
ur 0x660..0x669
|
||||||
|
ur 0x6f0..0x6f9
|
||||||
|
ur 0x966..0x96f
|
||||||
|
ur 0x9e6..0x9ef
|
||||||
|
ur 0xa66..0xa6f
|
||||||
|
ur 0xae6..0xaef
|
||||||
|
ur 0xb66..0xb6f
|
||||||
|
ur 0xbe7..0xbef
|
||||||
|
ur 0xc66..0xc6f
|
||||||
|
ur 0xce6..0xcef
|
||||||
|
ur 0xd66..0xd6f
|
||||||
|
ur 0xe50..0xe59
|
||||||
|
ur 0xed0..0xed9
|
||||||
|
ur 0xf20..0xf29
|
||||||
|
end
|
||||||
|
name xmlIsCombining
|
||||||
|
ur 0x0300..0x0345
|
||||||
|
ur 0x0360..0x0361
|
||||||
|
ur 0x0483..0x0486
|
||||||
|
ur 0x0591..0x05A1
|
||||||
|
ur 0x05A3..0x05B9
|
||||||
|
ur 0x05BB..0x05BD
|
||||||
|
ur 0x05BF
|
||||||
|
ur 0x05C1..0x05C2
|
||||||
|
ur 0x05C4
|
||||||
|
ur 0x064B..0x0652
|
||||||
|
ur 0x0670
|
||||||
|
ur 0x06D6..0x06DC
|
||||||
|
ur 0x06DD..0x06DF
|
||||||
|
ur 0x06E0..0x06E4
|
||||||
|
ur 0x06E7..0x06E8
|
||||||
|
ur 0x06EA..0x06ED
|
||||||
|
ur 0x0901..0x0903
|
||||||
|
ur 0x093C
|
||||||
|
ur 0x093E..0x094C
|
||||||
|
ur 0x094D
|
||||||
|
ur 0x0951..0x0954
|
||||||
|
ur 0x0962..0x0963
|
||||||
|
ur 0x0981..0x0983
|
||||||
|
ur 0x09BC
|
||||||
|
ur 0x09BE
|
||||||
|
ur 0x09BF
|
||||||
|
ur 0x09C0..0x09C4
|
||||||
|
ur 0x09C7..0x09C8
|
||||||
|
ur 0x09CB..0x09CD
|
||||||
|
ur 0x09D7
|
||||||
|
ur 0x09E2..0x09E3
|
||||||
|
ur 0x0A02
|
||||||
|
ur 0x0A3C
|
||||||
|
ur 0x0A3E
|
||||||
|
ur 0x0A3F
|
||||||
|
ur 0x0A40..0x0A42
|
||||||
|
ur 0x0A47..0x0A48
|
||||||
|
ur 0x0A4B..0x0A4D
|
||||||
|
ur 0x0A70..0x0A71
|
||||||
|
ur 0x0A81..0x0A83
|
||||||
|
ur 0x0ABC
|
||||||
|
ur 0x0ABE..0x0AC5
|
||||||
|
ur 0x0AC7..0x0AC9
|
||||||
|
ur 0x0ACB..0x0ACD
|
||||||
|
ur 0x0B01..0x0B03
|
||||||
|
ur 0x0B3C
|
||||||
|
ur 0x0B3E..0x0B43
|
||||||
|
ur 0x0B47..0x0B48
|
||||||
|
ur 0x0B4B..0x0B4D
|
||||||
|
ur 0x0B56..0x0B57
|
||||||
|
ur 0x0B82..0x0B83
|
||||||
|
ur 0x0BBE..0x0BC2
|
||||||
|
ur 0x0BC6..0x0BC8
|
||||||
|
ur 0x0BCA..0x0BCD
|
||||||
|
ur 0x0BD7
|
||||||
|
ur 0x0C01..0x0C03
|
||||||
|
ur 0x0C3E..0x0C44
|
||||||
|
ur 0x0C46..0x0C48
|
||||||
|
ur 0x0C4A..0x0C4D
|
||||||
|
ur 0x0C55..0x0C56
|
||||||
|
ur 0x0C82..0x0C83
|
||||||
|
ur 0x0CBE..0x0CC4
|
||||||
|
ur 0x0CC6..0x0CC8
|
||||||
|
ur 0x0CCA..0x0CCD
|
||||||
|
ur 0x0CD5..0x0CD6
|
||||||
|
ur 0x0D02..0x0D03
|
||||||
|
ur 0x0D3E..0x0D43
|
||||||
|
ur 0x0D46..0x0D48
|
||||||
|
ur 0x0D4A..0x0D4D
|
||||||
|
ur 0x0D57
|
||||||
|
ur 0x0E31
|
||||||
|
ur 0x0E34..0x0E3A
|
||||||
|
ur 0x0E47..0x0E4E
|
||||||
|
ur 0x0EB1
|
||||||
|
ur 0x0EB4..0x0EB9
|
||||||
|
ur 0x0EBB..0x0EBC
|
||||||
|
ur 0x0EC8..0x0ECD
|
||||||
|
ur 0x0F18..0x0F19
|
||||||
|
ur 0x0F35
|
||||||
|
ur 0x0F37
|
||||||
|
ur 0x0F39
|
||||||
|
ur 0x0F3E
|
||||||
|
ur 0x0F3F
|
||||||
|
ur 0x0F71..0x0F84
|
||||||
|
ur 0x0F86..0x0F8B
|
||||||
|
ur 0x0F90..0x0F95
|
||||||
|
ur 0x0F97
|
||||||
|
ur 0x0F99..0x0FAD
|
||||||
|
ur 0x0FB1..0x0FB7
|
||||||
|
ur 0x0FB9
|
||||||
|
ur 0x20D0..0x20DC
|
||||||
|
ur 0x20E1
|
||||||
|
ur 0x302A..0x302F
|
||||||
|
ur 0x3099
|
||||||
|
ur 0x309A
|
||||||
|
end
|
||||||
|
name xmlIsExtender
|
||||||
|
ur 0xb7 0x2d0 0x2d1 0x387 0x640 0xe46 0xec6 0x3005 0x3031 0x3032
|
||||||
|
ur 0x3033 0x3034 0x3035 0x309d 0x309e 0x30fc 0x30fd 0x30fe
|
||||||
|
end
|
||||||
|
name xmlIsIdeographic
|
||||||
|
ur 0x4e00..0x9fa5
|
||||||
|
ur 0xf900..0xfa2d
|
||||||
|
ur 0x3021..0x3029
|
||||||
|
ur 0x3007
|
||||||
|
end
|
||||||
105
chvalid.h
Normal file
105
chvalid.h
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
/*
|
||||||
|
* chvalid.h: this header exports interfaces for the character
|
||||||
|
* range validation APIs
|
||||||
|
*
|
||||||
|
* This file is automatically generated from the cvs source
|
||||||
|
* definition files using the genChRanges.py Python script
|
||||||
|
*
|
||||||
|
* Generation date: Sat Oct 11 20:57:37 2003
|
||||||
|
* Sources: chvalid.def
|
||||||
|
* William Brack <wbrack@mmm.com.hk>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __XML_CHVALID_H__
|
||||||
|
#define __XML_CHVALID_H__
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Define our typedefs and structures
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
typedef struct _xmlChSRange xmlChSRange;
|
||||||
|
typedef xmlChSRange *xmlChSRangePtr;
|
||||||
|
struct _xmlChSRange {
|
||||||
|
unsigned short low;
|
||||||
|
unsigned short high;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct _xmlChLRange xmlChLRange;
|
||||||
|
typedef xmlChLRange *xmlChLRangePtr;
|
||||||
|
struct _xmlChLRange {
|
||||||
|
unsigned low;
|
||||||
|
unsigned high;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct _xmlChRangeGroup xmlChRangeGroup;
|
||||||
|
typedef xmlChRangeGroup *xmlChRangeGroupPtr;
|
||||||
|
struct _xmlChRangeGroup {
|
||||||
|
int nbShortRange;
|
||||||
|
int nbLongRange;
|
||||||
|
xmlChSRangePtr shortRange; /* points to an array of ranges */
|
||||||
|
xmlChLRangePtr longRange;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Range checking routine */
|
||||||
|
int xmlCharInRange(unsigned int val, const xmlChRangeGroupPtr group);
|
||||||
|
|
||||||
|
#define xmlIsBaseChar_ch(c) ( ((0x41<= (c)) && ((c) <= 0x5a)) || \
|
||||||
|
((0x61<= (c)) && ((c) <= 0x7a)) || \
|
||||||
|
((0xc0<= (c)) && ((c) <= 0xd6)) || \
|
||||||
|
((0xd8<= (c)) && ((c) <= 0xf6)) || \
|
||||||
|
((0xf8<= (c)) && ((c) <= 0xff)))
|
||||||
|
#define xmlIsBaseChar(c) (((c) < 0x100) ? \
|
||||||
|
xmlIsBaseChar_ch((c)) : \
|
||||||
|
xmlCharInRange((c), &xmlIsBaseCharGroup))
|
||||||
|
|
||||||
|
extern xmlChRangeGroup xmlIsBaseCharGroup;
|
||||||
|
#define xmlIsBlank_ch(c) ( ((c) == 0x20) || \
|
||||||
|
((0x9<= (c)) && ((c) <= 0xa)) || \
|
||||||
|
((c) == 0xd))
|
||||||
|
#define xmlIsBlank(c) (((c) < 0x100) ? \
|
||||||
|
xmlIsBlank_ch((c)) : 0)
|
||||||
|
|
||||||
|
#define xmlIsChar_ch(c) ( ((0x9<= (c)) && ((c) <= 0xa)) || \
|
||||||
|
((c) == 0xd) || \
|
||||||
|
((0x20<= (c)) && ((c) <= 0xff)))
|
||||||
|
#define xmlIsChar(c) (((c) < 0x100) ? \
|
||||||
|
xmlIsChar_ch((c)) : \
|
||||||
|
xmlCharInRange((c), &xmlIsCharGroup))
|
||||||
|
|
||||||
|
extern xmlChRangeGroup xmlIsCharGroup;
|
||||||
|
#define xmlIsCombining(c) (((c) < 0x100) ? \
|
||||||
|
0 : \
|
||||||
|
xmlCharInRange((c), &xmlIsCombiningGroup))
|
||||||
|
|
||||||
|
extern xmlChRangeGroup xmlIsCombiningGroup;
|
||||||
|
#define xmlIsDigit_ch(c) ( ((0x30<= (c)) && ((c) <= 0x39)))
|
||||||
|
#define xmlIsDigit(c) (((c) < 0x100) ? \
|
||||||
|
xmlIsDigit_ch((c)) : \
|
||||||
|
xmlCharInRange((c), &xmlIsDigitGroup))
|
||||||
|
|
||||||
|
extern xmlChRangeGroup xmlIsDigitGroup;
|
||||||
|
#define xmlIsExtender_ch(c) ( ((c) == 0xb7))
|
||||||
|
#define xmlIsExtender(c) (((c) < 0x100) ? \
|
||||||
|
xmlIsExtender_ch((c)) : \
|
||||||
|
xmlCharInRange((c), &xmlIsExtenderGroup))
|
||||||
|
|
||||||
|
extern xmlChRangeGroup xmlIsExtenderGroup;
|
||||||
|
#define xmlIsIdeographic(c) (((c) < 0x100) ? \
|
||||||
|
0 : \
|
||||||
|
xmlCharInRange((c), &xmlIsIdeographicGroup))
|
||||||
|
|
||||||
|
extern xmlChRangeGroup xmlIsIdeographicGroup;
|
||||||
|
extern unsigned char xmlIsPubidChar_tab[256];
|
||||||
|
#define xmlIsPubidChar_ch(c) (xmlIsPubidChar_tab[(c)])
|
||||||
|
#define xmlIsPubidChar(c) (((c) < 0x100) ? \
|
||||||
|
xmlIsPubidChar_ch((c)) : 0)
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#endif /* __XML_CHVALID_H__ */
|
||||||
465
genChRanges.py
Executable file
465
genChRanges.py
Executable file
@@ -0,0 +1,465 @@
|
|||||||
|
#!/usr/bin/python -u
|
||||||
|
#
|
||||||
|
# Portions of this script have been (shamelessly) stolen from the
|
||||||
|
# prior work of Daniel Veillard (genUnicode.py)
|
||||||
|
#
|
||||||
|
# I, however, take full credit for any bugs, errors or difficulties :-)
|
||||||
|
#
|
||||||
|
# William Brack
|
||||||
|
# October 2003
|
||||||
|
#
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import string
|
||||||
|
import time
|
||||||
|
|
||||||
|
#
|
||||||
|
# A little routine to assign a 'meaningful' name to a range
|
||||||
|
#
|
||||||
|
def rangename(intvl):
    # Build an identifier of the form "r<start>x<end>" from a (start, end)
    # tuple, using the hexadecimal digits of each bound without the "0x"
    # prefix that hex() produces.
    low, high = intvl
    return "r%sx%s" % (hex(low)[2:], hex(high)[2:])
|
||||||
|
|
||||||
|
#
|
||||||
|
# A routine to take a list of yes/no (1, 0) values and turn it
|
||||||
|
# into a list of ranges. This will later be used to determine whether
|
||||||
|
# to generate single-byte lookup tables, or inline comparisons
|
||||||
|
#
|
||||||
|
def makeRange(lst):
    """Collapse a list of 0/1 flags into a list of inclusive index ranges.

    lst is a sequence of yes/no (1, 0) values indexed by character code.
    Returns a list of (first, last) tuples, in ascending order, one for
    every maximal run of 1 values.  An empty list, or a list with no 1
    values, yields [].
    """
    ret = []
    size = len(lst)
    pos = 0
    while pos < size:
        # list.index raises ValueError when the value is not present
        try:
            start = lst.index(1, pos)       # start of the next run
        except ValueError:
            break                           # no more runs, finished
        try:
            stop = lst.index(0, start)      # first index after the run
        except ValueError:
            stop = size                     # run extends to end of list
        ret.append((start, stop - 1))
        # lst[stop] is known to be 0 (or past the end), so skip over it
        pos = stop + 1
    return ret
|
||||||
|
|
||||||
|
sources = "chvalid.def" # input filename
|
||||||
|
|
||||||
|
# minTableSize gives the minimum number of ranges which must be present
|
||||||
|
# before a 256-byte lookup table is produced. If there are fewer than this
|
||||||
|
# number, a macro with inline comparisons is generated
|
||||||
|
minTableSize = 6
|
||||||
|
|
||||||
|
# dictionary of ranges, key=range, element contains list of funcs using it
|
||||||
|
Ranges = {}
|
||||||
|
|
||||||
|
# dictionary of functions, key=name, element contains char-map and range-list
|
||||||
|
Functs = {}
|
||||||
|
|
||||||
|
state = 0
|
||||||
|
|
||||||
|
# Open the definition file named by 'sources' (the same name that is
# reported in the banners of the generated files).
try:
    defines = open(sources, "r")
except IOError:
    print("Missing %s, aborting ..." % sources)
    sys.exit(1)
|
||||||
|
|
||||||
|
#
|
||||||
|
# The lines in the .def file have three types:-
|
||||||
|
# name: Defines a new function block
|
||||||
|
# ur: Defines individual or ranges of unicode values
|
||||||
|
# end: Indicates the end of the function block
|
||||||
|
#
|
||||||
|
# These lines are processed below.
|
||||||
|
#
|
||||||
|
class ValidationError(Exception):
    """Signals a malformed or inconsistent line in the definition file."""
    pass


def parseValue(text):
    """Convert one token of a 'ur' line into an integer.

    Three spellings are accepted: hexadecimal with a '0x' prefix, a
    single character introduced by an apostrophe, or plain decimal.
    """
    if text[0:2] == '0x':
        return int(text[2:], 16)
    if text[0] == "'":
        return ord(text[1])
    return int(text)


for line in defines.readlines():
    # ignore blank lines, or lines beginning with '#'
    line = line.strip()
    if line == '' or line[0] == '#':
        continue
    # split line into whitespace-separated fields, then dispatch on the
    # first field; lines with an unknown first field are ignored
    try:
        fields = line.split()
        keyword = fields[0]
        #
        # name line:
        #   validate any previous function block already ended
        #   validate this function not already defined
        #   initialize an entry in the function dictionary
        #       including a mask table with no values yet defined
        #
        if keyword == 'name':
            name = fields[1]
            if state != 0:
                print("'name' %s found before previous name "
                      "completed" % (fields[1]))
                continue
            state = 1
            if name in Functs:
                print("name '%s' already present - may give"
                      " wrong results" % (name))
            else:
                # dict entry with two list elements (chdata, rangedata);
                # chdata is a 256-entry map, initially all zero
                Functs[name] = [ [0] * 256, [] ]
        #
        # end line:
        #   validate there was a preceding function name line
        #   set state to show no current function active
        #
        elif keyword == 'end':
            if state == 0:
                print("'end' found outside of function block")
                continue
            state = 0
        #
        # ur line:
        #   validate function has been defined
        #   process remaining fields on the line, which may be either
        #       individual unicode values or ranges of values
        #
        elif keyword == 'ur':
            if state != 1:
                raise ValidationError("'ur' found outside of 'name' block")
            for el in fields[1:]:
                pos = el.find('..')
                # pos <= 0 means not a range, so must be individual value
                if pos <= 0:
                    value = parseValue(el)
                    if value < 0 or value > 0x1fffff:
                        raise ValidationError('Illegal value (%s) in ch for'
                                              ' name %s' % (el, name))
                    # for ur we have only ranges (makes things simpler),
                    # so convert val to range
                    currange = (value, value)
                # pos > 0 means this is a range, so isolate/validate
                # the interval
                else:
                    # split the range into its first-val, last-val
                    (first, last) = el.split("..")
                    # convert values from text into binary
                    start = parseValue(first)
                    end = parseValue(last)
                    if start < 0 or end > 0x1fffff or start > end:
                        raise ValidationError("Invalid range '%s'" % el)
                    currange = (start, end)
                # common path - 'currange' has the range, now take care of it
                # We split on single-byte values vs. multibyte; the test is
                # on the upper bound only, so a range that starts below
                # 0x100 but ends at or above it is recorded whole as a
                # multi-byte range
                if currange[1] < 0x100:     # single-byte
                    chmap = Functs[name][0]
                    for ch in range(currange[0], currange[1] + 1):
                        # validate that value not previously defined
                        if chmap[ch]:
                            msg = "Duplicate ch value '%s' for name '%s'" % (el, name)
                            raise ValidationError(msg)
                        chmap[ch] = 1
                else:                       # multi-byte
                    Ranges.setdefault(currange, []).append(name)
                    if currange in Functs[name][1]:
                        raise ValidationError("range already defined in"
                                              " function")
                    else:
                        Functs[name][1].append(currange)

    except:
        # report which line was being processed, then let the original
        # exception propagate and abort the run
        print("Failed to process line: %s" % (line))
        raise
|
||||||
|
#
|
||||||
|
# At this point, the entire definition file has been processed. Now we
|
||||||
|
# enter the output phase, where we generate the two files chvalid.c and
|
||||||
|
# chvalid.h
|
||||||
|
#
|
||||||
|
# To do this, we first output the 'static' data (heading, fixed
|
||||||
|
# definitions, etc.), then output the 'dynamic' data (the results
|
||||||
|
# of the above processing), and finally output closing 'static' data
|
||||||
|
# (e.g. the subroutine to process the ranges)
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# Generate the headings:
|
||||||
|
#
|
||||||
|
try:
|
||||||
|
header = open("chvalid.h", "w")
|
||||||
|
except:
|
||||||
|
print "Failed to open chvalid.h"
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
try:
|
||||||
|
output = open("chvalid.c", "w")
|
||||||
|
except:
|
||||||
|
print "Failed to open chvalid.c"
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
date = time.asctime(time.localtime(time.time()))
|
||||||
|
|
||||||
|
header.write(
|
||||||
|
"""/*
|
||||||
|
* chvalid.h: this header exports interfaces for the character
|
||||||
|
* range validation APIs
|
||||||
|
*
|
||||||
|
* This file is automatically generated from the cvs source
|
||||||
|
* definition files using the genChRanges.py Python script
|
||||||
|
*
|
||||||
|
* Generation date: %s
|
||||||
|
* Sources: %s
|
||||||
|
* William Brack <wbrack@mmm.com.hk>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __XML_CHVALID_H__
|
||||||
|
#define __XML_CHVALID_H__
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Define our typedefs and structures
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
typedef struct _xmlChSRange xmlChSRange;
|
||||||
|
typedef xmlChSRange *xmlChSRangePtr;
|
||||||
|
struct _xmlChSRange {
|
||||||
|
unsigned short low;
|
||||||
|
unsigned short high;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct _xmlChLRange xmlChLRange;
|
||||||
|
typedef xmlChLRange *xmlChLRangePtr;
|
||||||
|
struct _xmlChLRange {
|
||||||
|
unsigned low;
|
||||||
|
unsigned high;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct _xmlChRangeGroup xmlChRangeGroup;
|
||||||
|
typedef xmlChRangeGroup *xmlChRangeGroupPtr;
|
||||||
|
struct _xmlChRangeGroup {
|
||||||
|
int nbShortRange;
|
||||||
|
int nbLongRange;
|
||||||
|
xmlChSRangePtr shortRange; /* points to an array of ranges */
|
||||||
|
xmlChLRangePtr longRange;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Range checking routine */
|
||||||
|
int xmlCharInRange(unsigned int val, const xmlChRangeGroupPtr group);
|
||||||
|
|
||||||
|
""" % (date, sources));
|
||||||
|
output.write(
|
||||||
|
"""/*
|
||||||
|
* chvalid.c: this module implements the character range
|
||||||
|
* validation APIs
|
||||||
|
*
|
||||||
|
* This file is automatically generated from the cvs source
|
||||||
|
* definition files using the genChRanges.py Python script
|
||||||
|
*
|
||||||
|
* Generation date: %s
|
||||||
|
* Sources: %s
|
||||||
|
* William Brack <wbrack@mmm.com.hk>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "chvalid.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The initial tables ({func_name}_tab) are used to validate whether a
|
||||||
|
* single-byte character is within the specified group. Each table
|
||||||
|
* contains 256 bytes, with each byte representing one of the 256
|
||||||
|
* possible characters. If the table byte is set, the character is
|
||||||
|
* allowed.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
""" % (date, sources));
|
||||||
|
|
||||||
|
#
|
||||||
|
# Now output the generated data.
|
||||||
|
# We try to produce the best execution times. Tests have shown that validation
|
||||||
|
# with direct table lookup is, when there are a "small" number of valid items,
|
||||||
|
# still not as fast as a sequence of inline compares. So, if the single-byte
|
||||||
|
# portion of a range has a "small" number of ranges, we output a macro for inline
|
||||||
|
# compares, otherwise we output a 256-byte table and a macro to use it.
|
||||||
|
#
|
||||||
|
|
||||||
|
fkeys = Functs.keys() # Dictionary of all defined functions
|
||||||
|
fkeys.sort() # Put some order to our output
|
||||||
|
|
||||||
|
for f in fkeys:

    # First we convert the specified single-byte values into a group of
    # ranges.  If the total number of such ranges is less than
    # minTableSize, we generate an inline macro for direct comparisons;
    # otherwise we generate a lookup table.
    chmap = Functs[f][0]
    hasSingleByte = max(chmap) > 0      # at least one entry below 0x100
    if hasSingleByte:
        rangeTable = makeRange(chmap)
        numRanges = len(rangeTable)
        if numRanges >= minTableSize:   # table is worthwhile
            header.write("extern unsigned char %s_tab[256];\n" % f)
            header.write("#define %s_ch(c)\t(%s_tab[(c)])\n" % (f, f))

            # write the constant data to the code file; the first 255
            # entries are followed by a comma, the last one closes the
            # initializer
            output.write("unsigned char %s_tab[256] = {\n" % f)
            pline = " "
            for n in range(255):
                pline += " 0x%02x," % chmap[n]
                if len(pline) > 72:
                    output.write(pline + "\n")
                    pline = " "
            output.write(pline + " 0x%02x };\n\n" % chmap[255])

        else:                           # inline check is used
            # first another little optimisation - if space is present,
            # put it at the front of the list so it is checked first
            space = (0x20, 0x20)
            if space in rangeTable:
                rangeTable.remove(space)
                rangeTable.insert(0, space)
            pline = "#define %s_ch(c)\t( " % f
            firstFlag = 1
            for rg in rangeTable:
                if not firstFlag:
                    # continue the macro on a new, backslash-joined line
                    pline += " || \\\n\t\t\t"
                else:
                    firstFlag = 0
                if rg[0] == rg[1]:      # single value - check equal
                    pline += "((c) == " + hex(rg[0]) + ")"
                else:                   # value range
                    pline += "((" + hex(rg[0]) + "<= (c)) &&"
                    pline += " ((c) <= " + hex(rg[1]) + "))"
            pline += ")\n"
            header.write(pline)

    # The general macro: values below 0x100 go to the _ch form (or are
    # rejected when the function has no single-byte entries) ...
    header.write("#define %s(c)\t(((c) < 0x100) ? \\\n\t\t\t\t" % f)
    if hasSingleByte:
        header.write("%s_ch((c)) :" % f)
    else:
        header.write("0 :")

    # ... and larger values go to the range search; if no ranges are
    # defined, a value is invalid if >= 0x100
    if len(Functs[f][1]) == 0:
        header.write(" 0)\n\n")
    else:
        header.write(" \\\n\t\t\t\txmlCharInRange((c), &%sGroup))\n\n" % f)
        header.write("extern xmlChRangeGroup %sGroup;\n" % f)
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# Next we do the unicode ranges
|
||||||
|
#
|
||||||
|
|
||||||
|
def writeRangeArray(out, ctype, arrayName, ranges):
    """Write one static C array of {low, high} pairs to 'out'.

    ctype is the C element type name, arrayName the C identifier and
    ranges a non-empty list of (low, high) tuples.  Output lines are
    wrapped once the pending text grows past 60 characters.
    """
    pline = "static %s %s[] = { " % (ctype, arrayName)
    first = 1
    for rg in ranges:
        if not first:
            pline += ", "
        first = 0
        if len(pline) > 60:
            out.write(pline + "\n")
            pline = " "
        pline += "{0x%x, 0x%x}" % (rg[0], rg[1])
    out.write(pline + "};\n")


for f in fkeys:
    if len(Functs[f][1]) > 0:   # only generate if unicode ranges present
        rangeTable = Functs[f][1]
        rangeTable.sort()       # ascending tuple sequence
        # Ranges whose upper bound fits in 16 bits go to the 'short'
        # array, all others to the 'long' array.  Partitioning up front
        # keeps each array contiguous whatever the sort order is.
        shortRanges = [rg for rg in rangeTable if rg[1] < 0x10000]
        longRanges = [rg for rg in rangeTable if rg[1] >= 0x10000]
        numShort = len(shortRanges)
        numLong = len(longRanges)
        if numShort > 0:
            writeRangeArray(output, "xmlChSRange", "%s_srng" % f,
                            shortRanges)
        if numLong > 0:
            writeRangeArray(output, "xmlChLRange", "%s_lrng" % f,
                            longRanges)

        # The group initializer always carries all four members; an
        # absent array is written as an explicit null pointer so the
        # generated C stays valid when only long ranges are present.
        pline = "xmlChRangeGroup %sGroup = {%d, %d, " % (f, numShort, numLong)
        if numShort > 0:
            pline += "%s_srng" % f
        else:
            pline += "(xmlChSRangePtr) 0"
        if numLong > 0:
            pline += ", %s_lrng" % f
        else:
            pline += ", (xmlChLRangePtr) 0"

        output.write(pline + "};\n\n")
|
||||||
|
#
|
||||||
|
# Run complete - write trailers and close the output files
|
||||||
|
#
|
||||||
|
|
||||||
|
header.write("""
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#endif /* __XML_CHVALID_H__ */
|
||||||
|
""");
|
||||||
|
|
||||||
|
header.close()
|
||||||
|
|
||||||
|
output.write(
|
||||||
|
"""
|
||||||
|
int
|
||||||
|
xmlCharInRange (unsigned int val, xmlChRangeGroupPtr rptr) {
|
||||||
|
int low, high, mid;
|
||||||
|
xmlChSRangePtr sptr;
|
||||||
|
xmlChLRangePtr lptr;
|
||||||
|
if (val < 0x10000) { /* is val in 'short' or 'long' array? */
|
||||||
|
if (rptr->nbShortRange == 0)
|
||||||
|
return 0;
|
||||||
|
low = 0;
|
||||||
|
high = rptr->nbShortRange;
|
||||||
|
sptr = rptr->shortRange;
|
||||||
|
while (low <= high) {
|
||||||
|
mid = (low + high) / 2;
|
||||||
|
if ((unsigned short) val < sptr[mid].low)
|
||||||
|
high = mid - 1;
|
||||||
|
else if ((unsigned short) val > sptr[mid].high)
|
||||||
|
low = mid + 1;
|
||||||
|
else
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (rptr->nbLongRange == 0)
|
||||||
|
return 0;
|
||||||
|
low = 0;
|
||||||
|
high = rptr->nbLongRange;
|
||||||
|
lptr = rptr->longRange;
|
||||||
|
while (low <= high) {
|
||||||
|
mid = (low + high) / 2;
|
||||||
|
if (val < lptr[mid].low)
|
||||||
|
high = mid - 1;
|
||||||
|
else if (val > lptr[mid].high)
|
||||||
|
low = mid + 1;
|
||||||
|
else
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
""");
|
||||||
|
|
||||||
|
output.close()
|
||||||
@@ -43,7 +43,8 @@ xmlinc_HEADERS = \
|
|||||||
dict.h \
|
dict.h \
|
||||||
SAX2.h \
|
SAX2.h \
|
||||||
xmlexports.h \
|
xmlexports.h \
|
||||||
xmldwalk.h
|
xmldwalk.h \
|
||||||
|
chvalid.h
|
||||||
|
|
||||||
install-exec-hook:
|
install-exec-hook:
|
||||||
$(mkinstalldirs) $(DESTDIR)$(xmlincdir)
|
$(mkinstalldirs) $(DESTDIR)$(xmlincdir)
|
||||||
|
|||||||
@@ -13,6 +13,7 @@
|
|||||||
#include <libxml/xmlversion.h>
|
#include <libxml/xmlversion.h>
|
||||||
#include <libxml/parser.h>
|
#include <libxml/parser.h>
|
||||||
#include <libxml/HTMLparser.h>
|
#include <libxml/HTMLparser.h>
|
||||||
|
#include <libxml/chvalid.h>
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
@@ -48,8 +49,7 @@ extern "C" {
|
|||||||
* [2] Char ::= #x9 | #xA | #xD | [#x20...]
|
* [2] Char ::= #x9 | #xA | #xD | [#x20...]
|
||||||
* any byte character in the accepted range
|
* any byte character in the accepted range
|
||||||
*/
|
*/
|
||||||
#define IS_BYTE_CHAR(c) \
|
#define IS_BYTE_CHAR(c) xmlIsChar_ch(c)
|
||||||
(((c) >= 0x20) || ((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D))
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* IS_CHAR:
|
* IS_CHAR:
|
||||||
@@ -61,11 +61,7 @@ extern "C" {
|
|||||||
* | [#x10000-#x10FFFF]
|
* | [#x10000-#x10FFFF]
|
||||||
* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
|
* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
|
||||||
*/
|
*/
|
||||||
#define IS_CHAR(c) \
|
#define IS_CHAR(c) xmlIsChar(c)
|
||||||
((((c) >= 0x20) && ((c) <= 0xD7FF)) || \
|
|
||||||
((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) || \
|
|
||||||
(((c) >= 0xE000) && ((c) <= 0xFFFD)) || \
|
|
||||||
(((c) >= 0x10000) && ((c) <= 0x10FFFF)))
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* IS_BLANK:
|
* IS_BLANK:
|
||||||
@@ -75,8 +71,7 @@ extern "C" {
|
|||||||
*
|
*
|
||||||
* [3] S ::= (#x20 | #x9 | #xD | #xA)+
|
* [3] S ::= (#x20 | #x9 | #xD | #xA)+
|
||||||
*/
|
*/
|
||||||
#define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xA) || \
|
#define IS_BLANK(c) xmlIsBlank(c)
|
||||||
((c) == 0x0D))
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* IS_BASECHAR:
|
* IS_BASECHAR:
|
||||||
@@ -193,15 +188,7 @@ XMLPUBVAR const xmlChar xmlStringComment[];
|
|||||||
/*
|
/*
|
||||||
* Function to finish the work of the macros where needed.
|
* Function to finish the work of the macros where needed.
|
||||||
*/
|
*/
|
||||||
XMLPUBFUN int XMLCALL xmlIsBaseChar (int c);
|
XMLPUBFUN int XMLCALL xmlIsLetter (int c);
|
||||||
XMLPUBFUN int XMLCALL xmlIsBlank (int c);
|
|
||||||
XMLPUBFUN int XMLCALL xmlIsPubidChar (int c);
|
|
||||||
XMLPUBFUN int XMLCALL xmlIsLetter (int c);
|
|
||||||
XMLPUBFUN int XMLCALL xmlIsDigit (int c);
|
|
||||||
XMLPUBFUN int XMLCALL xmlIsIdeographic(int c);
|
|
||||||
XMLPUBFUN int XMLCALL xmlIsExtender (int c);
|
|
||||||
XMLPUBFUN int XMLCALL xmlIsCombining (int c);
|
|
||||||
XMLPUBFUN int XMLCALL xmlIsChar (int c);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parser context.
|
* Parser context.
|
||||||
|
|||||||
@@ -53,6 +53,7 @@
|
|||||||
#include <libxml/catalog.h>
|
#include <libxml/catalog.h>
|
||||||
#endif
|
#endif
|
||||||
#include <libxml/globals.h>
|
#include <libxml/globals.h>
|
||||||
|
#include <libxml/chvalid.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Various global defaults for parsing
|
* Various global defaults for parsing
|
||||||
@@ -196,472 +197,6 @@ xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/************************************************************************
|
|
||||||
* *
|
|
||||||
* Some functions to avoid too large macros *
|
|
||||||
* *
|
|
||||||
************************************************************************/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* xmlIsChar:
|
|
||||||
* @c: an unicode character (int)
|
|
||||||
*
|
|
||||||
* Check whether the character is allowed by the production
|
|
||||||
* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
|
|
||||||
* | [#x10000-#x10FFFF]
|
|
||||||
* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
|
|
||||||
* Also available as a macro IS_CHAR()
|
|
||||||
*
|
|
||||||
* Returns 0 if not, non-zero otherwise
|
|
||||||
*/
|
|
||||||
int
|
|
||||||
xmlIsChar(int c) {
|
|
||||||
return(
|
|
||||||
((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) ||
|
|
||||||
(((c) >= 0x20) && ((c) <= 0xD7FF)) ||
|
|
||||||
(((c) >= 0xE000) && ((c) <= 0xFFFD)) ||
|
|
||||||
(((c) >= 0x10000) && ((c) <= 0x10FFFF)));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* xmlIsBlank:
|
|
||||||
* @c: an unicode character (int)
|
|
||||||
*
|
|
||||||
* Check whether the character is allowed by the production
|
|
||||||
* [3] S ::= (#x20 | #x9 | #xD | #xA)+
|
|
||||||
* Also available as a macro IS_BLANK()
|
|
||||||
*
|
|
||||||
* Returns 0 if not, non-zero otherwise
|
|
||||||
*/
|
|
||||||
int
|
|
||||||
xmlIsBlank(int c) {
|
|
||||||
return(((c) == 0x20) || ((c) == 0x09) || ((c) == 0xA) || ((c) == 0x0D));
|
|
||||||
}
|
|
||||||
|
|
||||||
static int xmlBaseArray[] = {
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
|
|
||||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
|
|
||||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
|
|
||||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
|
|
||||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
|
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
|
|
||||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
|
|
||||||
1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
|
|
||||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
|
|
||||||
1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* xmlIsBaseChar:
|
|
||||||
* @c: an unicode character (int)
|
|
||||||
*
|
|
||||||
* Check whether the character is allowed by the production
|
|
||||||
* [85] BaseChar ::= ... long list see REC ...
|
|
||||||
*
|
|
||||||
* VI is your friend !
|
|
||||||
* :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/ (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
|
|
||||||
* and
|
|
||||||
* :1,$ s/#x\([0-9A-Z]*\)/ ((c) == 0x\1) ||/
|
|
||||||
*
|
|
||||||
* Returns 0 if not, non-zero otherwise
|
|
||||||
*/
|
|
||||||
int
|
|
||||||
xmlIsBaseChar(int c) {
|
|
||||||
if (c < 0x0100) return(xmlBaseArray[c]);
|
|
||||||
return((((c) >= 0x0100) && ((c) <= 0x0131)) ||
|
|
||||||
(((c) >= 0x0134) && ((c) <= 0x013E)) ||
|
|
||||||
(((c) >= 0x0141) && ((c) <= 0x0148)) ||
|
|
||||||
(((c) >= 0x014A) && ((c) <= 0x017E)) ||
|
|
||||||
(((c) >= 0x0180) && ((c) <= 0x01C3)) ||
|
|
||||||
(((c) >= 0x01CD) && ((c) <= 0x01F0)) ||
|
|
||||||
(((c) >= 0x01F4) && ((c) <= 0x01F5)) ||
|
|
||||||
(((c) >= 0x01FA) && ((c) <= 0x0217)) ||
|
|
||||||
(((c) >= 0x0250) && ((c) <= 0x02A8)) ||
|
|
||||||
(((c) >= 0x02BB) && ((c) <= 0x02C1)) ||
|
|
||||||
((c) == 0x0386) ||
|
|
||||||
(((c) >= 0x0388) && ((c) <= 0x038A)) ||
|
|
||||||
((c) == 0x038C) ||
|
|
||||||
(((c) >= 0x038E) && ((c) <= 0x03A1)) ||
|
|
||||||
(((c) >= 0x03A3) && ((c) <= 0x03CE)) ||
|
|
||||||
(((c) >= 0x03D0) && ((c) <= 0x03D6)) ||
|
|
||||||
((c) == 0x03DA) ||
|
|
||||||
((c) == 0x03DC) ||
|
|
||||||
((c) == 0x03DE) ||
|
|
||||||
((c) == 0x03E0) ||
|
|
||||||
(((c) >= 0x03E2) && ((c) <= 0x03F3)) ||
|
|
||||||
(((c) >= 0x0401) && ((c) <= 0x040C)) ||
|
|
||||||
(((c) >= 0x040E) && ((c) <= 0x044F)) ||
|
|
||||||
(((c) >= 0x0451) && ((c) <= 0x045C)) ||
|
|
||||||
(((c) >= 0x045E) && ((c) <= 0x0481)) ||
|
|
||||||
(((c) >= 0x0490) && ((c) <= 0x04C4)) ||
|
|
||||||
(((c) >= 0x04C7) && ((c) <= 0x04C8)) ||
|
|
||||||
(((c) >= 0x04CB) && ((c) <= 0x04CC)) ||
|
|
||||||
(((c) >= 0x04D0) && ((c) <= 0x04EB)) ||
|
|
||||||
(((c) >= 0x04EE) && ((c) <= 0x04F5)) ||
|
|
||||||
(((c) >= 0x04F8) && ((c) <= 0x04F9)) ||
|
|
||||||
(((c) >= 0x0531) && ((c) <= 0x0556)) ||
|
|
||||||
((c) == 0x0559) ||
|
|
||||||
(((c) >= 0x0561) && ((c) <= 0x0586)) ||
|
|
||||||
(((c) >= 0x05D0) && ((c) <= 0x05EA)) ||
|
|
||||||
(((c) >= 0x05F0) && ((c) <= 0x05F2)) ||
|
|
||||||
(((c) >= 0x0621) && ((c) <= 0x063A)) ||
|
|
||||||
(((c) >= 0x0641) && ((c) <= 0x064A)) ||
|
|
||||||
(((c) >= 0x0671) && ((c) <= 0x06B7)) ||
|
|
||||||
(((c) >= 0x06BA) && ((c) <= 0x06BE)) ||
|
|
||||||
(((c) >= 0x06C0) && ((c) <= 0x06CE)) ||
|
|
||||||
(((c) >= 0x06D0) && ((c) <= 0x06D3)) ||
|
|
||||||
((c) == 0x06D5) ||
|
|
||||||
(((c) >= 0x06E5) && ((c) <= 0x06E6)) ||
|
|
||||||
(((c) >= 0x905) && ( /* accelerator */
|
|
||||||
(((c) >= 0x0905) && ((c) <= 0x0939)) ||
|
|
||||||
((c) == 0x093D) ||
|
|
||||||
(((c) >= 0x0958) && ((c) <= 0x0961)) ||
|
|
||||||
(((c) >= 0x0985) && ((c) <= 0x098C)) ||
|
|
||||||
(((c) >= 0x098F) && ((c) <= 0x0990)) ||
|
|
||||||
(((c) >= 0x0993) && ((c) <= 0x09A8)) ||
|
|
||||||
(((c) >= 0x09AA) && ((c) <= 0x09B0)) ||
|
|
||||||
((c) == 0x09B2) ||
|
|
||||||
(((c) >= 0x09B6) && ((c) <= 0x09B9)) ||
|
|
||||||
(((c) >= 0x09DC) && ((c) <= 0x09DD)) ||
|
|
||||||
(((c) >= 0x09DF) && ((c) <= 0x09E1)) ||
|
|
||||||
(((c) >= 0x09F0) && ((c) <= 0x09F1)) ||
|
|
||||||
(((c) >= 0x0A05) && ((c) <= 0x0A0A)) ||
|
|
||||||
(((c) >= 0x0A0F) && ((c) <= 0x0A10)) ||
|
|
||||||
(((c) >= 0x0A13) && ((c) <= 0x0A28)) ||
|
|
||||||
(((c) >= 0x0A2A) && ((c) <= 0x0A30)) ||
|
|
||||||
(((c) >= 0x0A32) && ((c) <= 0x0A33)) ||
|
|
||||||
(((c) >= 0x0A35) && ((c) <= 0x0A36)) ||
|
|
||||||
(((c) >= 0x0A38) && ((c) <= 0x0A39)) ||
|
|
||||||
(((c) >= 0x0A59) && ((c) <= 0x0A5C)) ||
|
|
||||||
((c) == 0x0A5E) ||
|
|
||||||
(((c) >= 0x0A72) && ((c) <= 0x0A74)) ||
|
|
||||||
(((c) >= 0x0A85) && ((c) <= 0x0A8B)) ||
|
|
||||||
((c) == 0x0A8D) ||
|
|
||||||
(((c) >= 0x0A8F) && ((c) <= 0x0A91)) ||
|
|
||||||
(((c) >= 0x0A93) && ((c) <= 0x0AA8)) ||
|
|
||||||
(((c) >= 0x0AAA) && ((c) <= 0x0AB0)) ||
|
|
||||||
(((c) >= 0x0AB2) && ((c) <= 0x0AB3)) ||
|
|
||||||
(((c) >= 0x0AB5) && ((c) <= 0x0AB9)) ||
|
|
||||||
((c) == 0x0ABD) ||
|
|
||||||
((c) == 0x0AE0) ||
|
|
||||||
(((c) >= 0x0B05) && ((c) <= 0x0B0C)) ||
|
|
||||||
(((c) >= 0x0B0F) && ((c) <= 0x0B10)) ||
|
|
||||||
(((c) >= 0x0B13) && ((c) <= 0x0B28)) ||
|
|
||||||
(((c) >= 0x0B2A) && ((c) <= 0x0B30)) ||
|
|
||||||
(((c) >= 0x0B32) && ((c) <= 0x0B33)) ||
|
|
||||||
(((c) >= 0x0B36) && ((c) <= 0x0B39)) ||
|
|
||||||
((c) == 0x0B3D) ||
|
|
||||||
(((c) >= 0x0B5C) && ((c) <= 0x0B5D)) ||
|
|
||||||
(((c) >= 0x0B5F) && ((c) <= 0x0B61)) ||
|
|
||||||
(((c) >= 0x0B85) && ((c) <= 0x0B8A)) ||
|
|
||||||
(((c) >= 0x0B8E) && ((c) <= 0x0B90)) ||
|
|
||||||
(((c) >= 0x0B92) && ((c) <= 0x0B95)) ||
|
|
||||||
(((c) >= 0x0B99) && ((c) <= 0x0B9A)) ||
|
|
||||||
((c) == 0x0B9C) ||
|
|
||||||
(((c) >= 0x0B9E) && ((c) <= 0x0B9F)) ||
|
|
||||||
(((c) >= 0x0BA3) && ((c) <= 0x0BA4)) ||
|
|
||||||
(((c) >= 0x0BA8) && ((c) <= 0x0BAA)) ||
|
|
||||||
(((c) >= 0x0BAE) && ((c) <= 0x0BB5)) ||
|
|
||||||
(((c) >= 0x0BB7) && ((c) <= 0x0BB9)) ||
|
|
||||||
(((c) >= 0x0C05) && ((c) <= 0x0C0C)) ||
|
|
||||||
(((c) >= 0x0C0E) && ((c) <= 0x0C10)) ||
|
|
||||||
(((c) >= 0x0C12) && ((c) <= 0x0C28)) ||
|
|
||||||
(((c) >= 0x0C2A) && ((c) <= 0x0C33)) ||
|
|
||||||
(((c) >= 0x0C35) && ((c) <= 0x0C39)) ||
|
|
||||||
(((c) >= 0x0C60) && ((c) <= 0x0C61)) ||
|
|
||||||
(((c) >= 0x0C85) && ((c) <= 0x0C8C)) ||
|
|
||||||
(((c) >= 0x0C8E) && ((c) <= 0x0C90)) ||
|
|
||||||
(((c) >= 0x0C92) && ((c) <= 0x0CA8)) ||
|
|
||||||
(((c) >= 0x0CAA) && ((c) <= 0x0CB3)) ||
|
|
||||||
(((c) >= 0x0CB5) && ((c) <= 0x0CB9)) ||
|
|
||||||
((c) == 0x0CDE) ||
|
|
||||||
(((c) >= 0x0CE0) && ((c) <= 0x0CE1)) ||
|
|
||||||
(((c) >= 0x0D05) && ((c) <= 0x0D0C)) ||
|
|
||||||
(((c) >= 0x0D0E) && ((c) <= 0x0D10)) ||
|
|
||||||
(((c) >= 0x0D12) && ((c) <= 0x0D28)) ||
|
|
||||||
(((c) >= 0x0D2A) && ((c) <= 0x0D39)) ||
|
|
||||||
(((c) >= 0x0D60) && ((c) <= 0x0D61)) ||
|
|
||||||
(((c) >= 0x0E01) && ((c) <= 0x0E2E)) ||
|
|
||||||
((c) == 0x0E30) ||
|
|
||||||
(((c) >= 0x0E32) && ((c) <= 0x0E33)) ||
|
|
||||||
(((c) >= 0x0E40) && ((c) <= 0x0E45)) ||
|
|
||||||
(((c) >= 0x0E81) && ((c) <= 0x0E82)) ||
|
|
||||||
((c) == 0x0E84) ||
|
|
||||||
(((c) >= 0x0E87) && ((c) <= 0x0E88)) ||
|
|
||||||
((c) == 0x0E8A) ||
|
|
||||||
((c) == 0x0E8D) ||
|
|
||||||
(((c) >= 0x0E94) && ((c) <= 0x0E97)) ||
|
|
||||||
(((c) >= 0x0E99) && ((c) <= 0x0E9F)) ||
|
|
||||||
(((c) >= 0x0EA1) && ((c) <= 0x0EA3)) ||
|
|
||||||
((c) == 0x0EA5) ||
|
|
||||||
((c) == 0x0EA7) ||
|
|
||||||
(((c) >= 0x0EAA) && ((c) <= 0x0EAB)) ||
|
|
||||||
(((c) >= 0x0EAD) && ((c) <= 0x0EAE)) ||
|
|
||||||
((c) == 0x0EB0) ||
|
|
||||||
(((c) >= 0x0EB2) && ((c) <= 0x0EB3)) ||
|
|
||||||
((c) == 0x0EBD) ||
|
|
||||||
(((c) >= 0x0EC0) && ((c) <= 0x0EC4)) ||
|
|
||||||
(((c) >= 0x0F40) && ((c) <= 0x0F47)) ||
|
|
||||||
(((c) >= 0x0F49) && ((c) <= 0x0F69)) ||
|
|
||||||
(((c) >= 0x10A0) && ( /* accelerator */
|
|
||||||
(((c) >= 0x10A0) && ((c) <= 0x10C5)) ||
|
|
||||||
(((c) >= 0x10D0) && ((c) <= 0x10F6)) ||
|
|
||||||
((c) == 0x1100) ||
|
|
||||||
(((c) >= 0x1102) && ((c) <= 0x1103)) ||
|
|
||||||
(((c) >= 0x1105) && ((c) <= 0x1107)) ||
|
|
||||||
((c) == 0x1109) ||
|
|
||||||
(((c) >= 0x110B) && ((c) <= 0x110C)) ||
|
|
||||||
(((c) >= 0x110E) && ((c) <= 0x1112)) ||
|
|
||||||
((c) == 0x113C) ||
|
|
||||||
((c) == 0x113E) ||
|
|
||||||
((c) == 0x1140) ||
|
|
||||||
((c) == 0x114C) ||
|
|
||||||
((c) == 0x114E) ||
|
|
||||||
((c) == 0x1150) ||
|
|
||||||
(((c) >= 0x1154) && ((c) <= 0x1155)) ||
|
|
||||||
((c) == 0x1159) ||
|
|
||||||
(((c) >= 0x115F) && ((c) <= 0x1161)) ||
|
|
||||||
((c) == 0x1163) ||
|
|
||||||
((c) == 0x1165) ||
|
|
||||||
((c) == 0x1167) ||
|
|
||||||
((c) == 0x1169) ||
|
|
||||||
(((c) >= 0x116D) && ((c) <= 0x116E)) ||
|
|
||||||
(((c) >= 0x1172) && ((c) <= 0x1173)) ||
|
|
||||||
((c) == 0x1175) ||
|
|
||||||
((c) == 0x119E) ||
|
|
||||||
((c) == 0x11A8) ||
|
|
||||||
((c) == 0x11AB) ||
|
|
||||||
(((c) >= 0x11AE) && ((c) <= 0x11AF)) ||
|
|
||||||
(((c) >= 0x11B7) && ((c) <= 0x11B8)) ||
|
|
||||||
((c) == 0x11BA) ||
|
|
||||||
(((c) >= 0x11BC) && ((c) <= 0x11C2)) ||
|
|
||||||
((c) == 0x11EB) ||
|
|
||||||
((c) == 0x11F0) ||
|
|
||||||
((c) == 0x11F9) ||
|
|
||||||
(((c) >= 0x1E00) && ((c) <= 0x1E9B)) ||
|
|
||||||
(((c) >= 0x1EA0) && ((c) <= 0x1EF9)) ||
|
|
||||||
(((c) >= 0x1F00) && ((c) <= 0x1F15)) ||
|
|
||||||
(((c) >= 0x1F18) && ((c) <= 0x1F1D)) ||
|
|
||||||
(((c) >= 0x1F20) && ((c) <= 0x1F45)) ||
|
|
||||||
(((c) >= 0x1F48) && ((c) <= 0x1F4D)) ||
|
|
||||||
(((c) >= 0x1F50) && ((c) <= 0x1F57)) ||
|
|
||||||
((c) == 0x1F59) ||
|
|
||||||
((c) == 0x1F5B) ||
|
|
||||||
((c) == 0x1F5D) ||
|
|
||||||
(((c) >= 0x1F5F) && ((c) <= 0x1F7D)) ||
|
|
||||||
(((c) >= 0x1F80) && ((c) <= 0x1FB4)) ||
|
|
||||||
(((c) >= 0x1FB6) && ((c) <= 0x1FBC)) ||
|
|
||||||
((c) == 0x1FBE) ||
|
|
||||||
(((c) >= 0x1FC2) && ((c) <= 0x1FC4)) ||
|
|
||||||
(((c) >= 0x1FC6) && ((c) <= 0x1FCC)) ||
|
|
||||||
(((c) >= 0x1FD0) && ((c) <= 0x1FD3)) ||
|
|
||||||
(((c) >= 0x1FD6) && ((c) <= 0x1FDB)) ||
|
|
||||||
(((c) >= 0x1FE0) && ((c) <= 0x1FEC)) ||
|
|
||||||
(((c) >= 0x1FF2) && ((c) <= 0x1FF4)) ||
|
|
||||||
(((c) >= 0x1FF6) && ((c) <= 0x1FFC)) ||
|
|
||||||
((c) == 0x2126) ||
|
|
||||||
(((c) >= 0x212A) && ((c) <= 0x212B)) ||
|
|
||||||
((c) == 0x212E) ||
|
|
||||||
(((c) >= 0x2180) && ((c) <= 0x2182)) ||
|
|
||||||
(((c) >= 0x3041) && ((c) <= 0x3094)) ||
|
|
||||||
(((c) >= 0x30A1) && ((c) <= 0x30FA)) ||
|
|
||||||
(((c) >= 0x3105) && ((c) <= 0x312C)) ||
|
|
||||||
(((c) >= 0xAC00) && ((c) <= 0xD7A3))) /* accelerators */
|
|
||||||
))));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * xmlIsDigit:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [88] Digit ::= ... long list see REC ...
 *
 * The accepted code points are kept in a table sorted by ascending
 * value, so the scan stops as soon as @c falls below the start of the
 * range being examined.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsDigit(int c) {
    /* Inclusive [lo, hi] ranges, in ascending order. */
    static const struct {
        int lo;
        int hi;
    } digitRanges[] = {
        { 0x0030, 0x0039 }, { 0x0660, 0x0669 }, { 0x06F0, 0x06F9 },
        { 0x0966, 0x096F }, { 0x09E6, 0x09EF }, { 0x0A66, 0x0A6F },
        { 0x0AE6, 0x0AEF }, { 0x0B66, 0x0B6F }, { 0x0BE7, 0x0BEF },
        { 0x0C66, 0x0C6F }, { 0x0CE6, 0x0CEF }, { 0x0D66, 0x0D6F },
        { 0x0E50, 0x0E59 }, { 0x0ED0, 0x0ED9 }, { 0x0F20, 0x0F29 }
    };
    unsigned int idx;

    for (idx = 0; idx < sizeof(digitRanges) / sizeof(digitRanges[0]); idx++) {
        if (c < digitRanges[idx].lo) {
            /* Sorted table: no later range can contain c either. */
            return(0);
        }
        if (c <= digitRanges[idx].hi) {
            return(1);
        }
    }
    return(0);
}
|
|
||||||
|
|
||||||
/**
 * xmlIsCombining:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * The accepted code points are stored as inclusive ranges sorted by
 * ascending value and looked up with a binary search.  Adjacent ranges
 * are deliberately left unmerged so that each table entry corresponds
 * to one comparison of the previous hand-written expression.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsCombining(int c) {
    /* Inclusive [first, last] ranges, ascending and non-overlapping. */
    static const struct {
        int first;
        int last;
    } marks[] = {
        { 0x0300, 0x0345 }, { 0x0360, 0x0361 }, { 0x0483, 0x0486 },
        { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 }, { 0x05BB, 0x05BD },
        { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, { 0x05C4, 0x05C4 },
        { 0x064B, 0x0652 }, { 0x0670, 0x0670 }, { 0x06D6, 0x06DC },
        { 0x06DD, 0x06DF }, { 0x06E0, 0x06E4 }, { 0x06E7, 0x06E8 },
        { 0x06EA, 0x06ED },
        { 0x0901, 0x0903 }, { 0x093C, 0x093C }, { 0x093E, 0x094C },
        { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
        { 0x0981, 0x0983 }, { 0x09BC, 0x09BC }, { 0x09BE, 0x09BE },
        { 0x09BF, 0x09BF }, { 0x09C0, 0x09C4 }, { 0x09C7, 0x09C8 },
        { 0x09CB, 0x09CD }, { 0x09D7, 0x09D7 }, { 0x09E2, 0x09E3 },
        { 0x0A02, 0x0A02 }, { 0x0A3C, 0x0A3C }, { 0x0A3E, 0x0A3E },
        { 0x0A3F, 0x0A3F }, { 0x0A40, 0x0A42 }, { 0x0A47, 0x0A48 },
        { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A83 },
        { 0x0ABC, 0x0ABC }, { 0x0ABE, 0x0AC5 }, { 0x0AC7, 0x0AC9 },
        { 0x0ACB, 0x0ACD }, { 0x0B01, 0x0B03 }, { 0x0B3C, 0x0B3C },
        { 0x0B3E, 0x0B43 }, { 0x0B47, 0x0B48 }, { 0x0B4B, 0x0B4D },
        { 0x0B56, 0x0B57 }, { 0x0B82, 0x0B83 }, { 0x0BBE, 0x0BC2 },
        { 0x0BC6, 0x0BC8 }, { 0x0BCA, 0x0BCD }, { 0x0BD7, 0x0BD7 },
        { 0x0C01, 0x0C03 }, { 0x0C3E, 0x0C44 }, { 0x0C46, 0x0C48 },
        { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0C82, 0x0C83 },
        { 0x0CBE, 0x0CC4 }, { 0x0CC6, 0x0CC8 }, { 0x0CCA, 0x0CCD },
        { 0x0CD5, 0x0CD6 }, { 0x0D02, 0x0D03 }, { 0x0D3E, 0x0D43 },
        { 0x0D46, 0x0D48 }, { 0x0D4A, 0x0D4D }, { 0x0D57, 0x0D57 },
        { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
        { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
        { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
        { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F3E, 0x0F3E },
        { 0x0F3F, 0x0F3F }, { 0x0F71, 0x0F84 }, { 0x0F86, 0x0F8B },
        { 0x0F90, 0x0F95 }, { 0x0F97, 0x0F97 }, { 0x0F99, 0x0FAD },
        { 0x0FB1, 0x0FB7 }, { 0x0FB9, 0x0FB9 }, { 0x20D0, 0x20DC },
        { 0x20E1, 0x20E1 }, { 0x302A, 0x302F }, { 0x3099, 0x3099 },
        { 0x309A, 0x309A }
    };
    int low = 0;
    int high = (int) (sizeof(marks) / sizeof(marks[0])) - 1;

    while (low <= high) {
        int mid = low + (high - low) / 2;

        if (c < marks[mid].first) {
            high = mid - 1;
        } else if (c > marks[mid].last) {
            low = mid + 1;
        } else {
            return(1);
        }
    }
    return(0);
}
|
|
||||||
|
|
||||||
/**
 * xmlIsExtender:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * The test is written as one boolean expression whose terms follow the
 * order of the production; consecutive code points are folded into
 * range comparisons.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsExtender(int c) {
    return((c == 0x00B7) ||
           ((c >= 0x02D0) && (c <= 0x02D1)) ||
           (c == 0x0387) ||
           (c == 0x0640) ||
           (c == 0x0E46) ||
           (c == 0x0EC6) ||
           (c == 0x3005) ||
           ((c >= 0x3031) && (c <= 0x3035)) ||
           ((c >= 0x309D) && (c <= 0x309E)) ||
           ((c >= 0x30FC) && (c <= 0x30FE)));
}
|
|
||||||
|
|
||||||
/**
 * xmlIsIdeographic:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the three alternatives of the production are accepted.  The
 * range 0xF900-0xFA2D, which earlier revisions of this function also
 * accepted, is not part of production [86] and is now rejected.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsIdeographic(int c) {
    return(((c >= 0x4E00) && (c <= 0x9FA5)) ||
           (c == 0x3007) ||
           ((c >= 0x3021) && (c <= 0x3029)));
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* xmlIsLetter:
|
* xmlIsLetter:
|
||||||
* @c: an unicode character (int)
|
* @c: an unicode character (int)
|
||||||
@@ -676,29 +211,6 @@ xmlIsLetter(int c) {
|
|||||||
return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c));
|
return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * xmlIsPubidChar:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Letters and digits are tested with range comparisons first; the
 * remaining whitespace and punctuation characters are enumerated in a
 * switch.
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsPubidChar(int c) {
    if (((c >= 'a') && (c <= 'z')) ||
        ((c >= 'A') && (c <= 'Z')) ||
        ((c >= '0') && (c <= '9'))) {
        return(1);
    }

    switch (c) {
        case 0x20: case 0x0D: case 0x0A:
        case '-': case '\'': case '(': case ')':
        case '+': case ',': case '.': case '/':
        case ':': case '=': case '?': case ';':
        case '!': case '*': case '#': case '@':
        case '$': case '_': case '%':
            return(1);
        default:
            return(0);
    }
}
|
|
||||||
|
|
||||||
/************************************************************************
|
/************************************************************************
|
||||||
* *
|
* *
|
||||||
* Input handling functions for progressive parsing *
|
* Input handling functions for progressive parsing *
|
||||||
|
|||||||
Reference in New Issue
Block a user