1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-21 14:53:44 +03:00

added two new macros IS_ASCII_LETTER and IS_ASCII_DIGIT used with (html)

* include/libxml/parserInternals.h: added two new macros
  IS_ASCII_LETTER and IS_ASCII_DIGIT used with (html)
  parsing and xpath for testing data not necessarily
  unicode.
* HTMLparser.c, xpath.c: changed use of IS_LETTER_CH and
  IS_DIGIT_CH macros to ascii versions (bug 153936).
This commit is contained in:
William M. Brack
2004-10-02 22:07:48 +00:00
parent d0407520d8
commit d1757abcb8
4 changed files with 47 additions and 9 deletions

View File

@@ -1,3 +1,12 @@
Sat Oct 2 15:03:14 PDT 2004 William Brack <wbrack@mmm.com.hk>
* include/libxml/parserInternals.h: added two new macros
IS_ASCII_LETTER and IS_ASCII_DIGIT used with (html)
parsing and xpath for testing data not necessarily
unicode.
* HTMLparser.c, xpath.c: changed use of IS_LETTER_CH and
IS_DIGIT_CH macros to ascii versions (bug 153936).
Fri Oct 1 20:37:25 PDT 2004 William Brack <wbrack@mmm.com.hk> Fri Oct 1 20:37:25 PDT 2004 William Brack <wbrack@mmm.com.hk>
* error.c: added some coding to attempt to display which file * error.c: added some coding to attempt to display which file

View File

@@ -2168,11 +2168,11 @@ htmlParseHTMLName(htmlParserCtxtPtr ctxt) {
int i = 0; int i = 0;
xmlChar loc[HTML_PARSER_BUFFER_SIZE]; xmlChar loc[HTML_PARSER_BUFFER_SIZE];
if (!IS_LETTER_CH(CUR) && (CUR != '_') && if (!IS_ASCII_LETTER(CUR) && (CUR != '_') &&
(CUR != ':')) return(NULL); (CUR != ':')) return(NULL);
while ((i < HTML_PARSER_BUFFER_SIZE) && while ((i < HTML_PARSER_BUFFER_SIZE) &&
((IS_LETTER_CH(CUR)) || (IS_DIGIT_CH(CUR)) || ((IS_ASCII_LETTER(CUR)) || (IS_ASCII_DIGIT(CUR)) ||
(CUR == ':') || (CUR == '-') || (CUR == '_'))) { (CUR == ':') || (CUR == '-') || (CUR == '_'))) {
if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20; if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20;
else loc[i] = CUR; else loc[i] = CUR;

View File

@@ -197,6 +197,26 @@ XMLPUBVAR unsigned int xmlParserMaxDepth;
* *
*/ */
#define IS_LETTER_CH(c) xmlIsBaseChar_ch(c) #define IS_LETTER_CH(c) xmlIsBaseChar_ch(c)
/**
* IS_ASCII_LETTER:
* @c: an xmlChar value
*
* Macro to check whether @c lies in one of the ASCII letter ranges
* [A-Z] (0x41-0x5a) or [a-z] (0x61-0x7a).
* The argument is expanded more than once, so it must not have
* side effects.
*/
#define IS_ASCII_LETTER(c) ((((c) >= 0x41) && ((c) <= 0x5a)) || \
(((c) >= 0x61) && ((c) <= 0x7a)))
/**
* IS_ASCII_DIGIT:
* @c: an xmlChar value
*
* Macro to check whether @c lies in the ASCII decimal digit range
* [0-9] (0x30-0x39).
* The argument is expanded twice, so it must not have side effects.
*/
#define IS_ASCII_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))
/** /**
* IS_PUBIDCHAR: * IS_PUBIDCHAR:
* @c: an UNICODE value (int) * @c: an UNICODE value (int)

23
xpath.c
View File

@@ -57,6 +57,13 @@
"Unimplemented block at %s:%d\n", \ "Unimplemented block at %s:%d\n", \
__FILE__, __LINE__); __FILE__, __LINE__);
/*
* TODO:
* There are a few spots where some tests are done which depend upon ascii
* data. These should be enhanced for full UTF8 support (see particularly
* any use of the macros IS_ASCII_LETTER and IS_ASCII_DIGIT)
*/
#if defined(LIBXML_SCHEMAS_ENABLED) || defined(LIBXML_XPATH_ENABLED) #if defined(LIBXML_SCHEMAS_ENABLED) || defined(LIBXML_XPATH_ENABLED)
/************************************************************************ /************************************************************************
* * * *
@@ -7947,7 +7954,7 @@ xmlXPathCompPrimaryExpr(xmlXPathParserContextPtr ctxt) {
} }
NEXT; NEXT;
SKIP_BLANKS; SKIP_BLANKS;
} else if (IS_DIGIT_CH(CUR) || (CUR == '.' && IS_DIGIT_CH(NXT(1)))) { } else if (IS_ASCII_DIGIT(CUR) || (CUR == '.' && IS_ASCII_DIGIT(NXT(1)))) {
xmlXPathCompNumber(ctxt); xmlXPathCompNumber(ctxt);
} else if ((CUR == '\'') || (CUR == '"')) { } else if ((CUR == '\'') || (CUR == '"')) {
xmlXPathCompLiteral(ctxt); xmlXPathCompLiteral(ctxt);
@@ -8009,12 +8016,12 @@ xmlXPathScanName(xmlXPathParserContextPtr ctxt) {
int len = 0; int len = 0;
SKIP_BLANKS; SKIP_BLANKS;
if (!IS_LETTER_CH(CUR) && (CUR != '_') && if (!IS_ASCII_LETTER(CUR) && (CUR != '_') &&
(CUR != ':')) { (CUR != ':')) {
return(NULL); return(NULL);
} }
while ((IS_LETTER_CH(NXT(len))) || (IS_DIGIT_CH(NXT(len))) || while ((IS_ASCII_LETTER(NXT(len))) || (IS_ASCII_DIGIT(NXT(len))) ||
(NXT(len) == '.') || (NXT(len) == '-') || (NXT(len) == '.') || (NXT(len) == '-') ||
(NXT(len) == '_') || (NXT(len) == ':') || (NXT(len) == '_') || (NXT(len) == ':') ||
(IS_COMBINING_CH(NXT(len))) || (IS_COMBINING_CH(NXT(len))) ||
@@ -8024,7 +8031,7 @@ xmlXPathScanName(xmlXPathParserContextPtr ctxt) {
if (len >= XML_MAX_NAMELEN) { if (len >= XML_MAX_NAMELEN) {
xmlGenericError(xmlGenericErrorContext, xmlGenericError(xmlGenericErrorContext,
"xmlScanName: reached XML_MAX_NAMELEN limit\n"); "xmlScanName: reached XML_MAX_NAMELEN limit\n");
while ((IS_LETTER_CH(NXT(len))) || (IS_DIGIT_CH(NXT(len))) || while ((IS_ASCII_LETTER(NXT(len))) || (IS_ASCII_DIGIT(NXT(len))) ||
(NXT(len) == '.') || (NXT(len) == '-') || (NXT(len) == '.') || (NXT(len) == '-') ||
(NXT(len) == '_') || (NXT(len) == ':') || (NXT(len) == '_') || (NXT(len) == ':') ||
(IS_COMBINING_CH(NXT(len))) || (IS_COMBINING_CH(NXT(len))) ||
@@ -8060,8 +8067,10 @@ xmlXPathCompPathExpr(xmlXPathParserContextPtr ctxt) {
xmlChar *name = NULL; /* we may have to preparse a name to find out */ xmlChar *name = NULL; /* we may have to preparse a name to find out */
SKIP_BLANKS; SKIP_BLANKS;
if ((CUR == '$') || (CUR == '(') || (IS_DIGIT_CH(CUR)) || if ((CUR == '$') || (CUR == '(') ||
(CUR == '\'') || (CUR == '"') || (CUR == '.' && IS_DIGIT_CH(NXT(1)))) { (IS_ASCII_DIGIT(CUR)) ||
(CUR == '\'') || (CUR == '"') ||
(CUR == '.' && IS_ASCII_DIGIT(NXT(1)))) {
lc = 0; lc = 0;
} else if (CUR == '*') { } else if (CUR == '*') {
/* relative or absolute location path */ /* relative or absolute location path */
@@ -8957,7 +8966,7 @@ xmlXPathCompLocationPath(xmlXPathParserContextPtr ctxt) {
NEXT; NEXT;
SKIP_BLANKS; SKIP_BLANKS;
if ((CUR != 0 ) && if ((CUR != 0 ) &&
((IS_LETTER_CH(CUR)) || (CUR == '_') || (CUR == '.') || ((IS_ASCII_LETTER(CUR)) || (CUR == '_') || (CUR == '.') ||
(CUR == '@') || (CUR == '*'))) (CUR == '@') || (CUR == '*')))
xmlXPathCompRelativeLocationPath(ctxt); xmlXPathCompRelativeLocationPath(ctxt);
} }