diff --git a/ChangeLog b/ChangeLog index 53be3bea..6889b65b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +Sat Oct 2 15:03:14 PDT 2004 William Brack + + * include/libxml/parserInternals.h: added two new macros + IS_ASCII_LETTER and IS_ASCII_DIGIT used with (html) + parsing and xpath for testing data not necessarily + unicode. + * HTMLparser.c, xpath.c: changed use of IS_LETTER_CH and + IS_DIGIT_CH macros to ascii versions (bug 153936). + Fri Oct 1 20:37:25 PDT 2004 William Brack * error.c: added some coding to attempt to display which file diff --git a/HTMLparser.c b/HTMLparser.c index 10f85166..72a08704 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -2168,11 +2168,11 @@ htmlParseHTMLName(htmlParserCtxtPtr ctxt) { int i = 0; xmlChar loc[HTML_PARSER_BUFFER_SIZE]; - if (!IS_LETTER_CH(CUR) && (CUR != '_') && + if (!IS_ASCII_LETTER(CUR) && (CUR != '_') && (CUR != ':')) return(NULL); while ((i < HTML_PARSER_BUFFER_SIZE) && - ((IS_LETTER_CH(CUR)) || (IS_DIGIT_CH(CUR)) || + ((IS_ASCII_LETTER(CUR)) || (IS_ASCII_DIGIT(CUR)) || (CUR == ':') || (CUR == '-') || (CUR == '_'))) { if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20; else loc[i] = CUR; diff --git a/include/libxml/parserInternals.h b/include/libxml/parserInternals.h index 57c4b178..14d4e4da 100644 --- a/include/libxml/parserInternals.h +++ b/include/libxml/parserInternals.h @@ -197,6 +197,26 @@ XMLPUBVAR unsigned int xmlParserMaxDepth; * */ #define IS_LETTER_CH(c) xmlIsBaseChar_ch(c) + +/** + * IS_ASCII_LETTER(c) + * @c: an xmlChar value + * + * Macro to check [a-zA-Z] + * + */ +#define IS_ASCII_LETTER(c) (((0x41 <= (c)) && ((c) <= 0x5a)) || \ + ((0x61 <= (c)) && ((c) <= 0x7a))) + +/** + * IS_ASCII_DIGIT(c) + * @c: an xmlChar value + * + * Macro to check [0-9] + * + */ +#define IS_ASCII_DIGIT(c) ((0x30 <= (c)) && ((c) <= 0x39)) + /** * IS_PUBIDCHAR: * @c: an UNICODE value (int) diff --git a/xpath.c b/xpath.c index 98667362..c55958d2 100644 --- a/xpath.c +++ b/xpath.c @@ -57,6 +57,13 @@ "Unimplemented 
block at %s:%d\n", \ __FILE__, __LINE__); +/* + * TODO: + * There are a few spots where some tests are done which depend upon ascii + * data. These should be enhanced for full UTF8 support (see particularly + * any use of the macros IS_ASCII_LETTER and IS_ASCII_DIGIT) + */ + #if defined(LIBXML_SCHEMAS_ENABLED) || defined(LIBXML_XPATH_ENABLED) /************************************************************************ * * @@ -7947,7 +7954,7 @@ xmlXPathCompPrimaryExpr(xmlXPathParserContextPtr ctxt) { } NEXT; SKIP_BLANKS; - } else if (IS_DIGIT_CH(CUR) || (CUR == '.' && IS_DIGIT_CH(NXT(1)))) { + } else if (IS_ASCII_DIGIT(CUR) || (CUR == '.' && IS_ASCII_DIGIT(NXT(1)))) { xmlXPathCompNumber(ctxt); } else if ((CUR == '\'') || (CUR == '"')) { xmlXPathCompLiteral(ctxt); @@ -8009,12 +8016,12 @@ xmlXPathScanName(xmlXPathParserContextPtr ctxt) { int len = 0; SKIP_BLANKS; - if (!IS_LETTER_CH(CUR) && (CUR != '_') && + if (!IS_ASCII_LETTER(CUR) && (CUR != '_') && (CUR != ':')) { return(NULL); } - while ((IS_LETTER_CH(NXT(len))) || (IS_DIGIT_CH(NXT(len))) || + while ((IS_ASCII_LETTER(NXT(len))) || (IS_ASCII_DIGIT(NXT(len))) || (NXT(len) == '.') || (NXT(len) == '-') || (NXT(len) == '_') || (NXT(len) == ':') || (IS_COMBINING_CH(NXT(len))) || @@ -8024,7 +8031,7 @@ xmlXPathScanName(xmlXPathParserContextPtr ctxt) { if (len >= XML_MAX_NAMELEN) { xmlGenericError(xmlGenericErrorContext, "xmlScanName: reached XML_MAX_NAMELEN limit\n"); - while ((IS_LETTER_CH(NXT(len))) || (IS_DIGIT_CH(NXT(len))) || + while ((IS_ASCII_LETTER(NXT(len))) || (IS_ASCII_DIGIT(NXT(len))) || (NXT(len) == '.') || (NXT(len) == '-') || (NXT(len) == '_') || (NXT(len) == ':') || (IS_COMBINING_CH(NXT(len))) || @@ -8060,8 +8067,10 @@ xmlXPathCompPathExpr(xmlXPathParserContextPtr ctxt) { xmlChar *name = NULL; /* we may have to preparse a name to find out */ SKIP_BLANKS; - if ((CUR == '$') || (CUR == '(') || (IS_DIGIT_CH(CUR)) || - (CUR == '\'') || (CUR == '"') || (CUR == '.'
&& IS_DIGIT_CH(NXT(1)))) { + if ((CUR == '$') || (CUR == '(') || + (IS_ASCII_DIGIT(CUR)) || + (CUR == '\'') || (CUR == '"') || + (CUR == '.' && IS_ASCII_DIGIT(NXT(1)))) { lc = 0; } else if (CUR == '*') { /* relative or absolute location path */ @@ -8957,7 +8966,7 @@ xmlXPathCompLocationPath(xmlXPathParserContextPtr ctxt) { NEXT; SKIP_BLANKS; if ((CUR != 0 ) && - ((IS_LETTER_CH(CUR)) || (CUR == '_') || (CUR == '.') || + ((IS_ASCII_LETTER(CUR)) || (CUR == '_') || (CUR == '.') || (CUR == '@') || (CUR == '*'))) xmlXPathCompRelativeLocationPath(ctxt); }