diff --git a/ChangeLog b/ChangeLog index db91021d..7a50abb3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +Sun Jan 13 16:37:15 CET 2002 Daniel Veillard + + * valid.c: fixed validation of attributes content of type + NAME NAMES NMTOKEN and NMTOKENS to accept internationalized + values, very old bug. Fixes #67671 + Sun Jan 13 15:07:49 CET 2002 Daniel Veillard * parser.c include/libxml/parserInternals.h tree.c: integrated diff --git a/parserInternals.c b/parserInternals.c index 9a570c91..4af5d717 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -1391,68 +1391,69 @@ encoding_error: */ int -xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) { +xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len) +{ if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) { - /* - * We are supposed to handle UTF8, check it's valid - * From rfc2044: encoding of the Unicode values on UTF-8: - * - * UCS-4 range (hex.) UTF-8 octet sequence (binary) - * 0000 0000-0000 007F 0xxxxxxx - * 0000 0080-0000 07FF 110xxxxx 10xxxxxx - * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx - * - * Check for the 0x110000 limit too - */ - unsigned char c; - unsigned int val; + /* + * We are supposed to handle UTF8, check it's valid + * From rfc2044: encoding of the Unicode values on UTF-8: + * + * UCS-4 range (hex.) UTF-8 octet sequence (binary) + * 0000 0000-0000 007F 0xxxxxxx + * 0000 0080-0000 07FF 110xxxxx 10xxxxxx + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx + * + * Check for the 0x110000 limit too + */ + unsigned char c; + unsigned int val; - c = *cur; - if (c & 0x80) { - if ((cur[1] & 0xc0) != 0x80) - goto encoding_error; - if ((c & 0xe0) == 0xe0) { + c = *cur; + if (c & 0x80) { + if ((cur[1] & 0xc0) != 0x80) + goto encoding_error; + if ((c & 0xe0) == 0xe0) { - if ((cur[2] & 0xc0) != 0x80) - goto encoding_error; - if ((c & 0xf0) == 0xf0) { - if (((c & 0xf8) != 0xf0) || - ((cur[3] & 0xc0) != 0x80)) - goto encoding_error; - /* 4-byte code */ - *len = 4; - val = (cur[0] & 0x7) << 18; - val |= (cur[1] & 0x3f) << 12; - val |= (cur[2] & 0x3f) << 6; - val |= cur[3] & 0x3f; - } else { - /* 3-byte code */ - *len = 3; - val = (cur[0] & 0xf) << 12; - val |= (cur[1] & 0x3f) << 6; - val |= cur[2] & 0x3f; - } - } else { - /* 2-byte code */ - *len = 2; - val = (cur[0] & 0x1f) << 6; - val |= cur[1] & 0x3f; - } - if (!IS_CHAR(val)) { - if ((ctxt->sax != NULL) && - (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "Char 0x%X out of allowed range\n", val); - ctxt->errNo = XML_ERR_INVALID_ENCODING; - ctxt->wellFormed = 0; - ctxt->disableSAX = 1; - } - return(val); - } else { - /* 1-byte code */ - *len = 1; - return((int) *cur); - } + if ((cur[2] & 0xc0) != 0x80) + goto encoding_error; + if ((c & 0xf0) == 0xf0) { + if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80)) + goto encoding_error; + /* 4-byte code */ + *len = 4; + val = (cur[0] & 0x7) << 18; + val |= (cur[1] & 0x3f) << 12; + val |= (cur[2] & 0x3f) << 6; + val |= cur[3] & 0x3f; + } else { + /* 3-byte code */ + *len = 3; + val = (cur[0] & 0xf) << 12; + val |= (cur[1] & 0x3f) << 6; + val |= cur[2] & 0x3f; + } + } else { + /* 2-byte code */ + *len = 2; + val = (cur[0] & 0x1f) << 6; + val |= cur[1] & 0x3f; + } + if (!IS_CHAR(val)) { + if ((ctxt != NULL) && (ctxt->sax != NULL) && + (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Char 0x%X out of allowed range\n", + val); + ctxt->errNo = XML_ERR_INVALID_ENCODING; + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + } + return (val); + } else { + /* 1-byte code */ + *len = 1; + return ((int) *cur); + } } /* * Assume it's a fixed length encoding (1) with @@ -1460,8 +1461,9 @@ xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) { * XML constructs only use < 128 chars */ *len = 1; - return((int) *cur); + return ((int) *cur); encoding_error: + /* * If we detect an UTF8 error that probably mean that the * input encoding didn't get properly advertised in the @@ -1469,17 +1471,20 @@ encoding_error: * to ISO-Latin-1 (if you don't like this policy, just declare the * encoding !) */ - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { - ctxt->sax->error(ctxt->userData, - "Input is not proper UTF-8, indicate encoding !\n"); - ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", - ctxt->input->cur[0], ctxt->input->cur[1], - ctxt->input->cur[2], ctxt->input->cur[3]); + if (ctxt != NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { + ctxt->sax->error(ctxt->userData, + "Input is not proper UTF-8, indicate encoding !\n"); + ctxt->sax->error(ctxt->userData, + "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", + ctxt->input->cur[0], ctxt->input->cur[1], + ctxt->input->cur[2], ctxt->input->cur[3]); + } + ctxt->errNo = XML_ERR_INVALID_ENCODING; } - ctxt->errNo = XML_ERR_INVALID_ENCODING; *len = 1; - return((int) *cur); + return ((int) *cur); } /** diff --git a/valid.c b/valid.c index 24ce16aa..55c7b9e2 100644 --- a/valid.c +++ b/valid.c @@ -2565,23 +2565,29 @@ xmlIsMixedElement(xmlDocPtr doc, const xmlChar *name) { static int xmlValidateNameValue(const xmlChar *value) { const xmlChar *cur; + int val, len; if (value == NULL) return(0); cur = value; - - if (!IS_LETTER(*cur) && (*cur != '_') && - (*cur != ':')) { + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + if (!IS_LETTER(val) && (val != '_') && + (val != ':')) { return(0); } - while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) || - (*cur == '.') || (*cur == '-') || - (*cur == '_') || (*cur == ':') || - (IS_COMBINING(*cur)) || - (IS_EXTENDER(*cur))) - cur++; + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + while ((IS_LETTER(val)) || (IS_DIGIT(val)) || + (val == '.') || (val == '-') || + (val == '_') || (val == ':') || + (IS_COMBINING(val)) || + (IS_EXTENDER(val))) { + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + } - if (*cur != 0) return(0); + if (val != 0) return(0); return(1); } @@ -2598,39 +2604,53 @@ xmlValidateNameValue(const xmlChar *value) { static int xmlValidateNamesValue(const xmlChar *value) { const xmlChar *cur; + int val, len; if (value == NULL) return(0); cur = value; + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; - if (!IS_LETTER(*cur) && (*cur != '_') && - (*cur != ':')) { + if (!IS_LETTER(val) && (val != '_') && + (val != ':')) { return(0); } - while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) || - (*cur == '.') || (*cur == '-') || - (*cur == '_') || (*cur == ':') || - (IS_COMBINING(*cur)) || - (IS_EXTENDER(*cur))) - cur++; - - while (IS_BLANK(*cur)) { - while (IS_BLANK(*cur)) cur++; - - if (!IS_LETTER(*cur) && (*cur != '_') && - (*cur != ':')) { - return(0); - } - - while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) || - (*cur == '.') || (*cur == '-') || - (*cur == '_') || (*cur == ':') || - (IS_COMBINING(*cur)) || - (IS_EXTENDER(*cur))) - cur++; + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + while ((IS_LETTER(val)) || (IS_DIGIT(val)) || + (val == '.') || (val == '-') || + (val == '_') || (val == ':') || + (IS_COMBINING(val)) || + (IS_EXTENDER(val))) { + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; } - if (*cur != 0) return(0); + while (IS_BLANK(val)) { + while (IS_BLANK(val)) { + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + } + + if (!IS_LETTER(val) && (val != '_') && + (val != ':')) { + return(0); + } + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + + while ((IS_LETTER(val)) || (IS_DIGIT(val)) || + (val == '.') || (val == '-') || + (val == '_') || (val == ':') || + (IS_COMBINING(val)) || + (IS_EXTENDER(val))) { + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + } + } + + if (val != 0) return(0); return(1); } @@ -2649,25 +2669,30 @@ xmlValidateNamesValue(const xmlChar *value) { static int xmlValidateNmtokenValue(const xmlChar *value) { const xmlChar *cur; + int val, len; if (value == NULL) return(0); cur = value; + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; - if (!IS_LETTER(*cur) && !IS_DIGIT(*cur) && - (*cur != '.') && (*cur != '-') && - (*cur != '_') && (*cur != ':') && - (!IS_COMBINING(*cur)) && - (!IS_EXTENDER(*cur))) + if (!IS_LETTER(val) && !IS_DIGIT(val) && + (val != '.') && (val != '-') && + (val != '_') && (val != ':') && + (!IS_COMBINING(val)) && + (!IS_EXTENDER(val))) return(0); - while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) || - (*cur == '.') || (*cur == '-') || - (*cur == '_') || (*cur == ':') || - (IS_COMBINING(*cur)) || - (IS_EXTENDER(*cur))) - cur++; + while ((IS_LETTER(val)) || (IS_DIGIT(val)) || + (val == '.') || (val == '-') || + (val == '_') || (val == ':') || + (IS_COMBINING(val)) || + (IS_EXTENDER(val))) { + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + } - if (*cur != 0) return(0); + if (val != 0) return(0); return(1); } @@ -2686,45 +2711,59 @@ xmlValidateNmtokenValue(const xmlChar *value) { static int xmlValidateNmtokensValue(const xmlChar *value) { const xmlChar *cur; + int val, len; if (value == NULL) return(0); cur = value; + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; - while (IS_BLANK(*cur)) cur++; - if (!IS_LETTER(*cur) && !IS_DIGIT(*cur) && - (*cur != '.') && (*cur != '-') && - (*cur != '_') && (*cur != ':') && - (!IS_COMBINING(*cur)) && - (!IS_EXTENDER(*cur))) - return(0); - - while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) || - (*cur == '.') || (*cur == '-') || - (*cur == '_') || (*cur == ':') || - (IS_COMBINING(*cur)) || - (IS_EXTENDER(*cur))) - cur++; - - while (IS_BLANK(*cur)) { - while (IS_BLANK(*cur)) cur++; - if (*cur == 0) return(1); - - if (!IS_LETTER(*cur) && !IS_DIGIT(*cur) && - (*cur != '.') && (*cur != '-') && - (*cur != '_') && (*cur != ':') && - (!IS_COMBINING(*cur)) && - (!IS_EXTENDER(*cur))) - return(0); - - while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) || - (*cur == '.') || (*cur == '-') || - (*cur == '_') || (*cur == ':') || - (IS_COMBINING(*cur)) || - (IS_EXTENDER(*cur))) - cur++; + while (IS_BLANK(val)) { + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; } - if (*cur != 0) return(0); + if (!IS_LETTER(val) && !IS_DIGIT(val) && + (val != '.') && (val != '-') && + (val != '_') && (val != ':') && + (!IS_COMBINING(val)) && + (!IS_EXTENDER(val))) + return(0); + + while ((IS_LETTER(val)) || (IS_DIGIT(val)) || + (val == '.') || (val == '-') || + (val == '_') || (val == ':') || + (IS_COMBINING(val)) || + (IS_EXTENDER(val))) { + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + } + + while (IS_BLANK(val)) { + while (IS_BLANK(val)) { + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + } + if (val == 0) return(1); + + if (!IS_LETTER(val) && !IS_DIGIT(val) && + (val != '.') && (val != '-') && + (val != '_') && (val != ':') && + (!IS_COMBINING(val)) && + (!IS_EXTENDER(val))) + return(0); + + while ((IS_LETTER(val)) || (IS_DIGIT(val)) || + (val == '.') || (val == '-') || + (val == '_') || (val == ':') || + (IS_COMBINING(val)) || + (IS_EXTENDER(val))) { + val = xmlStringCurrentChar(NULL, cur, &len); + cur += len; + } + } + + if (val != 0) return(0); return(1); }