1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-08-08 17:42:14 +03:00

fixed validation of attributes content of type NAME NAMES NMTOKEN and NMTOKENS

* valid.c: fixed validation of attributes content of type
  NAME NAMES NMTOKEN and NMTOKENS to accept internationalized
  values, very old bug. Fixes #67671
Daniel
This commit is contained in:
Daniel Veillard
2002-01-13 15:43:22 +00:00
parent 8107a22f96
commit d8224e0f7e
3 changed files with 197 additions and 147 deletions

View File

@@ -1,3 +1,9 @@
Sun Jan 13 16:37:15 CET 2002 Daniel Veillard <daniel@veillard.com>
* valid.c: fixed validation of attributes content of type
NAME NAMES NMTOKEN and NMTOKENS to accept internationalized
values, very old bug. Fixes #67671
Sun Jan 13 15:07:49 CET 2002 Daniel Veillard <daniel@veillard.com> Sun Jan 13 15:07:49 CET 2002 Daniel Veillard <daniel@veillard.com>
* parser.c include/libxml/parserInternals.h tree.c: integrated * parser.c include/libxml/parserInternals.h tree.c: integrated

View File

@@ -1391,68 +1391,69 @@ encoding_error:
*/ */
int int
xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) { xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
{
if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) { if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
/* /*
* We are supposed to handle UTF8, check it's valid * We are supposed to handle UTF8, check it's valid
* From rfc2044: encoding of the Unicode values on UTF-8: * From rfc2044: encoding of the Unicode values on UTF-8:
* *
* UCS-4 range (hex.) UTF-8 octet sequence (binary) * UCS-4 range (hex.) UTF-8 octet sequence (binary)
* 0000 0000-0000 007F 0xxxxxxx * 0000 0000-0000 007F 0xxxxxxx
* 0000 0080-0000 07FF 110xxxxx 10xxxxxx * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
* *
* Check for the 0x110000 limit too * Check for the 0x110000 limit too
*/ */
unsigned char c; unsigned char c;
unsigned int val; unsigned int val;
c = *cur; c = *cur;
if (c & 0x80) { if (c & 0x80) {
if ((cur[1] & 0xc0) != 0x80) if ((cur[1] & 0xc0) != 0x80)
goto encoding_error; goto encoding_error;
if ((c & 0xe0) == 0xe0) { if ((c & 0xe0) == 0xe0) {
if ((cur[2] & 0xc0) != 0x80) if ((cur[2] & 0xc0) != 0x80)
goto encoding_error; goto encoding_error;
if ((c & 0xf0) == 0xf0) { if ((c & 0xf0) == 0xf0) {
if (((c & 0xf8) != 0xf0) || if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
((cur[3] & 0xc0) != 0x80)) goto encoding_error;
goto encoding_error; /* 4-byte code */
/* 4-byte code */ *len = 4;
*len = 4; val = (cur[0] & 0x7) << 18;
val = (cur[0] & 0x7) << 18; val |= (cur[1] & 0x3f) << 12;
val |= (cur[1] & 0x3f) << 12; val |= (cur[2] & 0x3f) << 6;
val |= (cur[2] & 0x3f) << 6; val |= cur[3] & 0x3f;
val |= cur[3] & 0x3f; } else {
} else { /* 3-byte code */
/* 3-byte code */ *len = 3;
*len = 3; val = (cur[0] & 0xf) << 12;
val = (cur[0] & 0xf) << 12; val |= (cur[1] & 0x3f) << 6;
val |= (cur[1] & 0x3f) << 6; val |= cur[2] & 0x3f;
val |= cur[2] & 0x3f; }
} } else {
} else { /* 2-byte code */
/* 2-byte code */ *len = 2;
*len = 2; val = (cur[0] & 0x1f) << 6;
val = (cur[0] & 0x1f) << 6; val |= cur[1] & 0x3f;
val |= cur[1] & 0x3f; }
} if (!IS_CHAR(val)) {
if (!IS_CHAR(val)) { if ((ctxt != NULL) && (ctxt->sax != NULL) &&
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
(ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData,
ctxt->sax->error(ctxt->userData, "Char 0x%X out of allowed range\n",
"Char 0x%X out of allowed range\n", val); val);
ctxt->errNo = XML_ERR_INVALID_ENCODING; ctxt->errNo = XML_ERR_INVALID_ENCODING;
ctxt->wellFormed = 0; ctxt->wellFormed = 0;
ctxt->disableSAX = 1; ctxt->disableSAX = 1;
} }
return(val); return (val);
} else { } else {
/* 1-byte code */ /* 1-byte code */
*len = 1; *len = 1;
return((int) *cur); return ((int) *cur);
} }
} }
/* /*
* Assume it's a fixed length encoding (1) with * Assume it's a fixed length encoding (1) with
@@ -1460,8 +1461,9 @@ xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
* XML constructs only use < 128 chars * XML constructs only use < 128 chars
*/ */
*len = 1; *len = 1;
return((int) *cur); return ((int) *cur);
encoding_error: encoding_error:
/* /*
* If we detect an UTF8 error that probably mean that the * If we detect an UTF8 error that probably mean that the
* input encoding didn't get properly advertised in the * input encoding didn't get properly advertised in the
@@ -1469,17 +1471,20 @@ encoding_error:
* to ISO-Latin-1 (if you don't like this policy, just declare the * to ISO-Latin-1 (if you don't like this policy, just declare the
* encoding !) * encoding !)
*/ */
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { if (ctxt != NULL) {
ctxt->sax->error(ctxt->userData, if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
"Input is not proper UTF-8, indicate encoding !\n"); ctxt->sax->error(ctxt->userData,
ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", "Input is not proper UTF-8, indicate encoding !\n");
ctxt->input->cur[0], ctxt->input->cur[1], ctxt->sax->error(ctxt->userData,
ctxt->input->cur[2], ctxt->input->cur[3]); "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
ctxt->input->cur[0], ctxt->input->cur[1],
ctxt->input->cur[2], ctxt->input->cur[3]);
}
ctxt->errNo = XML_ERR_INVALID_ENCODING;
} }
ctxt->errNo = XML_ERR_INVALID_ENCODING;
*len = 1; *len = 1;
return((int) *cur); return ((int) *cur);
} }
/** /**

197
valid.c
View File

@@ -2565,23 +2565,29 @@ xmlIsMixedElement(xmlDocPtr doc, const xmlChar *name) {
static int static int
xmlValidateNameValue(const xmlChar *value) { xmlValidateNameValue(const xmlChar *value) {
const xmlChar *cur; const xmlChar *cur;
int val, len;
if (value == NULL) return(0); if (value == NULL) return(0);
cur = value; cur = value;
val = xmlStringCurrentChar(NULL, cur, &len);
if (!IS_LETTER(*cur) && (*cur != '_') && cur += len;
(*cur != ':')) { if (!IS_LETTER(val) && (val != '_') &&
(val != ':')) {
return(0); return(0);
} }
while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) || val = xmlStringCurrentChar(NULL, cur, &len);
(*cur == '.') || (*cur == '-') || cur += len;
(*cur == '_') || (*cur == ':') || while ((IS_LETTER(val)) || (IS_DIGIT(val)) ||
(IS_COMBINING(*cur)) || (val == '.') || (val == '-') ||
(IS_EXTENDER(*cur))) (val == '_') || (val == ':') ||
cur++; (IS_COMBINING(val)) ||
(IS_EXTENDER(val))) {
val = xmlStringCurrentChar(NULL, cur, &len);
cur += len;
}
if (*cur != 0) return(0); if (val != 0) return(0);
return(1); return(1);
} }
@@ -2598,39 +2604,53 @@ xmlValidateNameValue(const xmlChar *value) {
static int static int
xmlValidateNamesValue(const xmlChar *value) { xmlValidateNamesValue(const xmlChar *value) {
const xmlChar *cur; const xmlChar *cur;
int val, len;
if (value == NULL) return(0); if (value == NULL) return(0);
cur = value; cur = value;
val = xmlStringCurrentChar(NULL, cur, &len);
cur += len;
if (!IS_LETTER(*cur) && (*cur != '_') && if (!IS_LETTER(val) && (val != '_') &&
(*cur != ':')) { (val != ':')) {
return(0); return(0);
} }
while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) || val = xmlStringCurrentChar(NULL, cur, &len);
(*cur == '.') || (*cur == '-') || cur += len;
(*cur == '_') || (*cur == ':') || while ((IS_LETTER(val)) || (IS_DIGIT(val)) ||
(IS_COMBINING(*cur)) || (val == '.') || (val == '-') ||
(IS_EXTENDER(*cur))) (val == '_') || (val == ':') ||
cur++; (IS_COMBINING(val)) ||
(IS_EXTENDER(val))) {
while (IS_BLANK(*cur)) { val = xmlStringCurrentChar(NULL, cur, &len);
while (IS_BLANK(*cur)) cur++; cur += len;
if (!IS_LETTER(*cur) && (*cur != '_') &&
(*cur != ':')) {
return(0);
}
while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
(*cur == '.') || (*cur == '-') ||
(*cur == '_') || (*cur == ':') ||
(IS_COMBINING(*cur)) ||
(IS_EXTENDER(*cur)))
cur++;
} }
if (*cur != 0) return(0); while (IS_BLANK(val)) {
while (IS_BLANK(val)) {
val = xmlStringCurrentChar(NULL, cur, &len);
cur += len;
}
if (!IS_LETTER(val) && (val != '_') &&
(val != ':')) {
return(0);
}
val = xmlStringCurrentChar(NULL, cur, &len);
cur += len;
while ((IS_LETTER(val)) || (IS_DIGIT(val)) ||
(val == '.') || (val == '-') ||
(val == '_') || (val == ':') ||
(IS_COMBINING(val)) ||
(IS_EXTENDER(val))) {
val = xmlStringCurrentChar(NULL, cur, &len);
cur += len;
}
}
if (val != 0) return(0);
return(1); return(1);
} }
@@ -2649,25 +2669,30 @@ xmlValidateNamesValue(const xmlChar *value) {
static int static int
xmlValidateNmtokenValue(const xmlChar *value) { xmlValidateNmtokenValue(const xmlChar *value) {
const xmlChar *cur; const xmlChar *cur;
int val, len;
if (value == NULL) return(0); if (value == NULL) return(0);
cur = value; cur = value;
val = xmlStringCurrentChar(NULL, cur, &len);
cur += len;
if (!IS_LETTER(*cur) && !IS_DIGIT(*cur) && if (!IS_LETTER(val) && !IS_DIGIT(val) &&
(*cur != '.') && (*cur != '-') && (val != '.') && (val != '-') &&
(*cur != '_') && (*cur != ':') && (val != '_') && (val != ':') &&
(!IS_COMBINING(*cur)) && (!IS_COMBINING(val)) &&
(!IS_EXTENDER(*cur))) (!IS_EXTENDER(val)))
return(0); return(0);
while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) || while ((IS_LETTER(val)) || (IS_DIGIT(val)) ||
(*cur == '.') || (*cur == '-') || (val == '.') || (val == '-') ||
(*cur == '_') || (*cur == ':') || (val == '_') || (val == ':') ||
(IS_COMBINING(*cur)) || (IS_COMBINING(val)) ||
(IS_EXTENDER(*cur))) (IS_EXTENDER(val))) {
cur++; val = xmlStringCurrentChar(NULL, cur, &len);
cur += len;
}
if (*cur != 0) return(0); if (val != 0) return(0);
return(1); return(1);
} }
@@ -2686,45 +2711,59 @@ xmlValidateNmtokenValue(const xmlChar *value) {
static int static int
xmlValidateNmtokensValue(const xmlChar *value) { xmlValidateNmtokensValue(const xmlChar *value) {
const xmlChar *cur; const xmlChar *cur;
int val, len;
if (value == NULL) return(0); if (value == NULL) return(0);
cur = value; cur = value;
val = xmlStringCurrentChar(NULL, cur, &len);
cur += len;
while (IS_BLANK(*cur)) cur++; while (IS_BLANK(val)) {
if (!IS_LETTER(*cur) && !IS_DIGIT(*cur) && val = xmlStringCurrentChar(NULL, cur, &len);
(*cur != '.') && (*cur != '-') && cur += len;
(*cur != '_') && (*cur != ':') &&
(!IS_COMBINING(*cur)) &&
(!IS_EXTENDER(*cur)))
return(0);
while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
(*cur == '.') || (*cur == '-') ||
(*cur == '_') || (*cur == ':') ||
(IS_COMBINING(*cur)) ||
(IS_EXTENDER(*cur)))
cur++;
while (IS_BLANK(*cur)) {
while (IS_BLANK(*cur)) cur++;
if (*cur == 0) return(1);
if (!IS_LETTER(*cur) && !IS_DIGIT(*cur) &&
(*cur != '.') && (*cur != '-') &&
(*cur != '_') && (*cur != ':') &&
(!IS_COMBINING(*cur)) &&
(!IS_EXTENDER(*cur)))
return(0);
while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
(*cur == '.') || (*cur == '-') ||
(*cur == '_') || (*cur == ':') ||
(IS_COMBINING(*cur)) ||
(IS_EXTENDER(*cur)))
cur++;
} }
if (*cur != 0) return(0); if (!IS_LETTER(val) && !IS_DIGIT(val) &&
(val != '.') && (val != '-') &&
(val != '_') && (val != ':') &&
(!IS_COMBINING(val)) &&
(!IS_EXTENDER(val)))
return(0);
while ((IS_LETTER(val)) || (IS_DIGIT(val)) ||
(val == '.') || (val == '-') ||
(val == '_') || (val == ':') ||
(IS_COMBINING(val)) ||
(IS_EXTENDER(val))) {
val = xmlStringCurrentChar(NULL, cur, &len);
cur += len;
}
while (IS_BLANK(val)) {
while (IS_BLANK(val)) {
val = xmlStringCurrentChar(NULL, cur, &len);
cur += len;
}
if (val == 0) return(1);
if (!IS_LETTER(val) && !IS_DIGIT(val) &&
(val != '.') && (val != '-') &&
(val != '_') && (val != ':') &&
(!IS_COMBINING(val)) &&
(!IS_EXTENDER(val)))
return(0);
while ((IS_LETTER(val)) || (IS_DIGIT(val)) ||
(val == '.') || (val == '-') ||
(val == '_') || (val == ':') ||
(IS_COMBINING(val)) ||
(IS_EXTENDER(val))) {
val = xmlStringCurrentChar(NULL, cur, &len);
cur += len;
}
}
if (val != 0) return(0);
return(1); return(1);
} }