mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-10-24 13:33:01 +03:00
Document support for the non-standard escape sequences.
Support non-BMP code points written as a surrogate pair of escapes, '\uXXXX\uXXXX'.
This commit is contained in:
committed by
Nick Wellnhofer
parent
b66c19612c
commit
37ebf8a8b2
79
xmlregexp.c
79
xmlregexp.c
@@ -4907,6 +4907,47 @@ xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * parse_escaped_codeunit:
 * @ctxt:  a regexp parser context
 *
 * Reads four hexadecimal digits (upper or lower case) and combines them,
 * most significant digit first, into a single value. NEXT is invoked
 * before each digit is fetched with CUR, so no NEXT follows the last
 * digit read.
 *
 * Returns the accumulated value, or -1 after reporting
 * "Expecting hex digit" on the first character that is not a hex digit.
 */
static int parse_escaped_codeunit(xmlRegParserCtxtPtr ctxt)
{
    int code = 0;
    int left;

    for (left = 4; left > 0; left--) {
        int ch;
        int nibble;

        NEXT;
        ch = CUR;

        if ((ch >= '0') && (ch <= '9')) {
            nibble = ch - '0';
        } else if ((ch >= 'A') && (ch <= 'F')) {
            nibble = ch - 'A' + 10;
        } else if ((ch >= 'a') && (ch <= 'f')) {
            nibble = ch - 'a' + 10;
        } else {
            ERROR("Expecting hex digit");
            return -1;
        }

        /* Shift the digits gathered so far and append the new one. */
        code = code * 16 + nibble;
    }

    return code;
}
|
||||||
|
|
||||||
|
/**
 * parse_escaped_codepoint:
 * @ctxt:  a regexp parser context
 *
 * Reads one escaped code unit with parse_escaped_codeunit(). When that
 * unit lies in the high-surrogate range 0xD800-0xDBFF, a following
 * '\', 'u' and a second code unit in the low-surrogate range
 * 0xDC00-0xDFFF are required, and the two units are combined into a
 * single code point at or above 0x10000.
 *
 * Returns the code point; the first code unit unchanged when it is not a
 * high surrogate (including -1 if reading it failed); or -1 after
 * reporting "Invalid low surrogate pair code unit" when a high surrogate
 * is not followed by a valid low surrogate escape.
 */
static int parse_escaped_codepoint(xmlRegParserCtxtPtr ctxt)
{
    int high;
    int low = -1;   /* stays outside the low-surrogate range until parsed */

    high = parse_escaped_codeunit(ctxt);
    if ((high < 0xD800) || (high > 0xDBFF)) {
        /* Not a high surrogate: nothing more to read. */
        return high;
    }

    /* A high surrogate must be followed by a second "\uXXXX" escape. */
    NEXT;
    if (CUR == '\\') {
        NEXT;
        if (CUR == 'u') {
            low = parse_escaped_codeunit(ctxt);
        }
    }

    if ((low >= 0xDC00) && (low <= 0xDFFF)) {
        /* Ten bits from each half, offset past the 16-bit range. */
        return 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00);
    }

    ERROR("Invalid low surrogate pair code unit");
    return -1;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* xmlFAParseCharClassEsc:
|
* xmlFAParseCharClassEsc:
|
||||||
* @ctxt: a regexp parser context
|
* @ctxt: a regexp parser context
|
||||||
@@ -4969,10 +5010,25 @@ xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
|
|||||||
(cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') ||
|
(cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') ||
|
||||||
(cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') ||
|
(cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') ||
|
||||||
(cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) ||
|
(cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) ||
|
||||||
(cur == 0x5E) || (cur == '!') || (cur == '"') || (cur == '#') ||
|
(cur == 0x5E) ||
|
||||||
(cur == '$') || (cur == '%') || (cur == ',') || (cur == '/') ||
|
|
||||||
(cur == ':') || (cur == ';') || (cur == '=') || (cur == '>') ||
|
/* Non-standard escape sequences:
|
||||||
(cur == '@') || (cur == '`') || (cur == '~') || (cur == 'u')) {
|
* Java 1.8|.NET Core 3.1|MSXML 6 */
|
||||||
|
(cur == '!') || /* + | + | + */
|
||||||
|
(cur == '"') || /* + | + | + */
|
||||||
|
(cur == '#') || /* + | + | + */
|
||||||
|
(cur == '$') || /* + | + | + */
|
||||||
|
(cur == '%') || /* + | + | + */
|
||||||
|
(cur == ',') || /* + | + | + */
|
||||||
|
(cur == '/') || /* + | + | + */
|
||||||
|
(cur == ':') || /* + | + | + */
|
||||||
|
(cur == ';') || /* + | + | + */
|
||||||
|
(cur == '=') || /* + | + | + */
|
||||||
|
(cur == '>') || /* | + | + */
|
||||||
|
(cur == '@') || /* + | + | + */
|
||||||
|
(cur == '`') || /* + | + | + */
|
||||||
|
(cur == '~') || /* + | + | + */
|
||||||
|
(cur == 'u')) { /* | + | + */
|
||||||
if (ctxt->atom == NULL) {
|
if (ctxt->atom == NULL) {
|
||||||
ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
|
ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
|
||||||
if (ctxt->atom != NULL) {
|
if (ctxt->atom != NULL) {
|
||||||
@@ -4987,21 +5043,12 @@ xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
|
|||||||
ctxt->atom->codepoint = '\t';
|
ctxt->atom->codepoint = '\t';
|
||||||
break;
|
break;
|
||||||
case 'u':
|
case 'u':
|
||||||
{
|
cur = parse_escaped_codepoint(ctxt);
|
||||||
char hex_buffer[5];
|
if (cur < 0) {
|
||||||
int loop;
|
|
||||||
for (loop = 0; loop < 4; loop++) {
|
|
||||||
NEXT;
|
|
||||||
if (!('0' <= CUR && CUR <= '9') && !('a' <= CUR && CUR <= 'f') && !('A' <= CUR && CUR <= 'F')) {
|
|
||||||
ERROR("Expecting hex digit");
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
hex_buffer[loop] = CUR;
|
ctxt->atom->codepoint = cur;
|
||||||
}
|
|
||||||
hex_buffer[4] = 0;
|
|
||||||
ctxt->atom->codepoint = (int)strtoul(hex_buffer, NULL, 16);
|
|
||||||
break;
|
break;
|
||||||
}
|
|
||||||
default:
|
default:
|
||||||
ctxt->atom->codepoint = cur;
|
ctxt->atom->codepoint = cur;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user