1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-23 01:52:48 +03:00

Add support for some non-standard escapes in regular expressions.

This adds support for some non-standard escape sequences observed
in Microsoft's MSXML DLLs and used by Windows apps, and thus
needed by Wine. Some are also used in other XML implementations,
e.g. Java's.

This isn't intended to be final. We will probably want a way to
toggle these non-standard escape sequences on and off, as needed
by the caller.

Further discussion: https://gitlab.gnome.org/GNOME/libxml2/-/issues/260
This commit is contained in:
Damjan Jovanovic
2021-05-29 16:36:44 +02:00
committed by Nick Wellnhofer
parent d7b287b94c
commit ec8ff95ce3

View File

@@ -4969,7 +4969,10 @@ xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
(cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') ||
(cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') ||
(cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) ||
(cur == 0x5E)) {
(cur == 0x5E) || (cur == '!') || (cur == '"') || (cur == '#') ||
(cur == '$') || (cur == '%') || (cur == ',') || (cur == '/') ||
(cur == ':') || (cur == ';') || (cur == '=') || (cur == '>') ||
(cur == '@') || (cur == '`') || (cur == '~') || (cur == 'u')) {
if (ctxt->atom == NULL) {
ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
if (ctxt->atom != NULL) {
@@ -4983,6 +4986,22 @@ xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
case 't':
ctxt->atom->codepoint = '\t';
break;
case 'u':
{
char hex_buffer[5];
int loop;
for (loop = 0; loop < 4; loop++) {
NEXT;
if (!('0' <= CUR && CUR <= '9') && !('a' <= CUR && CUR <= 'f') && !('A' <= CUR && CUR <= 'F')) {
ERROR("Expecting hex digit");
return;
}
hex_buffer[loop] = CUR;
}
hex_buffer[4] = 0;
sscanf(hex_buffer, "%x", &ctxt->atom->codepoint);
break;
}
default:
ctxt->atom->codepoint = cur;
}