From ec8ff95ce3c92caaa23e70b5df80418d83abd83d Mon Sep 17 00:00:00 2001 From: Damjan Jovanovic Date: Sat, 29 May 2021 16:36:44 +0200 Subject: [PATCH] Add support for some non-standard escapes in regular expressions. This adds support for some non-standard escape sequences observed in Microsoft's MSXML DLLs and used by Windows apps, and thus needed by Wine. Some are also used in other XML implementations, e.g. Java's. This isn't intended to be final. We will probably want to toggle these non-standard escape sequences on and off somehow, as needed by the caller. Further discussion: https://gitlab.gnome.org/GNOME/libxml2/-/issues/260 --- xmlregexp.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/xmlregexp.c b/xmlregexp.c index f9aac42f..92bae6fb 100644 --- a/xmlregexp.c +++ b/xmlregexp.c @@ -4969,7 +4969,10 @@ xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) { (cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') || (cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') || (cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) || - (cur == 0x5E)) { + (cur == 0x5E) || (cur == '!') || (cur == '"') || (cur == '#') || + (cur == '$') || (cur == '%') || (cur == ',') || (cur == '/') || + (cur == ':') || (cur == ';') || (cur == '=') || (cur == '>') || + (cur == '@') || (cur == '`') || (cur == '~') || (cur == 'u')) { if (ctxt->atom == NULL) { ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL); if (ctxt->atom != NULL) { @@ -4983,6 +4986,22 @@ xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) { case 't': ctxt->atom->codepoint = '\t'; break; + case 'u': + { + char hex_buffer[5]; + int loop; + for (loop = 0; loop < 4; loop++) { + NEXT; + if (!('0' <= CUR && CUR <= '9') && !('a' <= CUR && CUR <= 'f') && !('A' <= CUR && CUR <= 'F')) { + ERROR("Expecting hex digit"); + return; + } + hex_buffer[loop] = CUR; + } + hex_buffer[4] = 0; + sscanf(hex_buffer, "%x", &ctxt->atom->codepoint); + break; + } default: ctxt->atom->codepoint = 
cur; }