1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-24 13:33:01 +03:00

entities: Rework serialization of numeric character references

This commit is contained in:
Nick Wellnhofer
2024-07-12 03:07:57 +02:00
parent 8d1606265d
commit 1cfc5b8089
4 changed files with 92 additions and 71 deletions

View File

@@ -43,6 +43,7 @@
#include "private/buf.h" #include "private/buf.h"
#include "private/enc.h" #include "private/enc.h"
#include "private/entities.h"
#include "private/error.h" #include "private/error.h"
#ifdef LIBXML_ICU_ENABLED #ifdef LIBXML_ICU_ENABLED
@@ -1744,8 +1745,7 @@ retry:
* and continue the transcoding phase, hoping the error * and continue the transcoding phase, hoping the error
* did not mangle the encoder state. * did not mangle the encoder state.
*/ */
charrefLen = snprintf((char *) &charref[0], sizeof(charref), charrefLen = xmlSerializeDecCharRef((char *) charref, cur);
"&#%d;", cur);
xmlBufGrow(out, charrefLen * 4); xmlBufGrow(out, charrefLen * 4);
c_out = xmlBufAvail(out); c_out = xmlBufAvail(out);
c_in = charrefLen; c_in = charrefLen;
@@ -1856,8 +1856,7 @@ retry:
* and continue the transcoding phase, hoping the error * and continue the transcoding phase, hoping the error
* did not mangle the encoder state. * did not mangle the encoder state.
*/ */
charrefLen = snprintf((char *) &charref[0], sizeof(charref), charrefLen = xmlSerializeDecCharRef((char *) charref, cur);
"&#%d;", cur);
xmlBufferShrink(in, len); xmlBufferShrink(in, len);
xmlBufferGrow(out, charrefLen * 4); xmlBufferGrow(out, charrefLen * 4);
written = out->size - out->use - 1; written = out->size - out->use - 1;

View File

@@ -512,6 +512,71 @@ xmlGetDocEntity(const xmlDoc *doc, const xmlChar *name) {
return(xmlGetPredefinedEntity(name)); return(xmlGetPredefinedEntity(name));
} }
/**
 * xmlSerializeHexCharRef:
 * @buf:  destination buffer, must have room for at least 10 bytes
 * @val:  code point to serialize
 *
 * Writes a hexadecimal character reference such as "&#xA0;" into @buf,
 * using upper-case digits and no leading zeros. Only the low 24 bits
 * of @val are considered. No terminating zero byte is written.
 *
 * Returns the number of bytes written.
 */
int
xmlSerializeHexCharRef(char *buf, int val) {
    static const char hexDigits[] = "0123456789ABCDEF";
    char *cursor = buf + 3;
    int topShift;
    int shift;

    buf[0] = '&';
    buf[1] = '#';
    buf[2] = 'x';

    /* Locate the most significant non-zero byte of the low 24 bits. */
    if (val & 0xFF0000)
        topShift = 16;
    else if (val & 0x00FF00)
        topShift = 8;
    else
        topShift = 0;

    /* Start one nibble higher when the upper half of that byte is set. */
    if (val & (0xF0 << topShift))
        topShift += 4;

    /* Emit nibbles from most to least significant; always at least one. */
    for (shift = topShift; shift >= 0; shift -= 4)
        *cursor++ = hexDigits[(val >> shift) & 0x0F];

    *cursor++ = ';';

    return(cursor - buf);
}
/**
 * xmlSerializeDecCharRef:
 * @buf:  destination buffer, must have room for at least 10 bytes
 * @val:  code point to serialize
 *
 * Writes a decimal character reference such as "&#160;" into @buf.
 * At most seven digits are produced, which covers every value up to
 * 9999999. No terminating zero byte is written.
 *
 * Returns the number of bytes written.
 */
int
xmlSerializeDecCharRef(char *buf, int val) {
    char *digits = buf + 2;
    int width = 1;
    int limit = 10;
    int pos;

    buf[0] = '&';
    buf[1] = '#';

    /* Count decimal digits, never going beyond seven. */
    while ((width < 7) && (val >= limit)) {
        width += 1;
        limit *= 10;
    }

    /* Place the terminator, then fill the digits from the right. */
    pos = width;
    digits[pos] = ';';
    while (pos > 0) {
        pos -= 1;
        digits[pos] = '0' + val % 10;
        val /= 10;
    }

    return(width + 3);
}
static const char xmlEscapeSafe[128] = { static const char xmlEscapeSafe[128] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -540,7 +605,7 @@ xmlEscapeText(const xmlChar *text, int flags) {
unescaped = cur; unescaped = cur;
while (*cur != '\0') { while (*cur != '\0') {
char buf[13]; char buf[12];
const xmlChar *end; const xmlChar *end;
const xmlChar *repl; const xmlChar *repl;
size_t used; size_t used;
@@ -618,7 +683,7 @@ xmlEscapeText(const xmlChar *text, int flags) {
val = 0xFFFD; val = 0xFFFD;
} }
replSize = snprintf(buf, sizeof(buf), "&#x%X;", val); replSize = xmlSerializeHexCharRef(buf, val);
repl = BAD_CAST buf; repl = BAD_CAST buf;
} else if ((flags & XML_ESCAPE_ALLOW_INVALID) || } else if ((flags & XML_ESCAPE_ALLOW_INVALID) ||
(c >= 0x20) || (c >= 0x20) ||

View File

@@ -27,6 +27,11 @@
#define XML_ESCAPE_QUOT (1u << 3) #define XML_ESCAPE_QUOT (1u << 3)
#define XML_ESCAPE_ALLOW_INVALID (1u << 4) #define XML_ESCAPE_ALLOW_INVALID (1u << 4)
XML_HIDDEN int
xmlSerializeHexCharRef(char *buf, int val);
XML_HIDDEN int
xmlSerializeDecCharRef(char *buf, int val);
XML_HIDDEN xmlChar * XML_HIDDEN xmlChar *
xmlEscapeText(const xmlChar *text, int flags); xmlEscapeText(const xmlChar *text, int flags);

View File

@@ -125,51 +125,10 @@ xmlSaveErr(xmlOutputBufferPtr out, int code, xmlNodePtr node,
* Special escaping routines * * Special escaping routines *
* * * *
************************************************************************/ ************************************************************************/
/**
 * xmlSerializeHexCharRef:
 * @out:  destination buffer, must have room for at least 11 bytes
 * @val:  code point to serialize
 *
 * Writes a zero-terminated hexadecimal character reference such as
 * "&#xA0;" into @out, using upper-case digits and no leading zeros.
 *
 * Exactly as many digits as the computed width are written, so a value
 * of zero yields "&#x0;" instead of leaving the digit byte unset, and
 * values above 0xFFFFFF are reduced to their low 24 bits instead of
 * overwriting the "&#x" prefix.
 *
 * Returns a pointer to the terminating zero byte.
 */
static char *
xmlSerializeHexCharRef(char *out, int val) {
    static const char hexDigits[] = "0123456789ABCDEF";
    /* Work on an unsigned copy so shifting is well defined. */
    unsigned int bits = (unsigned int) val;
    int width;
    int i;

    *out++ = '&';
    *out++ = '#';
    *out++ = 'x';

    if (val < 0x10) width = 1;
    else if (val < 0x100) width = 2;
    else if (val < 0x1000) width = 3;
    else if (val < 0x10000) width = 4;
    else if (val < 0x100000) width = 5;
    else width = 6;

    /* Fill the digits right to left; the count is fixed by width. */
    for (i = width - 1; i >= 0; i--) {
        out[i] = hexDigits[bits & 0xF];
        bits >>= 4;
    }
    out += width;

    *out++ = ';';
    *out = 0;
    return(out);
}
static void static void
xmlSerializeText(xmlOutputBufferPtr buf, const xmlChar *string, xmlSerializeText(xmlOutputBufferPtr buf, const xmlChar *string,
unsigned flags) { unsigned flags) {
char tmp[12];
const char *base, *cur; const char *base, *cur;
if (string == NULL) if (string == NULL)
@@ -178,33 +137,12 @@ xmlSerializeText(xmlOutputBufferPtr buf, const xmlChar *string,
base = cur = (const char *) string; base = cur = (const char *) string;
while (*cur != 0) { while (*cur != 0) {
char tempBuf[12];
const char *repl = NULL; const char *repl = NULL;
int replSize = 0; int replSize = 0;
int chunkSize = 1;
int c = (unsigned char) *cur; int c = (unsigned char) *cur;
if ((c >= 0x80) && (flags & XML_ESCAPE_NON_ASCII)) {
int val = 0, l = 4;
if (base != cur)
xmlOutputBufferWrite(buf, cur - base, base);
val = xmlGetUTF8Char((const xmlChar *) cur, &l);
if (val < 0) {
val = 0xFFFD;
cur++;
} else {
if (!IS_CHAR(val))
val = 0xFFFD;
cur += l;
}
xmlSerializeHexCharRef(tmp, val);
xmlOutputBufferWriteString(buf, tmp);
base = cur;
continue;
}
switch (c) { switch (c) {
case '\t': case '\t':
if (flags & XML_ESCAPE_ATTR) { if (flags & XML_ESCAPE_ATTR) {
@@ -255,6 +193,20 @@ xmlSerializeText(xmlOutputBufferPtr buf, const xmlChar *string,
if (c < 0x20) { if (c < 0x20) {
repl = "&#xFFFD;"; repl = "&#xFFFD;";
replSize = 8; replSize = 8;
} else if ((c >= 0x80) && (flags & XML_ESCAPE_NON_ASCII)) {
int val = 0, l = 4;
val = xmlGetUTF8Char((const xmlChar *) cur, &l);
if (val < 0) {
val = 0xFFFD;
} else {
if (!IS_CHAR(val))
val = 0xFFFD;
chunkSize = l;
}
replSize = xmlSerializeHexCharRef(tempBuf, val);
repl = tempBuf;
} }
break; break;
} }
@@ -265,7 +217,7 @@ xmlSerializeText(xmlOutputBufferPtr buf, const xmlChar *string,
if (base != cur) if (base != cur)
xmlOutputBufferWrite(buf, cur - base, base); xmlOutputBufferWrite(buf, cur - base, base);
xmlOutputBufferWrite(buf, replSize, repl); xmlOutputBufferWrite(buf, replSize, repl);
cur++; cur += chunkSize;
base = cur; base = cur;
} }
} }