mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-10-28 23:14:57 +03:00
parser: Simplify xmlStringCurrentChar
Start to move away from using this function.
This commit is contained in:
@@ -321,7 +321,6 @@ htmlNodeInfoPop(htmlParserCtxtPtr ctxt)
|
|||||||
************/
|
************/
|
||||||
|
|
||||||
#define CUR_CHAR(l) htmlCurrentChar(ctxt, &l)
|
#define CUR_CHAR(l) htmlCurrentChar(ctxt, &l)
|
||||||
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
|
|
||||||
|
|
||||||
#define COPY_BUF(l,b,i,v) \
|
#define COPY_BUF(l,b,i,v) \
|
||||||
if (l == 1) b[i++] = v; \
|
if (l == 1) b[i++] = v; \
|
||||||
|
|||||||
@@ -1029,104 +1029,18 @@ incomplete_sequence:
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
int
|
int
|
||||||
xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
|
xmlStringCurrentChar(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,
|
||||||
{
|
const xmlChar *cur, int *len) {
|
||||||
if ((len == NULL) || (cur == NULL)) return(0);
|
int c;
|
||||||
if ((ctxt == NULL) || (ctxt->input == NULL) ||
|
|
||||||
((ctxt->input->flags & XML_INPUT_8_BIT) == 0)) {
|
|
||||||
/*
|
|
||||||
* We are supposed to handle UTF8, check it's valid
|
|
||||||
* From rfc2044: encoding of the Unicode values on UTF-8:
|
|
||||||
*
|
|
||||||
* UCS-4 range (hex.) UTF-8 octet sequence (binary)
|
|
||||||
* 0000 0000-0000 007F 0xxxxxxx
|
|
||||||
* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
|
|
||||||
* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
|
|
||||||
*
|
|
||||||
* Check for the 0x110000 limit too
|
|
||||||
*/
|
|
||||||
unsigned char c;
|
|
||||||
unsigned int val;
|
|
||||||
|
|
||||||
c = *cur;
|
if ((cur == NULL) || (len == NULL))
|
||||||
if (c & 0x80) {
|
return(0);
|
||||||
if ((cur[1] & 0xc0) != 0x80)
|
|
||||||
goto encoding_error;
|
|
||||||
if ((c & 0xe0) == 0xe0) {
|
|
||||||
|
|
||||||
if ((cur[2] & 0xc0) != 0x80)
|
/* cur is zero-terminated, so we can lie about its length. */
|
||||||
goto encoding_error;
|
*len = 4;
|
||||||
if ((c & 0xf0) == 0xf0) {
|
c = xmlGetUTF8Char(cur, len);
|
||||||
if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
|
|
||||||
goto encoding_error;
|
|
||||||
/* 4-byte code */
|
|
||||||
*len = 4;
|
|
||||||
val = (cur[0] & 0x7) << 18;
|
|
||||||
val |= (cur[1] & 0x3f) << 12;
|
|
||||||
val |= (cur[2] & 0x3f) << 6;
|
|
||||||
val |= cur[3] & 0x3f;
|
|
||||||
} else {
|
|
||||||
/* 3-byte code */
|
|
||||||
*len = 3;
|
|
||||||
val = (cur[0] & 0xf) << 12;
|
|
||||||
val |= (cur[1] & 0x3f) << 6;
|
|
||||||
val |= cur[2] & 0x3f;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
/* 2-byte code */
|
|
||||||
*len = 2;
|
|
||||||
val = (cur[0] & 0x1f) << 6;
|
|
||||||
val |= cur[1] & 0x3f;
|
|
||||||
}
|
|
||||||
if (!IS_CHAR(val)) {
|
|
||||||
xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
|
|
||||||
"Char 0x%X out of allowed range\n", val);
|
|
||||||
}
|
|
||||||
return (val);
|
|
||||||
} else {
|
|
||||||
/* 1-byte code */
|
|
||||||
*len = 1;
|
|
||||||
return (*cur);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
* Assume it's a fixed length encoding (1) with
|
|
||||||
* a compatible encoding for the ASCII set, since
|
|
||||||
* XML constructs only use < 128 chars
|
|
||||||
*/
|
|
||||||
*len = 1;
|
|
||||||
return (*cur);
|
|
||||||
encoding_error:
|
|
||||||
|
|
||||||
/*
|
return((c < 0) ? 0 : c);
|
||||||
* An encoding problem may arise from a truncated input buffer
|
|
||||||
* splitting a character in the middle. In that case do not raise
|
|
||||||
* an error but return 0 to indicate an end of stream problem
|
|
||||||
*/
|
|
||||||
if ((ctxt == NULL) || (ctxt->input == NULL) ||
|
|
||||||
(ctxt->input->end - ctxt->input->cur < 4)) {
|
|
||||||
*len = 0;
|
|
||||||
return(0);
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
* If we detect an UTF8 error that probably mean that the
|
|
||||||
* input encoding didn't get properly advertised in the
|
|
||||||
* declaration header. Report the error and switch the encoding
|
|
||||||
* to ISO-Latin-1 (if you don't like this policy, just declare the
|
|
||||||
* encoding !)
|
|
||||||
*/
|
|
||||||
{
|
|
||||||
char buffer[150];
|
|
||||||
|
|
||||||
snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
|
|
||||||
ctxt->input->cur[0], ctxt->input->cur[1],
|
|
||||||
ctxt->input->cur[2], ctxt->input->cur[3]);
|
|
||||||
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
|
|
||||||
"Input is not proper UTF-8, indicate encoding !\n%s",
|
|
||||||
BAD_CAST buffer, NULL);
|
|
||||||
}
|
|
||||||
*len = 1;
|
|
||||||
return (*cur);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -1715,8 +1715,9 @@ xmlXIncludeLoadTxt(xmlXIncludeCtxtPtr ctxt, const xmlChar *url,
|
|||||||
int cur;
|
int cur;
|
||||||
int l;
|
int l;
|
||||||
|
|
||||||
cur = xmlStringCurrentChar(NULL, &content[i], &l);
|
l = len - i;
|
||||||
if (!IS_CHAR(cur)) {
|
cur = xmlGetUTF8Char(&content[i], &l);
|
||||||
|
if ((cur < 0) || (!IS_CHAR(cur))) {
|
||||||
xmlXIncludeErr(ctxt, ref->elem, XML_XINCLUDE_INVALID_CHAR,
|
xmlXIncludeErr(ctxt, ref->elem, XML_XINCLUDE_INVALID_CHAR,
|
||||||
"%s contains invalid char\n", URL);
|
"%s contains invalid char\n", URL);
|
||||||
goto error;
|
goto error;
|
||||||
|
|||||||
Reference in New Issue
Block a user