1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-07-29 11:41:22 +03:00

html: Allow U+000C FORM FEED as whitespace

This commit is contained in:
Nick Wellnhofer
2024-09-09 02:30:18 +02:00
parent 6edf1a645e
commit bca6485476

View File

@ -38,6 +38,10 @@
#define HTML_PARSER_BIG_BUFFER_SIZE 1000
#define HTML_PARSER_BUFFER_SIZE 100
/*
 * IS_WS_HTML:
 * @c: a character value (byte or code point)
 *
 * True for the five characters treated as whitespace by the HTML
 * parser: TAB (0x09), LF (0x0A), FF (0x0C), CR (0x0D) and SPACE (0x20).
 * VT (0x0B) lies inside the 0x09..0x0D range but is not whitespace, so
 * it is excluded explicitly.
 *
 * Note: @c is evaluated more than once; pass only side-effect-free
 * expressions.
 */
#define IS_WS_HTML(c) \
    (((c) == 0x20) || \
     (((c) >= 0x09) && ((c) <= 0x0D) && ((c) != 0x0B)))
static int htmlOmittedDefaultValue = 1;
static int
@ -470,7 +474,7 @@ static int
htmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
int res = 0;
while (IS_BLANK_CH(*(ctxt->input->cur))) {
while (IS_WS_HTML(*(ctxt->input->cur))) {
if (*(ctxt->input->cur) == '\n') {
ctxt->input->line++; ctxt->input->col = 1;
} else ctxt->input->col++;
@ -2380,7 +2384,7 @@ static int areBlanks(htmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
xmlDtdPtr dtd;
for (j = 0;j < len;j++)
if (!(IS_BLANK_CH(str[j]))) return(0);
if (!(IS_WS_HTML(str[j]))) return(0);
if (CUR == 0) return(1);
if (CUR != '<') return(0);
@ -2538,7 +2542,7 @@ htmlParseHTMLName(htmlParserCtxtPtr ctxt, int attr) {
c = CUR_CHAR(l);
while ((c != 0) && (c != '/') && (c != '>') &&
((nbchar == 0) || (c != stop)) &&
(!IS_BLANK_CH(c))) {
(!IS_WS_HTML(c))) {
if (nbchar + l <= HTML_PARSER_BUFFER_SIZE) {
if ((c >= 'A') && (c <= 'Z')) {
buf[nbchar++] = c + 0x20;
@ -2805,7 +2809,7 @@ htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) {
while ((PARSER_STOPPED(ctxt) == 0) &&
(CUR != 0) && (CUR != stop)) {
if ((stop == 0) && (CUR == '>')) break;
if ((stop == 0) && (IS_BLANK_CH(CUR))) break;
if ((stop == 0) && (IS_WS_HTML(CUR))) break;
if (out - buffer > buffer_size - 100) {
int indx = out - buffer;
@ -3077,7 +3081,7 @@ htmlParseCharData(htmlParserCtxtPtr ctxt, int terminate) {
if ((ctxt->name[i] == 0) && (j < len)) {
int c = NXT(j);
if ((c == '>') || (c == '/') || (IS_BLANK_CH(c))) {
if ((c == '>') || (c == '/') || (IS_WS_HTML(c))) {
if ((mode == DATA_SCRIPT_ESC1) && (!solidus)) {
mode = DATA_SCRIPT_ESC2;
} else if (mode == DATA_SCRIPT_ESC2) {
@ -3585,7 +3589,7 @@ htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
/*
* skip blank
*/
if (encoding && IS_BLANK_CH(*encoding))
if (encoding && IS_WS_HTML(*encoding))
encoding = xmlStrcasestr(attvalue, BAD_CAST"=");
if (encoding && *encoding == '=') {
encoding ++;
@ -4716,26 +4720,26 @@ htmlParseLookupGt(xmlParserCtxtPtr ctxt) {
switch (state) {
case LSTATE_TAG_NAME:
if (IS_BLANK_CH(c))
if (IS_WS_HTML(c))
state = LSTATE_BEFORE_ATTR_NAME;
break;
case LSTATE_BEFORE_ATTR_NAME:
if (!IS_BLANK_CH(c))
if (!IS_WS_HTML(c))
state = LSTATE_ATTR_NAME;
break;
case LSTATE_ATTR_NAME:
if (c == '=')
state = LSTATE_BEFORE_ATTR_VALUE;
else if (IS_BLANK(c))
else if (IS_WS_HTML(c))
state = LSTATE_AFTER_ATTR_NAME;
break;
case LSTATE_AFTER_ATTR_NAME:
if (c == '=')
state = LSTATE_BEFORE_ATTR_VALUE;
else if (!IS_BLANK(c))
else if (!IS_WS_HTML(c))
state = LSTATE_ATTR_NAME;
break;
@ -4744,7 +4748,7 @@ htmlParseLookupGt(xmlParserCtxtPtr ctxt) {
state = LSTATE_ATTR_VALUE_DQUOTED;
else if (c == '\'')
state = LSTATE_ATTR_VALUE_SQUOTED;
else if (!IS_BLANK(c))
else if (!IS_WS_HTML(c))
state = LSTATE_ATTR_VALUE_UNQUOTED;
break;
@ -4759,7 +4763,7 @@ htmlParseLookupGt(xmlParserCtxtPtr ctxt) {
break;
case LSTATE_ATTR_VALUE_UNQUOTED:
if (IS_BLANK_CH(c))
if (IS_WS_HTML(c))
state = LSTATE_BEFORE_ATTR_NAME;
break;
}