1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-07-28 00:21:53 +03:00

patch from johan@evenhuis.nl for #107937 fixing some line counting

* HTMLparser.c parser.c parserInternals.c: patch from
  johan@evenhuis.nl for #107937 fixing some line counting
  problems, and some other cleanups.
* result/HTML/: this results in some line number changes
Daniel
This commit is contained in:
Daniel Veillard
2003-03-22 00:04:05 +00:00
parent 580ced8ee2
commit 77a90a7f8e
7 changed files with 186 additions and 154 deletions

View File

@ -1,3 +1,10 @@
Sat Mar 23 01:00:24 CET 2003 Daniel Veillard <daniel@veillard.com>
* HTMLparser.c parser.c parserInternals.c: patch from
johan@evenhuis.nl for #107937 fixing some line counting
problems, and some other cleanups.
* result/HTML/: this results in some line number changes
Fri Mar 21 22:19:14 CET 2003 Daniel Veillard <daniel@veillard.com>
* configure.in Makefile.am: fixed Red Hat bug #86118 use libxml2.spec

View File

@ -134,7 +134,7 @@ htmlnamePop(htmlParserCtxtPtr ctxt)
* UPP(n) returns the n'th next xmlChar converted to uppercase. Same as CUR
* it should be used only to compare on ASCII based substring.
* SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
* strings within the parser.
* strings without newlines within the parser.
*
* Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
*
@ -142,12 +142,13 @@ htmlnamePop(htmlParserCtxtPtr ctxt)
* UTF-8 if we are using this mode. It returns an int.
* NEXT Skip to the next character, this does the proper decoding
* in UTF-8 mode. It also pop-up unfinished entities on the fly.
* NEXTL(l) Skip the current unicode character of l xmlChars long.
* COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
*/
#define UPPER (toupper(*ctxt->input->cur))
#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val)
#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val)
#define NXT(val) ctxt->input->cur[(val)]
@ -167,7 +168,7 @@ htmlnamePop(htmlParserCtxtPtr ctxt)
/* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */
#define CUR ((int) (*ctxt->input->cur))
#define NEXT xmlNextChar(ctxt),ctxt->nbChars++
#define NEXT xmlNextChar(ctxt)
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
@ -2220,6 +2221,8 @@ htmlParseName(htmlParserCtxtPtr ctxt) {
count = in - ctxt->input->cur;
ret = xmlStrndup(ctxt->input->cur, count);
ctxt->input->cur = in;
ctxt->nbChars += count;
ctxt->input->col += count;
return(ret);
}
}
@ -5203,6 +5206,8 @@ htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data,
}
memset(ctxt, 0, sizeof(htmlParserCtxt));
htmlInitParserCtxt(ctxt);
if(enc==XML_CHAR_ENCODING_UTF8 || buf->encoder)
ctxt->charset=XML_CHAR_ENCODING_UTF8;
if (sax != NULL) {
if (ctxt->sax != &htmlDefaultSAXHandler)
xmlFree(ctxt->sax);
@ -5225,6 +5230,7 @@ htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data,
inputStream = htmlNewInputStream(ctxt);
if (inputStream == NULL) {
xmlFreeParserCtxt(ctxt);
xmlFree(buf);
return(NULL);
}

View File

@ -339,13 +339,14 @@ static int spacePop(xmlParserCtxtPtr ctxt) {
* NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
* to compare on ASCII based substring.
* SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
* strings within the parser.
*
* strings without newlines within the parser.
* NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
* defined char within the parser.
* Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
*
* NEXT Skip to the next character, this does the proper decoding
* in UTF-8 mode. It also pop-up unfinished entities on the fly.
* NEXTL(l) Skip l xmlChar in the input buffer
* NEXTL(l) Skip the current unicode character of l xmlChars long.
* CUR_CHAR(l) returns the current unicode character (int), set l
* to the number of xmlChars used for the encoding [0-5].
* CUR_SCHAR same but operate on a string instead of the context
@ -360,7 +361,7 @@ static int spacePop(xmlParserCtxtPtr ctxt) {
#define CUR_PTR ctxt->input->cur
#define SKIP(val) do { \
ctxt->nbChars += (val),ctxt->input->cur += (val); \
ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
if ((*ctxt->input->cur == 0) && \
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
@ -392,6 +393,7 @@ static void xmlGROW (xmlParserCtxtPtr ctxt) {
#define NEXT xmlNextChar(ctxt)
#define NEXT1 { \
ctxt->input->col++; \
ctxt->input->cur++; \
ctxt->nbChars++; \
if (*ctxt->input->cur == 0) \
@ -578,6 +580,7 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
}
if (RAW == ';') {
/* on purpose to avoid reentrancy problems with NEXT and SKIP */
ctxt->input->col++;
ctxt->nbChars ++;
ctxt->input->cur++;
}
@ -606,6 +609,7 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
}
if (RAW == ';') {
/* on purpose to avoid reentrancy problems with NEXT and SKIP */
ctxt->input->col++;
ctxt->nbChars ++;
ctxt->input->cur++;
}
@ -1897,6 +1901,8 @@ xmlParseName(xmlParserCtxtPtr ctxt) {
count = in - ctxt->input->cur;
ret = xmlStrndup(ctxt->input->cur, count);
ctxt->input->cur = in;
ctxt->nbChars += count;
ctxt->input->col += count;
return(ret);
}
}
@ -9149,6 +9155,7 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
inputStream = xmlNewInputStream(ctxt);
if (inputStream == NULL) {
xmlFreeParserCtxt(ctxt);
xmlFree(buf);
return(NULL);
}

View File

@ -1095,16 +1095,11 @@ xmlParserInputShrink(xmlParserInputPtr in) {
*/
void
xmlNextChar(xmlParserCtxtPtr ctxt) {
xmlNextChar(xmlParserCtxtPtr ctxt)
{
if (ctxt->instate == XML_PARSER_EOF)
return;
/*
* 2.11 End-of-Line Handling
* the literal two-character sequence "#xD#xA" or a standalone
* literal #xD, an XML processor must pass to the application
* the single character #xA.
*/
if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
if ((*ctxt->input->cur == 0) &&
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
@ -1117,10 +1112,21 @@ xmlNextChar(xmlParserCtxtPtr ctxt) {
*/
xmlPopInput(ctxt);
} else {
const unsigned char *cur;
unsigned char c;
/*
* 2.11 End-of-Line Handling
* the literal two-character sequence "#xD#xA" or a standalone
* literal #xD, an XML processor must pass to the application
* the single character #xA.
*/
if (*(ctxt->input->cur) == '\n') {
ctxt->input->line++; ctxt->input->col = 1;
} else ctxt->input->col++;
if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
ctxt->input->line++;
ctxt->input->col = 1;
} else
ctxt->input->col++;
/*
* We are supposed to handle UTF8, check it's valid
* From rfc2044: encoding of the Unicode values on UTF-8:
@ -1132,8 +1138,7 @@ xmlNextChar(xmlParserCtxtPtr ctxt) {
*
* Check for the 0x110000 limit too
*/
const unsigned char *cur = ctxt->input->cur;
unsigned char c;
cur = ctxt->input->cur;
c = *cur;
if (c & 0x80) {
@ -1173,10 +1178,12 @@ xmlNextChar(xmlParserCtxtPtr ctxt) {
if ((ctxt->sax != NULL) &&
(ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
"Char 0x%X out of allowed range\n", val);
"Char 0x%X out of allowed range\n",
val);
ctxt->errNo = XML_ERR_INVALID_ENCODING;
ctxt->wellFormed = 0;
if (ctxt->recovery == 0) ctxt->disableSAX = 1;
if (ctxt->recovery == 0)
ctxt->disableSAX = 1;
}
} else
/* 2-byte code */
@ -1184,19 +1191,23 @@ xmlNextChar(xmlParserCtxtPtr ctxt) {
} else
/* 1-byte code */
ctxt->input->cur++;
ctxt->nbChars++;
if (*ctxt->input->cur == 0)
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}
} else {
/*
* Assume it's a fixed length encoding (1) with
* a compatible encoding for the ASCII set, since
* XML constructs only use < 128 chars
*/
ctxt->input->cur++;
}
ctxt->nbChars++;
if (*ctxt->input->cur == 0)
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}
} else {
if (*(ctxt->input->cur) == '\n') {
ctxt->input->line++;
ctxt->input->col = 1;
} else
ctxt->input->col++;
ctxt->input->cur++;
ctxt->nbChars++;
if (*ctxt->input->cur == 0)
@ -1219,7 +1230,8 @@ encoding_error:
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
ctxt->sax->error(ctxt->userData,
"Input is not proper UTF-8, indicate encoding !\n");
ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
ctxt->sax->error(ctxt->userData,
"Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
ctxt->input->cur[0], ctxt->input->cur[1],
ctxt->input->cur[2], ctxt->input->cur[3]);
}

View File

@ -1,3 +1,3 @@
./test/HTML/doc2.htm:5: error: Misplaced DOCTYPE declaration
./test/HTML/doc2.htm:10: error: Misplaced DOCTYPE declaration
<!-- END Naviscope Javascript --><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Tr
^

View File

@ -1,69 +1,69 @@
./test/HTML/doc3.htm:5: error: Misplaced DOCTYPE declaration
./test/HTML/doc3.htm:10: error: Misplaced DOCTYPE declaration
<!-- END Naviscope Javascript --><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//E
^
./test/HTML/doc3.htm:47: error: htmlParseEntityRef: expecting ';'
./test/HTML/doc3.htm:52: error: htmlParseEntityRef: expecting ';'
href="http://ads.gamesquad.net/addclick.exe/adclick.cgi?REGION=game|tech|ent&i
^
./test/HTML/doc3.htm:47: error: htmlParseEntityRef: expecting ';'
./test/HTML/doc3.htm:52: error: htmlParseEntityRef: expecting ';'
_top"><img src="http://ads.gamesquad.net/addclick.exe/adcycle.cgi?group=52&medi
^
./test/HTML/doc3.htm:47: error: htmlParseEntityRef: expecting ';'
./test/HTML/doc3.htm:52: error: htmlParseEntityRef: expecting ';'
><img src="http://ads.gamesquad.net/addclick.exe/adcycle.cgi?group=52&media=1&i
^
./test/HTML/doc3.htm:140: error: error parsing attribute name
./test/HTML/doc3.htm:145: error: error parsing attribute name
width=70 Gentus?.?></A><BR><A
^
./test/HTML/doc3.htm:143: error: Unexpected end tag : p
./test/HTML/doc3.htm:148: error: Unexpected end tag : p
</P></TD></TR></TBODY></TABLE></CENTER></TD></TR></TBODY></TABLE></CENTER></P
^
./test/HTML/doc3.htm:231: error: Unexpected end tag : font
./test/HTML/doc3.htm:236: error: Unexpected end tag : font
Specials<BR><BR></FONT></A><BR></FONT></A><B><FONT color=yellow
^
./test/HTML/doc3.htm:231: error: Unexpected end tag : a
./test/HTML/doc3.htm:236: error: Unexpected end tag : a
Specials<BR><BR></FONT></A><BR></FONT></A><B><FONT color=yellow
^
./test/HTML/doc3.htm:742: error: htmlParseEntityRef: expecting ';'
./test/HTML/doc3.htm:747: error: htmlParseEntityRef: expecting ';'
er=0 alt="Advertisement" src="http://ads.adflight.com/ad_static.asp?pid=2097&si
^
./test/HTML/doc3.htm:742: error: htmlParseEntityRef: expecting ';'
./test/HTML/doc3.htm:747: error: htmlParseEntityRef: expecting ';'
Advertisement" src="http://ads.adflight.com/ad_static.asp?pid=2097&sid=1881&asi
^
./test/HTML/doc3.htm:742: error: Unexpected end tag : li
./test/HTML/doc3.htm:747: error: Unexpected end tag : li
light.com/ad_static.asp?pid=2097&sid=1881&asid=7708"></a></IFRAME></CENTER></LI
^
./test/HTML/doc3.htm:742: error: Unexpected end tag : font
./test/HTML/doc3.htm:747: error: Unexpected end tag : font
om/ad_static.asp?pid=2097&sid=1881&asid=7708"></a></IFRAME></CENTER></LI></FONT
^
./test/HTML/doc3.htm:742: error: Unexpected end tag : p
./test/HTML/doc3.htm:747: error: Unexpected end tag : p
=7708"></a></IFRAME></CENTER></LI></FONT></TD></TR></TBODY></TABLE></CENTER></P
^
./test/HTML/doc3.htm:767: error: Unexpected end tag : form
./test/HTML/doc3.htm:772: error: Unexpected end tag : form
archive</A></FONT> </FORM></CENTER></TD></TR></TBODY></TABLE><!--
^
./test/HTML/doc3.htm:815: error: Unexpected end tag : a
./test/HTML/doc3.htm:820: error: Unexpected end tag : a
</A></A></B><B></NOSCRIPT></B><B><!-- END GoTo.com Search Box --></
^
./test/HTML/doc3.htm:815: error: Unexpected end tag : noscript
./test/HTML/doc3.htm:820: error: Unexpected end tag : noscript
</A></A></B><B></NOSCRIPT></B><B><!-- END GoTo.com Search Box --></
^
./test/HTML/doc3.htm:821: error: Opening and ending tag mismatch: form and center
./test/HTML/doc3.htm:826: error: Opening and ending tag mismatch: form and center
</FORM><!-- Pricewatch Search Box --><A
^
./test/HTML/doc3.htm:828: error: Unexpected end tag : p
./test/HTML/doc3.htm:833: error: Unexpected end tag : p
Special<BR>Code:BP6-hd</FONT></A> </P></CENTER></TD></TR></TBODY></
^
./test/HTML/doc3.htm:828: error: Opening and ending tag mismatch: center and td
./test/HTML/doc3.htm:833: error: Opening and ending tag mismatch: center and td
Special<BR>Code:BP6-hd</FONT></A> </P></CENTER></TD></TR></TBODY></
^
./test/HTML/doc3.htm:834: error: Unexpected end tag : p
./test/HTML/doc3.htm:839: error: Unexpected end tag : p
width="100%">&nbsp;</TD></TR></TBODY></TABLE></P></CENTER></TR></TBODY></TABLE>
^
./test/HTML/doc3.htm:835: error: Unexpected end tag : td
./test/HTML/doc3.htm:840: error: Unexpected end tag : td
<CENTER></CENTER></TD></TR><TR><TD COLSPAN="3" VALIGN="TOP"
^
./test/HTML/doc3.htm:835: error: Unexpected end tag : tr
./test/HTML/doc3.htm:840: error: Unexpected end tag : tr
<CENTER></CENTER></TD></TR><TR><TD COLSPAN="3" VALIGN="TOP"
^
./test/HTML/doc3.htm:836: error: Unexpected end tag : table
./test/HTML/doc3.htm:841: error: Unexpected end tag : table
HEIGHT="70">&nbsp;</TD> </TR></TABLE>
^

View File

@ -205,45 +205,45 @@ Readers on Apple's G4 ... AOL's passwords ... MS vs. Linux.</font><br><br> </t
./test/HTML/wired.html:402: error: Opening and ending tag mismatch: a and font
w.vignette.com/" style="text-decoration:none"><font color="#000000">Vignette</a
^
./test/HTML/wired.html:406: error: htmlParseEntityRef: expecting ';'
./test/HTML/wired.html:407: error: htmlParseEntityRef: expecting ';'
ervlet/appservlet?from=/wired/sprint/&template=/security/security.html&SITE=
^
./test/HTML/wired.html:406: error: htmlParseEntityRef: expecting ';'
./test/HTML/wired.html:407: error: htmlParseEntityRef: expecting ';'
ervlet/appservlet?from=/wired/sprint/&template=/security/security.html&SITE=
^
./test/HTML/wired.html:406: error: htmlParseEntityRef: expecting ';'
./test/HTML/wired.html:408: error: htmlParseEntityRef: expecting ';'
wired.com&BANNER=Sprint" style="text-decoration:none"><font color="#000000">Spr
^
./test/HTML/wired.html:406: error: Opening and ending tag mismatch: a and font
./test/HTML/wired.html:408: error: Opening and ending tag mismatch: a and font
com&BANNER=Sprint" style="text-decoration:none"><font color="#000000">Sprint</a
^
./test/HTML/wired.html:406: error: End tag : expected '>'
./test/HTML/wired.html:408: error: End tag : expected '>'
=Sprint" style="text-decoration:none"><font color="#000000">Sprint</a></i></fon
^
./test/HTML/wired.html:412: error: Opening and ending tag mismatch: td and font
./test/HTML/wired.html:414: error: Opening and ending tag mismatch: td and font
</td>
^
./test/HTML/wired.html:412: error: Opening and ending tag mismatch: td and font
./test/HTML/wired.html:414: error: Opening and ending tag mismatch: td and font
</td>
^
./test/HTML/wired.html:412: error: Opening and ending tag mismatch: td and font
./test/HTML/wired.html:414: error: Opening and ending tag mismatch: td and font
</td>
^
./test/HTML/wired.html:412: error: Opening and ending tag mismatch: td and font
./test/HTML/wired.html:414: error: Opening and ending tag mismatch: td and font
</td>
^
./test/HTML/wired.html:412: error: Opening and ending tag mismatch: td and font
./test/HTML/wired.html:414: error: Opening and ending tag mismatch: td and font
</td>
^
./test/HTML/wired.html:412: error: Opening and ending tag mismatch: td and font
./test/HTML/wired.html:414: error: Opening and ending tag mismatch: td and font
</td>
^
./test/HTML/wired.html:412: error: Opening and ending tag mismatch: td and font
./test/HTML/wired.html:414: error: Opening and ending tag mismatch: td and font
</td>
^
./test/HTML/wired.html:412: error: Opening and ending tag mismatch: td and font
./test/HTML/wired.html:414: error: Opening and ending tag mismatch: td and font
</td>
^
./test/HTML/wired.html:430: error: htmlParseEntityRef: expecting ';'
./test/HTML/wired.html:432: error: htmlParseEntityRef: expecting ';'
href="http://www.lycos.com/news/flash/hitlerbunker.html?v=wn1015&lpv=1">Lycos</
^