From ce02dbc430052b6c75f6853b7c5c161a36ec0213 Mon Sep 17 00:00:00 2001
From: Daniel Veillard
Date: Tue, 22 Oct 2002 19:14:58 +0000
Subject: [PATCH] Mikhail Sogrine pointed out a bug in HTML parsing, applied his patch added

* HTMLparser.c: Mikhail Sogrine pointed out a bug in HTML
  parsing, applied his patch
* result/HTML/attrents.html result/HTML/attrents.html.err
  result/HTML/attrents.html.sax test/HTML/attrents.html:
  added the test and result case provided by Mikhail Sogrine
Daniel
---
 ChangeLog                     |  8 ++++++++
 HTMLparser.c                  |  7 +++++++
 result/HTML/attrents.html     |  4 ++++
 result/HTML/attrents.html.err |  0
 result/HTML/attrents.html.sax | 21 +++++++++++++++++++++
 test/HTML/attrents.html       |  5 +++++
 6 files changed, 45 insertions(+)
 create mode 100644 result/HTML/attrents.html
 create mode 100644 result/HTML/attrents.html.err
 create mode 100644 result/HTML/attrents.html.sax
 create mode 100644 test/HTML/attrents.html

diff --git a/ChangeLog b/ChangeLog
index 6612c396..a63d38af 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+Tue Oct 22 21:13:06 CEST 2002 Daniel Veillard
+
+	* HTMLparser.c: Mikhail Sogrine pointed out a bug in HTML
+	  parsing, applied his patch
+	* result/HTML/attrents.html result/HTML/attrents.html.err
+	  result/HTML/attrents.html.sax test/HTML/attrents.html:
+	  added the test and result case provided by Mikhail Sogrine
+
 Tue Oct 22 19:33:20 CEST 2002 Daniel Veillard
 
 	* vms/build_libxml.com vms/config.vms vms/readme.vms
diff --git a/HTMLparser.c b/HTMLparser.c
index e4a52264..dad64022 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -1946,6 +1946,13 @@ htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) {
 		for ( ; bits >= 0; bits-= 6) {
 		    *out++ = ((c >> bits) & 0x3F) | 0x80;
 		}
+
+		if (out - buffer > buffer_size - 100) {
+		    int indx = out - buffer;
+
+		    growBuffer(buffer);
+		    out = &buffer[indx];
+		}
 	    } else {
 		ent = htmlParseEntityRef(ctxt, &name);
 		if (name == NULL) {
diff --git a/result/HTML/attrents.html b/result/HTML/attrents.html
new file mode 100644
index 00000000..205430a6
--- /dev/null
+++ b/result/HTML/attrents.html
@@ -0,0 +1,4 @@
+
+
+
+
diff --git a/result/HTML/attrents.html.err b/result/HTML/attrents.html.err
new file mode 100644
index 00000000..e69de29b
diff --git a/result/HTML/attrents.html.sax b/result/HTML/attrents.html.sax
new file mode 100644
index 00000000..976bbbed
--- /dev/null
+++ b/result/HTML/attrents.html.sax
@@ -0,0 +1,21 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.startElement(html)
+SAX.ignorableWhitespace(
+, 2)
+SAX.startElement(body, bgcolor='#FFFFFF')
+SAX.ignorableWhitespace(
+ , 18)
+SAX.startElement(a, href='mailto:katherine@cbfanc.org,website@bis.doc.gov?subject=South San Francisco BIS Seminar - October 16th')
+SAX.endElement(a)
+SAX.startElement(br)
+SAX.endElement(br)
+SAX.ignorableWhitespace(
+, 2)
+SAX.endElement(body)
+SAX.ignorableWhitespace(
+, 2)
+SAX.endElement(html)
+SAX.ignorableWhitespace(
+, 2)
+SAX.endDocument()
diff --git a/test/HTML/attrents.html b/test/HTML/attrents.html
new file mode 100644
index 00000000..8486ec0f
--- /dev/null
+++ b/test/HTML/attrents.html
@@ -0,0 +1,5 @@
+
+
+
+
+