From c5b43cc03a52684e8780a17ed4f5154b568bc3c5 Mon Sep 17 00:00:00 2001
From: Daniel Veillard <veillard@src.gnome.org>
Date: Fri, 11 Jan 2008 07:41:39 +0000
Subject: [PATCH] avoid stopping parsing when encountering out of range
 characters in an HTML file

* HTMLparser.c: do not stop parsing when an out-of-range character
  is encountered in an HTML file; report the error and continue
  processing instead. Should fix #472696.
Daniel

svn path=/trunk/; revision=3675
---
 ChangeLog    | 6 ++++++
 HTMLparser.c | 9 +++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 2da148e2..83a592ad 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+Fri Jan 11 15:37:05 CST 2008 Daniel Veillard <daniel@veillard.com>
+
+	* HTMLparser.c: avoid stopping parsing when encountering
+	  out of range characters in an HTML file, report and 
+	  continue processing instead, should fix #472696
+
 Fri Jan 11 15:13:35 CST 2008 Daniel Veillard <daniel@veillard.com>
 
 	* check-relaxng-test-suite2.py check-relaxng-test-suite.py
diff --git a/HTMLparser.c b/HTMLparser.c
index 0574a033..38af5e3f 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -2772,8 +2772,13 @@ htmlParseCharData(htmlParserCtxtPtr ctxt) {
     cur = CUR_CHAR(l);
     while (((cur != '<') || (ctxt->token == '<')) &&
            ((cur != '&') || (ctxt->token == '&')) && 
-	   (IS_CHAR(cur))) {
-	COPY_BUF(l,buf,nbchar,cur);
+	   (cur != 0)) {
+	if (!(IS_CHAR(cur))) {
+	    htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
+	                "Invalid char in CDATA 0x%X\n", cur);
+	} else {
+	    COPY_BUF(l,buf,nbchar,cur);
+	}
 	if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {
 	    /*
 	     * Ok the segment is to be consumed as chars.