From 8cc0d1f88966694e6562ee5bb2ca4406da11d64b Mon Sep 17 00:00:00 2001 From: Daniel Veillard Date: Mon, 16 Nov 1998 01:04:26 +0000 Subject: [PATCH] Better entities and char ref encoding, error msg formatting, Daniel. --- ChangeLog | 5 +++++ entities.c | 40 +++++++++++++++++++++++++++++++++++----- parser.c | 15 +++++++++------ 3 files changed, 49 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index e51225f3..15040f5b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +Sun Nov 15 19:59:47 EST 1998 Daniel Veillard + + * parser.c, entities.c: improve entities and char ref encoding, + and cleanups of error messages. + Fri Nov 13 13:03:10 EST 1998 Daniel Veillard * parser.c, entities.c: simple bug hunting done during rpm2html and diff --git a/entities.c b/entities.c index 0b418001..28a35826 100644 --- a/entities.c +++ b/entities.c @@ -248,6 +248,15 @@ xmlEntityPtr xmlGetDocEntity(xmlDocPtr doc, const CHAR *name) { return(NULL); } +/* + * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] + * | [#x10000-#x10FFFF] + * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. + */ +#define IS_CHAR(c) \ + (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \ + (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF))) + /* * xmlEncodeEntities : do a global encoding of a string, replacing the * predefined entities and non ASCII values with their @@ -255,6 +264,7 @@ xmlEntityPtr xmlGetDocEntity(xmlDocPtr doc, const CHAR *name) { * TODO !!!! Once moved to UTF-8 internal encoding, the encoding of non-ascii * get erroneous. */ + CHAR *xmlEncodeEntities(xmlDocPtr doc, const CHAR *input) { const CHAR *cur = input; CHAR *out = buffer; @@ -310,6 +320,12 @@ CHAR *xmlEncodeEntities(xmlDocPtr doc, const CHAR *input) { *out++ = 'o'; *out++ = 's'; *out++ = ';'; + } else if (((*cur >= 0x20) && (*cur < 0x80)) || + (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) { + /* + * default case, just copy ! + */ + *out++ = *cur; #ifndef USE_UTF_8 } else if ((sizeof(CHAR) == 1) && (*cur >= 0x80)) { char buf[10], *ptr; @@ -321,12 +337,26 @@ CHAR *xmlEncodeEntities(xmlDocPtr doc, const CHAR *input) { ptr = buf; while (*ptr != 0) *out++ = *ptr++; #endif - } else { - /* - * default case, just copy ! - */ - *out++ = *cur; + } else if (IS_CHAR(*cur)) { + char buf[10], *ptr; + +#ifdef HAVE_SNPRINTF + snprintf(buf, 9, "&#%d;", *cur); +#else + sprintf(buf, "&#%d;", *cur); +#endif + ptr = buf; + while (*ptr != 0) *out++ = *ptr++; } +#if 0 + else { + /* + * default case, this is not a valid char ! + * Skip it... + */ + fprintf(stderr, "xmlEncodeEntities: invalid char %d\n", (int) *cur); + } +#endif cur++; } *out++ = 0; diff --git a/parser.c b/parser.c index 2b1b0081..5464b263 100644 --- a/parser.c +++ b/parser.c @@ -1080,7 +1080,7 @@ xmlParseQuotedString(xmlParserCtxtPtr ctxt) { while (IS_CHAR(CUR) && (CUR != '"')) NEXT; if (CUR != '"') { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt, "String not closed\"%.50s\n", q); + ctxt->sax->error(ctxt, "String not closed \"%.50s\"\n", q); } else { ret = xmlStrndup(q, CUR_PTR - q); NEXT; @@ -1091,7 +1091,7 @@ xmlParseQuotedString(xmlParserCtxtPtr ctxt) { while (IS_CHAR(CUR) && (CUR != '\'')) NEXT; if (CUR != '\'') { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt, "String not closed\"%.50s\n", q); + ctxt->sax->error(ctxt, "String not closed \"%.50s\"\n", q); } else { ret = xmlStrndup(q, CUR_PTR - q); NEXT; @@ -2253,7 +2253,8 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { val = val * 16 + (CUR - 'A') + 10; else { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt, "xmlParseCharRef: invalid value\n"); + ctxt->sax->error(ctxt, + "xmlParseCharRef: invalid hexa value\n"); val = 0; break; } @@ -2268,7 +2269,8 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { val = val * 10 + (CUR - '0'); else { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt, "xmlParseCharRef: invalid value\n"); + ctxt->sax->error(ctxt, + "xmlParseCharRef: invalid decimal value\n"); val = 0; break; } @@ -2289,7 +2291,8 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { return(xmlStrndup(buf, 1)); } else { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt, "xmlParseCharRef: invalid value"); + ctxt->sax->error(ctxt, "xmlParseCharRef: invalid CHAR value %d\n", + val); } return(NULL); } @@ -3932,7 +3935,7 @@ xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, if ( tmp_buffer == NULL ) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt, "Out of memory"); + ctxt->sax->error(ctxt, "Out of memory\n"); return; } ctxt->node_seq.buffer = tmp_buffer;