diff --git a/ChangeLog b/ChangeLog index 38cba93d..772de75a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +Wed Sep 3 15:24:41 CEST 2003 Daniel Veillard + + * HTMLparser.c: when creating a DOCTYPE use "html" lowercase + by default instead of "HTML" + * parser.c xmlreader.c: optimization, gain a few % parsing speed by + avoiding calls to "areBlanks" when not needed. + * include/libxml/parser.h include/libxml/tree.h: some structure + extensions for future work on using per-document dictionaries. + Wed Sep 3 15:08:06 CEST 2003 Daniel Veillard * Makefile.am results/*.sax SAXResult/*: removing the SAXresults diff --git a/HTMLparser.c b/HTMLparser.c index 7c404740..90e2460e 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -2137,7 +2137,7 @@ htmlNewDocNoDtD(const xmlChar *URI, const xmlChar *ExternalID) { cur->_private = NULL; if ((ExternalID != NULL) || (URI != NULL)) - xmlCreateIntSubset(cur, BAD_CAST "HTML", ExternalID, URI); + xmlCreateIntSubset(cur, BAD_CAST "html", ExternalID, URI); return(cur); } @@ -4002,7 +4002,7 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) { dtd = xmlGetIntSubset(ctxt->myDoc); if (dtd == NULL) ctxt->myDoc->intSubset = - xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "HTML", + xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html", BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); } @@ -5058,7 +5058,7 @@ done: dtd = xmlGetIntSubset(ctxt->myDoc); if (dtd == NULL) ctxt->myDoc->intSubset = - xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "HTML", + xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html", BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); } diff --git a/include/libxml/parser.h b/include/libxml/parser.h index cb555ef4..e52013b9 100644 --- a/include/libxml/parser.h +++ b/include/libxml/parser.h @@ -237,8 +237,9 @@ struct _xmlParserCtxt { int recovery; /* run in recovery mode */ int progressive; /* is this a progressive parsing */ xmlDictPtr dict; 
/* dictionnary for the parser */ - const xmlChar * *atts; /* array for the attributes callbacks */ + const xmlChar * *atts; /* array for the attributes callbacks */ int maxatts; /* the size of the array */ + int docdict; /* use strings from dict to build tree */ }; /** diff --git a/include/libxml/tree.h b/include/libxml/tree.h index 9c1fda80..9a236a4a 100644 --- a/include/libxml/tree.h +++ b/include/libxml/tree.h @@ -476,6 +476,7 @@ struct _xmlNode { #define XML_GET_LINE(n) \ ((n)->type == XML_ELEMENT_NODE ? (int) (n)->content : 0) + /** * xmlDoc: * @@ -507,6 +508,7 @@ struct _xmlDoc { const xmlChar *URL; /* The URI for that document */ int charset; /* encoding of the in-memory content actually an xmlCharEncoding */ + struct _xmlDict *dict; /* dict used to allocate names or NULL */ }; /** diff --git a/parser.c b/parser.c index 8c75a355..c2c4ff6b 100644 --- a/parser.c +++ b/parser.c @@ -2799,18 +2799,18 @@ get_more: } nbchar = in - ctxt->input->cur; if (nbchar > 0) { - if (IS_BLANK(*ctxt->input->cur)) { + if ((ctxt->sax->ignorableWhitespace != + ctxt->sax->characters) && + (IS_BLANK(*ctxt->input->cur))) { const xmlChar *tmp = ctxt->input->cur; ctxt->input->cur = in; + if (areBlanks(ctxt, tmp, nbchar)) { - if (ctxt->sax->ignorableWhitespace != NULL) - ctxt->sax->ignorableWhitespace(ctxt->userData, - tmp, nbchar); - } else { - if (ctxt->sax->characters != NULL) - ctxt->sax->characters(ctxt->userData, - tmp, nbchar); - } + ctxt->sax->ignorableWhitespace(ctxt->userData, + tmp, nbchar); + } else if (ctxt->sax->characters != NULL) + ctxt->sax->characters(ctxt->userData, + tmp, nbchar); line = ctxt->input->line; col = ctxt->input->col; } else { diff --git a/xmlreader.c b/xmlreader.c index cdc4f9ec..a8ec5785 100644 --- a/xmlreader.c +++ b/xmlreader.c @@ -1125,6 +1125,7 @@ xmlNewTextReader(xmlParserInputBufferPtr input, const char *URI) { ret->sax->endElement = xmlTextReaderEndElement; ret->characters = ret->sax->characters; ret->sax->characters = xmlTextReaderCharacters; + 
ret->sax->ignorableWhitespace = xmlTextReaderCharacters; ret->cdataBlock = ret->sax->cdataBlock; ret->sax->cdataBlock = xmlTextReaderCDataBlock; @@ -1152,6 +1153,10 @@ xmlNewTextReader(xmlParserInputBufferPtr input, const char *URI) { ret->ctxt->_private = ret; ret->ctxt->linenumbers = 1; ret->allocs = XML_TEXTREADER_CTXT; + /* + * use the parser dictionary to allocate all element and attribute names + */ + ret->ctxt->docdict = 1; return(ret); }