1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-21 14:53:44 +03:00

some parser optimizations, xmllint --memory --timing --repeat --stream

* dict.c parser.c xmlstring.c: some parser optimizations,
  xmllint --memory --timing --repeat --stream ./db10000.xml
  went down from 16.5 secs to 15.5 secs.
Daniel
This commit is contained in:
Daniel Veillard
2004-01-12 16:24:34 +00:00
parent 65765284fb
commit c82c57e69c
4 changed files with 103 additions and 17 deletions

View File

@@ -1,3 +1,9 @@
Mon Jan 12 17:22:57 CET 2004 Daniel Veillard <daniel@veillard.com>
* dict.c parser.c xmlstring.c: some parser optimizations,
xmllint --memory --timing --repeat --stream ./db10000.xml
went down from 16.5 secs to 15.5 secs.
Thu Jan 8 17:57:50 CET 2004 Daniel Veillard <daniel@veillard.com> Thu Jan 8 17:57:50 CET 2004 Daniel Veillard <daniel@veillard.com>
* xmlschemas.c: removed a memory leak remaining from the switch * xmlschemas.c: removed a memory leak remaining from the switch

16
dict.c
View File

@@ -484,14 +484,30 @@ xmlDictLookup(xmlDictPtr dict, const xmlChar *name, int len) {
} else { } else {
for (insert = &(dict->dict[key]); insert->next != NULL; for (insert = &(dict->dict[key]); insert->next != NULL;
insert = insert->next) { insert = insert->next) {
#ifdef __GNUC__
if (insert->len == len) {
register int tmp = memcmp(insert->name, name, len);
if (!tmp)
return(insert->name);
}
#else
if ((insert->len == len) && if ((insert->len == len) &&
(!xmlStrncmp(insert->name, name, len))) (!xmlStrncmp(insert->name, name, len)))
return(insert->name); return(insert->name);
#endif
nbi++; nbi++;
} }
#ifdef __GNUC__
if (insert->len == len) {
register int tmp = memcmp(insert->name, name, len);
if (!tmp)
return(insert->name);
}
#else
if ((insert->len == len) && if ((insert->len == len) &&
(!xmlStrncmp(insert->name, name, len))) (!xmlStrncmp(insert->name, name, len)))
return(insert->name); return(insert->name);
#endif
} }
ret = xmlDictAddString(dict, name, len); ret = xmlDictAddString(dict, name, len);

View File

@@ -2038,13 +2038,15 @@ xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
* @ctxt: an XML parser context * @ctxt: an XML parser context
* @str: a xmlChar * * @str: a xmlChar *
* @len: the size of @str * @len: the size of @str
* @blank_chars: we know the chars are blanks
* *
* Is this a sequence of blank chars that one can ignore ? * Is this a sequence of blank chars that one can ignore ?
* *
* Returns 1 if ignorable 0 otherwise. * Returns 1 if ignorable 0 otherwise.
*/ */
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) { static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
int blank_chars) {
int i, ret; int i, ret;
xmlNodePtr lastChild; xmlNodePtr lastChild;
@@ -2064,8 +2066,10 @@ static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
/* /*
* Check that the string is made of blanks * Check that the string is made of blanks
*/ */
for (i = 0;i < len;i++) if (blank_chars == 0) {
if (!(IS_BLANK_CH(str[i]))) return(0); for (i = 0;i < len;i++)
if (!(IS_BLANK_CH(str[i]))) return(0);
}
/* /*
* Look if the element is mixed content in the DTD if available * Look if the element is mixed content in the DTD if available
@@ -2339,8 +2343,8 @@ xmlParseName(xmlParserCtxtPtr ctxt) {
static const xmlChar * static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
const xmlChar *cmp = other; register const xmlChar *cmp = other;
const xmlChar *in; register const xmlChar *in;
const xmlChar *ret; const xmlChar *ret;
GROW; GROW;
@@ -3106,10 +3110,50 @@ xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
if (!cdata) { if (!cdata) {
in = ctxt->input->cur; in = ctxt->input->cur;
do { do {
get_more_space:
while (*in == 0x20) in++;
if (*in == 0xA) {
ctxt->input->line++;
in++;
while (*in == 0xA) {
ctxt->input->line++;
in++;
}
goto get_more_space;
}
if (*in == '<') {
nbchar = in - ctxt->input->cur;
if (nbchar > 0) {
const xmlChar *tmp = ctxt->input->cur;
ctxt->input->cur = in;
if (ctxt->sax->ignorableWhitespace !=
ctxt->sax->characters) {
if (areBlanks(ctxt, tmp, nbchar, 1)) {
ctxt->sax->ignorableWhitespace(ctxt->userData,
tmp, nbchar);
} else if (ctxt->sax->characters != NULL)
ctxt->sax->characters(ctxt->userData,
tmp, nbchar);
} else if (ctxt->sax->characters != NULL) {
ctxt->sax->characters(ctxt->userData,
tmp, nbchar);
}
}
return;
}
get_more: get_more:
#if 0
while (((*in >= 0x20) && (*in != '<') && (*in != ']') && while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
(*in != '&') && (*in <= 0x7F)) || (*in == 0x09)) (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
in++; in++;
#endif
while (((*in > ']') && (*in <= 0x7F)) ||
((*in > '&') && (*in < '<')) ||
((*in > '<') && (*in < ']')) ||
((*in >= 0x20) && (*in < '&')) ||
(*in == 0x09))
in++;
if (*in == 0xA) { if (*in == 0xA) {
ctxt->input->line++; ctxt->input->line++;
in++; in++;
@@ -3136,7 +3180,7 @@ get_more:
const xmlChar *tmp = ctxt->input->cur; const xmlChar *tmp = ctxt->input->cur;
ctxt->input->cur = in; ctxt->input->cur = in;
if (areBlanks(ctxt, tmp, nbchar)) { if (areBlanks(ctxt, tmp, nbchar, 0)) {
ctxt->sax->ignorableWhitespace(ctxt->userData, ctxt->sax->ignorableWhitespace(ctxt->userData,
tmp, nbchar); tmp, nbchar);
} else if (ctxt->sax->characters != NULL) } else if (ctxt->sax->characters != NULL)
@@ -3217,7 +3261,7 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
* OK the segment is to be consumed as chars. * OK the segment is to be consumed as chars.
*/ */
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
if (areBlanks(ctxt, buf, nbchar)) { if (areBlanks(ctxt, buf, nbchar, 0)) {
if (ctxt->sax->ignorableWhitespace != NULL) if (ctxt->sax->ignorableWhitespace != NULL)
ctxt->sax->ignorableWhitespace(ctxt->userData, ctxt->sax->ignorableWhitespace(ctxt->userData,
buf, nbchar); buf, nbchar);
@@ -3242,7 +3286,7 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
* OK the segment is to be consumed as chars. * OK the segment is to be consumed as chars.
*/ */
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
if (areBlanks(ctxt, buf, nbchar)) { if (areBlanks(ctxt, buf, nbchar, 0)) {
if (ctxt->sax->ignorableWhitespace != NULL) if (ctxt->sax->ignorableWhitespace != NULL)
ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
} else { } else {
@@ -6689,6 +6733,7 @@ xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
/** /**
* xmlParseNCName: * xmlParseNCName:
* @ctxt: an XML parser context * @ctxt: an XML parser context
* @len: length of the string parsed
* *
* parse an XML name. * parse an XML name.
* *
@@ -7142,7 +7187,7 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt,
static const xmlChar * static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
const xmlChar **URI) { const xmlChar **URI, int *tlen) {
const xmlChar *localname; const xmlChar *localname;
const xmlChar *prefix; const xmlChar *prefix;
const xmlChar *attname; const xmlChar *attname;
@@ -7182,6 +7227,7 @@ reparse:
"StartTag: invalid element name\n"); "StartTag: invalid element name\n");
return(NULL); return(NULL);
} }
*tlen = ctxt->input->cur - ctxt->input->base - cur;
/* /*
* Now parse the attributes, it ends up with the ending * Now parse the attributes, it ends up with the ending
@@ -7507,7 +7553,7 @@ base_changed:
static void static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
const xmlChar *URI, int line, int nsNr) { const xmlChar *URI, int line, int nsNr, int tlen) {
const xmlChar *name; const xmlChar *name;
GROW; GROW;
@@ -7517,7 +7563,19 @@ xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
} }
SKIP(2); SKIP(2);
name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix); if ((tlen > 0) && (memcmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
if (ctxt->input->cur[tlen] == '>') {
ctxt->input->cur += tlen + 1;
goto done;
}
ctxt->input->cur += tlen;
name = (xmlChar*)1;
} else {
if (prefix == NULL)
name = xmlParseNameAndCompare(ctxt, ctxt->name);
else
name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
}
/* /*
* We should definitely be at the ending "S? '>'" part * We should definitely be at the ending "S? '>'" part
@@ -7545,6 +7603,7 @@ xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
/* /*
* SAX: End of Tag * SAX: End of Tag
*/ */
done:
if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
(!ctxt->disableSAX)) (!ctxt->disableSAX))
ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI); ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
@@ -7753,7 +7812,7 @@ xmlParseElement(xmlParserCtxtPtr ctxt) {
const xmlChar *prefix; const xmlChar *prefix;
const xmlChar *URI; const xmlChar *URI;
xmlParserNodeInfo node_info; xmlParserNodeInfo node_info;
int line; int line, tlen;
xmlNodePtr ret; xmlNodePtr ret;
int nsNr = ctxt->nsNr; int nsNr = ctxt->nsNr;
@@ -7773,7 +7832,7 @@ xmlParseElement(xmlParserCtxtPtr ctxt) {
#ifdef LIBXML_SAX1_ENABLED #ifdef LIBXML_SAX1_ENABLED
if (ctxt->sax2) if (ctxt->sax2)
#endif /* LIBXML_SAX1_ENABLED */ #endif /* LIBXML_SAX1_ENABLED */
name = xmlParseStartTag2(ctxt, &prefix, &URI); name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
#ifdef LIBXML_SAX1_ENABLED #ifdef LIBXML_SAX1_ENABLED
else else
name = xmlParseStartTag(ctxt); name = xmlParseStartTag(ctxt);
@@ -7878,7 +7937,7 @@ xmlParseElement(xmlParserCtxtPtr ctxt) {
* parse the end of tag: '</' should be here. * parse the end of tag: '</' should be here.
*/ */
if (ctxt->sax2) { if (ctxt->sax2) {
xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr); xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
namePop(ctxt); namePop(ctxt);
} }
#ifdef LIBXML_SAX1_ENABLED #ifdef LIBXML_SAX1_ENABLED
@@ -8734,7 +8793,7 @@ xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
static int static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
int ret = 0; int ret = 0;
int avail; int avail, tlen;
xmlChar cur, next; xmlChar cur, next;
const xmlChar *lastlt, *lastgt; const xmlChar *lastlt, *lastgt;
@@ -8984,7 +9043,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
#ifdef LIBXML_SAX1_ENABLED #ifdef LIBXML_SAX1_ENABLED
if (ctxt->sax2) if (ctxt->sax2)
#endif /* LIBXML_SAX1_ENABLED */ #endif /* LIBXML_SAX1_ENABLED */
name = xmlParseStartTag2(ctxt, &prefix, &URI); name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
#ifdef LIBXML_SAX1_ENABLED #ifdef LIBXML_SAX1_ENABLED
else else
name = xmlParseStartTag(ctxt); name = xmlParseStartTag(ctxt);
@@ -9159,7 +9218,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
xmlParseEndTag2(ctxt, xmlParseEndTag2(ctxt,
(void *) ctxt->pushTab[ctxt->nameNr * 3 - 3], (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
(void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0, (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
(int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1]); (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
nameNsPop(ctxt); nameNsPop(ctxt);
} }
#ifdef LIBXML_SAX1_ENABLED #ifdef LIBXML_SAX1_ENABLED

View File

@@ -208,11 +208,16 @@ xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
if (str1 == str2) return(0); if (str1 == str2) return(0);
if (str1 == NULL) return(-1); if (str1 == NULL) return(-1);
if (str2 == NULL) return(1); if (str2 == NULL) return(1);
#ifdef __GNUC__
tmp = strncmp(str1, str2, len);
return tmp;
#else
do { do {
tmp = *str1++ - *str2; tmp = *str1++ - *str2;
if (tmp != 0 || --len == 0) return(tmp); if (tmp != 0 || --len == 0) return(tmp);
} while (*str2++ != 0); } while (*str2++ != 0);
return 0; return 0;
#endif
} }
static const xmlChar casemap[256] = { static const xmlChar casemap[256] = {