1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-24 13:33:01 +03:00

parser: Improve attribute hash table

There's no need to grow the hash table dynamically. The size is known
in advance, which simplifies the implementation.
This commit is contained in:
Nick Wellnhofer
2023-11-05 00:49:40 +01:00
parent 5859849454
commit 70cc45b81f

248
parser.c
View File

@@ -111,7 +111,6 @@ struct _xmlParserNsData {
}; };
struct _xmlAttrHashBucket { struct _xmlAttrHashBucket {
unsigned hashValue;
int index; int index;
}; };
@@ -9393,11 +9392,11 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt,
/** /**
* xmlAttrHashInsert: * xmlAttrHashInsert:
* @ctxt: parser context * @ctxt: parser context
* @aindex: attribute index (this is a multiple of 5) * @size: size of the hash table
* @sizePtr: size of the hash table (input/output value)
* @name: attribute name * @name: attribute name
* @uri: namespace uri * @uri: namespace uri
* @hashValue: combined hash value of name and uri * @hashValue: combined hash value of name and uri
* @aindex: attribute index (this is a multiple of 5)
* *
* Inserts a new attribute into the hash table. * Inserts a new attribute into the hash table.
* *
@@ -9405,120 +9404,35 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt,
* index if an attribute was found, -1 if a memory allocation failed. * index if an attribute was found, -1 if a memory allocation failed.
*/ */
static int static int
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, int aindex, unsigned *sizePtr, xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
const xmlChar *name, const xmlChar *uri, const xmlChar *uri, unsigned hashValue, int aindex) {
unsigned hashValue) {
xmlAttrHashBucket *table = ctxt->attrHash; xmlAttrHashBucket *table = ctxt->attrHash;
xmlAttrHashBucket *bucket; xmlAttrHashBucket *bucket;
unsigned hindex; unsigned hindex;
unsigned size = *sizePtr;
if (size > 0) { hindex = hashValue & (size - 1);
hindex = hashValue & (size - 1); bucket = &table[hindex];
bucket = &table[hindex];
while (bucket->hashValue != 0) { while (bucket->index >= 0) {
const xmlChar **atts = &ctxt->atts[bucket->index]; const xmlChar **atts = &ctxt->atts[bucket->index];
if (name == atts[0]) { if (name == atts[0]) {
int nsIndex = (int) (ptrdiff_t) atts[2]; int nsIndex = (int) (ptrdiff_t) atts[2];
if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) : if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
(nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml) : (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml) :
(uri == ctxt->nsTab[nsIndex * 2 + 1])) (uri == ctxt->nsTab[nsIndex * 2 + 1]))
return(bucket->index); return(bucket->index);
}
hindex++;
bucket++;
if (hindex >= size) {
hindex = 0;
bucket = table;
}
}
}
/*
* Grow hash table
*/
if ((unsigned) aindex / 5 >= size / 2) {
xmlAttrHashBucket *newTable;
unsigned newSize, i, nindex;
newSize = size ? size * 2 : 8;
if (newSize > ctxt->attrHashMax) {
newTable = xmlRealloc(table, newSize * sizeof(newTable[0]));
if (newTable == NULL) {
xmlErrMemory(ctxt, NULL);
return(-1);
}
table = newTable;
ctxt->attrHash = newTable;
ctxt->attrHashMax = newSize;
} }
memset(&table[size], 0, (newSize - size) * sizeof(table[0])); hindex++;
bucket++;
if (size > 0) { if (hindex >= size) {
/*
* We must search for the start of a probe sequence to make
* in-place operation work.
*/
hindex = 0; hindex = 0;
bucket = table; bucket = table;
while (bucket->hashValue != 0) {
hindex++;
bucket++;
}
for (i = 0; i < size; i++) {
if (bucket->hashValue != 0) {
nindex = bucket->hashValue & (newSize - 1);
while (nindex != hindex) {
if (table[nindex].hashValue == 0) {
table[nindex] = *bucket;
bucket->hashValue = 0;
break;
}
nindex++;
if (nindex >= newSize)
nindex = 0;
}
}
hindex++;
bucket++;
if (hindex >= size) {
hindex = 0;
bucket = table;
}
}
}
size = newSize;
*sizePtr = newSize;
/*
* Relookup
*/
hindex = hashValue & (size - 1);
bucket = &table[hindex];
while (bucket->hashValue != 0) {
hindex++;
bucket++;
if (hindex >= size) {
hindex = 0;
bucket = table;
}
} }
} }
bucket->hashValue = hashValue;
bucket->index = aindex; bucket->index = aindex;
return(INT_MAX); return(INT_MAX);
@@ -9570,7 +9484,7 @@ xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
unsigned attrHashSize = 0; unsigned attrHashSize = 0;
int maxatts = ctxt->maxatts; int maxatts = ctxt->maxatts;
int nratts, nbatts, nbdef, inputid; int nratts, nbatts, nbdef, inputid;
int i, j, nbNs, attval, nsIndex; int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
int alloc = 0; int alloc = 0;
if (RAW != '<') return(NULL); if (RAW != '<') return(NULL);
@@ -9581,6 +9495,7 @@ xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
nratts = 0; nratts = 0;
nbdef = 0; nbdef = 0;
nbNs = 0; nbNs = 0;
nbTotalDef = 0;
attval = 0; attval = 0;
if (xmlParserNsStartElement(ctxt->nsdb) < 0) { if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
@@ -9848,7 +9763,9 @@ next_attr:
if (xmlParserNsPush(ctxt, &attr->name, &attr->value, if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
NULL, 1) > 0) NULL, 1) > 0)
nbNs++; nbNs++;
} } else {
nbTotalDef += 1;
}
} }
} }
} }
@@ -9882,52 +9799,78 @@ next_attr:
atts[i+2] = (const xmlChar *) (ptrdiff_t) nsIndex; atts[i+2] = (const xmlChar *) (ptrdiff_t) nsIndex;
} }
/*
* Maximum number of attributes including default attributes.
*/
maxAtts = nratts + nbTotalDef;
/* /*
* Verify that attribute names are unique. * Verify that attribute names are unique.
*/ */
for (i = 0, j = 0; j < nratts; i += 5, j++) { if (maxAtts > 1) {
const xmlChar *nsuri; attrHashSize = 4;
unsigned hashValue, nameHashValue, uriHashValue; while (attrHashSize / 2 < (unsigned) maxAtts)
int res; attrHashSize *= 2;
attname = atts[i]; if (attrHashSize > ctxt->attrHashMax) {
aprefix = atts[i+1]; xmlAttrHashBucket *tmp;
nsIndex = (ptrdiff_t) atts[i+2];
/* Hash values always have bit 31 set, see dict.c */
nameHashValue = ctxt->attallocs[j] | 0x80000000;
if (nsIndex == NS_INDEX_EMPTY) { tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
nsuri = NULL; if (tmp == NULL) {
uriHashValue = URI_HASH_EMPTY; xmlErrMemory(ctxt, NULL);
} else if (nsIndex == NS_INDEX_XML) { goto done;
nsuri = ctxt->str_xml_ns; }
uriHashValue = URI_HASH_XML;
} else { ctxt->attrHash = tmp;
nsuri = ctxt->nsTab[nsIndex * 2 + 1]; ctxt->attrHashMax = attrHashSize;
uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
} }
hashValue = xmlDictCombineHash(nameHashValue, uriHashValue); memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
res = xmlAttrHashInsert(ctxt, i, &attrHashSize, attname, nsuri,
hashValue);
if (res < 0)
continue;
/* for (i = 0, j = 0; j < nratts; i += 5, j++) {
* [ WFC: Unique Att Spec ] const xmlChar *nsuri;
* No attribute name may appear more than once in the same unsigned hashValue, nameHashValue, uriHashValue;
* start-tag or empty-element tag. int res;
* As extended by the Namespace in XML REC.
*/ attname = atts[i];
if (res < INT_MAX) { aprefix = atts[i+1];
if (aprefix == atts[res+1]) { nsIndex = (ptrdiff_t) atts[i+2];
xmlErrAttributeDup(ctxt, aprefix, attname); /* Hash values always have bit 31 set, see dict.c */
nameHashValue = ctxt->attallocs[j] | 0x80000000;
if (nsIndex == NS_INDEX_EMPTY) {
nsuri = NULL;
uriHashValue = URI_HASH_EMPTY;
} else if (nsIndex == NS_INDEX_XML) {
nsuri = ctxt->str_xml_ns;
uriHashValue = URI_HASH_XML;
} else { } else {
xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, nsuri = ctxt->nsTab[nsIndex * 2 + 1];
"Namespaced Attribute %s in '%s' redefined\n", uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
attname, nsuri, NULL);
} }
}
hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
hashValue, i);
if (res < 0)
continue;
/*
* [ WFC: Unique Att Spec ]
* No attribute name may appear more than once in the same
* start-tag or empty-element tag.
* As extended by the Namespace in XML REC.
*/
if (res < INT_MAX) {
if (aprefix == atts[res+1]) {
xmlErrAttributeDup(ctxt, aprefix, attname);
} else {
xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
"Namespaced Attribute %s in '%s' redefined\n",
attname, nsuri, NULL);
}
}
}
} }
/* /*
@@ -9979,17 +9922,20 @@ next_attr:
/* /*
* Check whether the attribute exists * Check whether the attribute exists
*/ */
hashValue = xmlDictCombineHash(attr->name.hashValue, uriHashValue); if (maxAtts > 1) {
res = xmlAttrHashInsert(ctxt, nbatts, &attrHashSize, attname, hashValue = xmlDictCombineHash(attr->name.hashValue,
nsuri, hashValue); uriHashValue);
if (res < 0) res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
continue; hashValue, nbatts);
if (res < INT_MAX) { if (res < 0)
if (aprefix == atts[res+1])
continue; continue;
xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, if (res < INT_MAX) {
"Namespaced Attribute %s in '%s' redefined\n", if (aprefix == atts[res+1])
attname, nsuri, NULL); continue;
xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
"Namespaced Attribute %s in '%s' redefined\n",
attname, nsuri, NULL);
}
} }
xmlParserEntityCheck(ctxt, attr->expandedSize); xmlParserEntityCheck(ctxt, attr->expandedSize);