mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-10-26 00:37:43 +03:00
parser: Improve attribute hash table
There's no need to grow the hash table dynamically. The size is known, which simplifies the implementation.
This commit is contained in:
248
parser.c
248
parser.c
@@ -111,7 +111,6 @@ struct _xmlParserNsData {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct _xmlAttrHashBucket {
|
struct _xmlAttrHashBucket {
|
||||||
unsigned hashValue;
|
|
||||||
int index;
|
int index;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -9393,11 +9392,11 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt,
|
|||||||
/**
|
/**
|
||||||
* xmlAttrHashInsert:
|
* xmlAttrHashInsert:
|
||||||
* @ctxt: parser context
|
* @ctxt: parser context
|
||||||
* @aindex: attribute index (this is a multiple of 5)
|
* @size: size of the hash table
|
||||||
* @sizePtr: size of the hash table (input/output value)
|
|
||||||
* @name: attribute name
|
* @name: attribute name
|
||||||
* @uri: namespace uri
|
* @uri: namespace uri
|
||||||
* @hashValue: combined hash value of name and uri
|
* @hashValue: combined hash value of name and uri
|
||||||
|
* @aindex: attribute index (this is a multiple of 5)
|
||||||
*
|
*
|
||||||
* Inserts a new attribute into the hash table.
|
* Inserts a new attribute into the hash table.
|
||||||
*
|
*
|
||||||
@@ -9405,120 +9404,35 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt,
|
|||||||
* index if an attribute was found, -1 if a memory allocation failed.
|
* index if an attribute was found, -1 if a memory allocation failed.
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, int aindex, unsigned *sizePtr,
|
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
|
||||||
const xmlChar *name, const xmlChar *uri,
|
const xmlChar *uri, unsigned hashValue, int aindex) {
|
||||||
unsigned hashValue) {
|
|
||||||
xmlAttrHashBucket *table = ctxt->attrHash;
|
xmlAttrHashBucket *table = ctxt->attrHash;
|
||||||
xmlAttrHashBucket *bucket;
|
xmlAttrHashBucket *bucket;
|
||||||
unsigned hindex;
|
unsigned hindex;
|
||||||
unsigned size = *sizePtr;
|
|
||||||
|
|
||||||
if (size > 0) {
|
hindex = hashValue & (size - 1);
|
||||||
hindex = hashValue & (size - 1);
|
bucket = &table[hindex];
|
||||||
bucket = &table[hindex];
|
|
||||||
|
|
||||||
while (bucket->hashValue != 0) {
|
while (bucket->index >= 0) {
|
||||||
const xmlChar **atts = &ctxt->atts[bucket->index];
|
const xmlChar **atts = &ctxt->atts[bucket->index];
|
||||||
|
|
||||||
if (name == atts[0]) {
|
if (name == atts[0]) {
|
||||||
int nsIndex = (int) (ptrdiff_t) atts[2];
|
int nsIndex = (int) (ptrdiff_t) atts[2];
|
||||||
|
|
||||||
if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
|
if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
|
||||||
(nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml) :
|
(nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml) :
|
||||||
(uri == ctxt->nsTab[nsIndex * 2 + 1]))
|
(uri == ctxt->nsTab[nsIndex * 2 + 1]))
|
||||||
return(bucket->index);
|
return(bucket->index);
|
||||||
}
|
|
||||||
|
|
||||||
hindex++;
|
|
||||||
bucket++;
|
|
||||||
if (hindex >= size) {
|
|
||||||
hindex = 0;
|
|
||||||
bucket = table;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Grow hash table
|
|
||||||
*/
|
|
||||||
if ((unsigned) aindex / 5 >= size / 2) {
|
|
||||||
xmlAttrHashBucket *newTable;
|
|
||||||
unsigned newSize, i, nindex;
|
|
||||||
|
|
||||||
newSize = size ? size * 2 : 8;
|
|
||||||
|
|
||||||
if (newSize > ctxt->attrHashMax) {
|
|
||||||
newTable = xmlRealloc(table, newSize * sizeof(newTable[0]));
|
|
||||||
if (newTable == NULL) {
|
|
||||||
xmlErrMemory(ctxt, NULL);
|
|
||||||
return(-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
table = newTable;
|
|
||||||
ctxt->attrHash = newTable;
|
|
||||||
ctxt->attrHashMax = newSize;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
memset(&table[size], 0, (newSize - size) * sizeof(table[0]));
|
hindex++;
|
||||||
|
bucket++;
|
||||||
if (size > 0) {
|
if (hindex >= size) {
|
||||||
/*
|
|
||||||
* We must search for the start of a probe sequence to make
|
|
||||||
* in-place operation work.
|
|
||||||
*/
|
|
||||||
hindex = 0;
|
hindex = 0;
|
||||||
bucket = table;
|
bucket = table;
|
||||||
while (bucket->hashValue != 0) {
|
|
||||||
hindex++;
|
|
||||||
bucket++;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < size; i++) {
|
|
||||||
if (bucket->hashValue != 0) {
|
|
||||||
nindex = bucket->hashValue & (newSize - 1);
|
|
||||||
|
|
||||||
while (nindex != hindex) {
|
|
||||||
if (table[nindex].hashValue == 0) {
|
|
||||||
table[nindex] = *bucket;
|
|
||||||
bucket->hashValue = 0;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
nindex++;
|
|
||||||
if (nindex >= newSize)
|
|
||||||
nindex = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
hindex++;
|
|
||||||
bucket++;
|
|
||||||
if (hindex >= size) {
|
|
||||||
hindex = 0;
|
|
||||||
bucket = table;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
size = newSize;
|
|
||||||
*sizePtr = newSize;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Relookup
|
|
||||||
*/
|
|
||||||
hindex = hashValue & (size - 1);
|
|
||||||
bucket = &table[hindex];
|
|
||||||
|
|
||||||
while (bucket->hashValue != 0) {
|
|
||||||
hindex++;
|
|
||||||
bucket++;
|
|
||||||
if (hindex >= size) {
|
|
||||||
hindex = 0;
|
|
||||||
bucket = table;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bucket->hashValue = hashValue;
|
|
||||||
bucket->index = aindex;
|
bucket->index = aindex;
|
||||||
|
|
||||||
return(INT_MAX);
|
return(INT_MAX);
|
||||||
@@ -9570,7 +9484,7 @@ xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
|
|||||||
unsigned attrHashSize = 0;
|
unsigned attrHashSize = 0;
|
||||||
int maxatts = ctxt->maxatts;
|
int maxatts = ctxt->maxatts;
|
||||||
int nratts, nbatts, nbdef, inputid;
|
int nratts, nbatts, nbdef, inputid;
|
||||||
int i, j, nbNs, attval, nsIndex;
|
int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
|
||||||
int alloc = 0;
|
int alloc = 0;
|
||||||
|
|
||||||
if (RAW != '<') return(NULL);
|
if (RAW != '<') return(NULL);
|
||||||
@@ -9581,6 +9495,7 @@ xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
|
|||||||
nratts = 0;
|
nratts = 0;
|
||||||
nbdef = 0;
|
nbdef = 0;
|
||||||
nbNs = 0;
|
nbNs = 0;
|
||||||
|
nbTotalDef = 0;
|
||||||
attval = 0;
|
attval = 0;
|
||||||
|
|
||||||
if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
|
if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
|
||||||
@@ -9848,7 +9763,9 @@ next_attr:
|
|||||||
if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
|
if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
|
||||||
NULL, 1) > 0)
|
NULL, 1) > 0)
|
||||||
nbNs++;
|
nbNs++;
|
||||||
}
|
} else {
|
||||||
|
nbTotalDef += 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -9882,52 +9799,78 @@ next_attr:
|
|||||||
atts[i+2] = (const xmlChar *) (ptrdiff_t) nsIndex;
|
atts[i+2] = (const xmlChar *) (ptrdiff_t) nsIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Maximum number of attributes including default attributes.
|
||||||
|
*/
|
||||||
|
maxAtts = nratts + nbTotalDef;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Verify that attribute names are unique.
|
* Verify that attribute names are unique.
|
||||||
*/
|
*/
|
||||||
for (i = 0, j = 0; j < nratts; i += 5, j++) {
|
if (maxAtts > 1) {
|
||||||
const xmlChar *nsuri;
|
attrHashSize = 4;
|
||||||
unsigned hashValue, nameHashValue, uriHashValue;
|
while (attrHashSize / 2 < (unsigned) maxAtts)
|
||||||
int res;
|
attrHashSize *= 2;
|
||||||
|
|
||||||
attname = atts[i];
|
if (attrHashSize > ctxt->attrHashMax) {
|
||||||
aprefix = atts[i+1];
|
xmlAttrHashBucket *tmp;
|
||||||
nsIndex = (ptrdiff_t) atts[i+2];
|
|
||||||
/* Hash values always have bit 31 set, see dict.c */
|
|
||||||
nameHashValue = ctxt->attallocs[j] | 0x80000000;
|
|
||||||
|
|
||||||
if (nsIndex == NS_INDEX_EMPTY) {
|
tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
|
||||||
nsuri = NULL;
|
if (tmp == NULL) {
|
||||||
uriHashValue = URI_HASH_EMPTY;
|
xmlErrMemory(ctxt, NULL);
|
||||||
} else if (nsIndex == NS_INDEX_XML) {
|
goto done;
|
||||||
nsuri = ctxt->str_xml_ns;
|
}
|
||||||
uriHashValue = URI_HASH_XML;
|
|
||||||
} else {
|
ctxt->attrHash = tmp;
|
||||||
nsuri = ctxt->nsTab[nsIndex * 2 + 1];
|
ctxt->attrHashMax = attrHashSize;
|
||||||
uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
|
memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
|
||||||
res = xmlAttrHashInsert(ctxt, i, &attrHashSize, attname, nsuri,
|
|
||||||
hashValue);
|
|
||||||
if (res < 0)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
/*
|
for (i = 0, j = 0; j < nratts; i += 5, j++) {
|
||||||
* [ WFC: Unique Att Spec ]
|
const xmlChar *nsuri;
|
||||||
* No attribute name may appear more than once in the same
|
unsigned hashValue, nameHashValue, uriHashValue;
|
||||||
* start-tag or empty-element tag.
|
int res;
|
||||||
* As extended by the Namespace in XML REC.
|
|
||||||
*/
|
attname = atts[i];
|
||||||
if (res < INT_MAX) {
|
aprefix = atts[i+1];
|
||||||
if (aprefix == atts[res+1]) {
|
nsIndex = (ptrdiff_t) atts[i+2];
|
||||||
xmlErrAttributeDup(ctxt, aprefix, attname);
|
/* Hash values always have bit 31 set, see dict.c */
|
||||||
|
nameHashValue = ctxt->attallocs[j] | 0x80000000;
|
||||||
|
|
||||||
|
if (nsIndex == NS_INDEX_EMPTY) {
|
||||||
|
nsuri = NULL;
|
||||||
|
uriHashValue = URI_HASH_EMPTY;
|
||||||
|
} else if (nsIndex == NS_INDEX_XML) {
|
||||||
|
nsuri = ctxt->str_xml_ns;
|
||||||
|
uriHashValue = URI_HASH_XML;
|
||||||
} else {
|
} else {
|
||||||
xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
|
nsuri = ctxt->nsTab[nsIndex * 2 + 1];
|
||||||
"Namespaced Attribute %s in '%s' redefined\n",
|
uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
|
||||||
attname, nsuri, NULL);
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
|
||||||
|
res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
|
||||||
|
hashValue, i);
|
||||||
|
if (res < 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* [ WFC: Unique Att Spec ]
|
||||||
|
* No attribute name may appear more than once in the same
|
||||||
|
* start-tag or empty-element tag.
|
||||||
|
* As extended by the Namespace in XML REC.
|
||||||
|
*/
|
||||||
|
if (res < INT_MAX) {
|
||||||
|
if (aprefix == atts[res+1]) {
|
||||||
|
xmlErrAttributeDup(ctxt, aprefix, attname);
|
||||||
|
} else {
|
||||||
|
xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
|
||||||
|
"Namespaced Attribute %s in '%s' redefined\n",
|
||||||
|
attname, nsuri, NULL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -9979,17 +9922,20 @@ next_attr:
|
|||||||
/*
|
/*
|
||||||
* Check whether the attribute exists
|
* Check whether the attribute exists
|
||||||
*/
|
*/
|
||||||
hashValue = xmlDictCombineHash(attr->name.hashValue, uriHashValue);
|
if (maxAtts > 1) {
|
||||||
res = xmlAttrHashInsert(ctxt, nbatts, &attrHashSize, attname,
|
hashValue = xmlDictCombineHash(attr->name.hashValue,
|
||||||
nsuri, hashValue);
|
uriHashValue);
|
||||||
if (res < 0)
|
res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
|
||||||
continue;
|
hashValue, nbatts);
|
||||||
if (res < INT_MAX) {
|
if (res < 0)
|
||||||
if (aprefix == atts[res+1])
|
|
||||||
continue;
|
continue;
|
||||||
xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
|
if (res < INT_MAX) {
|
||||||
"Namespaced Attribute %s in '%s' redefined\n",
|
if (aprefix == atts[res+1])
|
||||||
attname, nsuri, NULL);
|
continue;
|
||||||
|
xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
|
||||||
|
"Namespaced Attribute %s in '%s' redefined\n",
|
||||||
|
attname, nsuri, NULL);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
xmlParserEntityCheck(ctxt, attr->expandedSize);
|
xmlParserEntityCheck(ctxt, attr->expandedSize);
|
||||||
|
|||||||
Reference in New Issue
Block a user