mirror of
				https://gitlab.gnome.org/GNOME/libxml2.git
				synced 2025-10-24 13:33:01 +03:00 
			
		
		
		
	parser: Improve attribute hash table
There's no need to grow the hash table dynamically. The size is known up front, which simplifies the implementation.
This commit is contained in:
		
							
								
								
									
										248
									
								
								parser.c
									
									
									
									
									
								
							
							
						
						
									
										248
									
								
								parser.c
									
									
									
									
									
								
							| @@ -111,7 +111,6 @@ struct _xmlParserNsData { | |||||||
| }; | }; | ||||||
|  |  | ||||||
| struct _xmlAttrHashBucket { | struct _xmlAttrHashBucket { | ||||||
|     unsigned hashValue; |  | ||||||
|     int index; |     int index; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| @@ -9393,11 +9392,11 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt, | |||||||
| /** | /** | ||||||
|  * xmlAttrHashInsert: |  * xmlAttrHashInsert: | ||||||
|  * @ctxt: parser context |  * @ctxt: parser context | ||||||
|  * @aindex: attribute index (this is a multiple of 5) |  * @size: size of the hash table | ||||||
|  * @sizePtr: size of the hash table (input/output value) |  | ||||||
|  * @name: attribute name |  * @name: attribute name | ||||||
|  * @uri: namespace uri |  * @uri: namespace uri | ||||||
|  * @hashValue: combined hash value of name and uri |  * @hashValue: combined hash value of name and uri | ||||||
|  |  * @aindex: attribute index (this is a multiple of 5) | ||||||
|  * |  * | ||||||
|  * Inserts a new attribute into the hash table. |  * Inserts a new attribute into the hash table. | ||||||
|  * |  * | ||||||
| @@ -9405,120 +9404,35 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt, | |||||||
|  * index if an attribute was found, -1 if a memory allocation failed. |  * index if an attribute was found, -1 if a memory allocation failed. | ||||||
|  */ |  */ | ||||||
| static int | static int | ||||||
| xmlAttrHashInsert(xmlParserCtxtPtr ctxt, int aindex, unsigned *sizePtr, | xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name, | ||||||
|                   const xmlChar *name, const xmlChar *uri, |                   const xmlChar *uri, unsigned hashValue, int aindex) { | ||||||
|                   unsigned hashValue) { |  | ||||||
|     xmlAttrHashBucket *table = ctxt->attrHash; |     xmlAttrHashBucket *table = ctxt->attrHash; | ||||||
|     xmlAttrHashBucket *bucket; |     xmlAttrHashBucket *bucket; | ||||||
|     unsigned hindex; |     unsigned hindex; | ||||||
|     unsigned size = *sizePtr; |  | ||||||
|  |  | ||||||
|     if (size > 0) { |     hindex = hashValue & (size - 1); | ||||||
|         hindex = hashValue & (size - 1); |     bucket = &table[hindex]; | ||||||
|         bucket = &table[hindex]; |  | ||||||
|  |  | ||||||
|         while (bucket->hashValue != 0) { |     while (bucket->index >= 0) { | ||||||
|             const xmlChar **atts = &ctxt->atts[bucket->index]; |         const xmlChar **atts = &ctxt->atts[bucket->index]; | ||||||
|  |  | ||||||
|             if (name == atts[0]) { |         if (name == atts[0]) { | ||||||
|                 int nsIndex = (int) (ptrdiff_t) atts[2]; |             int nsIndex = (int) (ptrdiff_t) atts[2]; | ||||||
|  |  | ||||||
|                 if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) : |             if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) : | ||||||
|                     (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml) : |                 (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml) : | ||||||
|                     (uri == ctxt->nsTab[nsIndex * 2 + 1])) |                 (uri == ctxt->nsTab[nsIndex * 2 + 1])) | ||||||
|                     return(bucket->index); |                 return(bucket->index); | ||||||
|             } |  | ||||||
|  |  | ||||||
|             hindex++; |  | ||||||
|             bucket++; |  | ||||||
|             if (hindex >= size) { |  | ||||||
|                 hindex = 0; |  | ||||||
|                 bucket = table; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /* |  | ||||||
|      * Grow hash table |  | ||||||
|      */ |  | ||||||
|     if ((unsigned) aindex / 5 >= size / 2) { |  | ||||||
|         xmlAttrHashBucket *newTable; |  | ||||||
|         unsigned newSize, i, nindex; |  | ||||||
|  |  | ||||||
|         newSize = size ? size * 2 : 8; |  | ||||||
|  |  | ||||||
|         if (newSize > ctxt->attrHashMax) { |  | ||||||
|             newTable = xmlRealloc(table, newSize * sizeof(newTable[0])); |  | ||||||
|             if (newTable == NULL) { |  | ||||||
|                 xmlErrMemory(ctxt, NULL); |  | ||||||
|                 return(-1); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             table = newTable; |  | ||||||
|             ctxt->attrHash = newTable; |  | ||||||
|             ctxt->attrHashMax = newSize; |  | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         memset(&table[size], 0, (newSize - size) * sizeof(table[0])); |         hindex++; | ||||||
|  |         bucket++; | ||||||
|         if (size > 0) { |         if (hindex >= size) { | ||||||
|             /* |  | ||||||
|              * We must search for the start of a probe sequence to make |  | ||||||
|              * in-place operation work. |  | ||||||
|              */ |  | ||||||
|             hindex = 0; |             hindex = 0; | ||||||
|             bucket = table; |             bucket = table; | ||||||
|             while (bucket->hashValue != 0) { |  | ||||||
|                 hindex++; |  | ||||||
|                 bucket++; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             for (i = 0; i < size; i++) { |  | ||||||
|                 if (bucket->hashValue != 0) { |  | ||||||
|                     nindex = bucket->hashValue & (newSize - 1); |  | ||||||
|  |  | ||||||
|                     while (nindex != hindex) { |  | ||||||
|                         if (table[nindex].hashValue == 0) { |  | ||||||
|                             table[nindex] = *bucket; |  | ||||||
|                             bucket->hashValue = 0; |  | ||||||
|                             break; |  | ||||||
|                         } |  | ||||||
|  |  | ||||||
|                         nindex++; |  | ||||||
|                         if (nindex >= newSize) |  | ||||||
|                             nindex = 0; |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 hindex++; |  | ||||||
|                 bucket++; |  | ||||||
|                 if (hindex >= size) { |  | ||||||
|                     hindex = 0; |  | ||||||
|                     bucket = table; |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         size = newSize; |  | ||||||
|         *sizePtr = newSize; |  | ||||||
|  |  | ||||||
|         /* |  | ||||||
|          * Relookup |  | ||||||
|          */ |  | ||||||
|         hindex = hashValue & (size - 1); |  | ||||||
|         bucket = &table[hindex]; |  | ||||||
|  |  | ||||||
|         while (bucket->hashValue != 0) { |  | ||||||
|             hindex++; |  | ||||||
|             bucket++; |  | ||||||
|             if (hindex >= size) { |  | ||||||
|                 hindex = 0; |  | ||||||
|                 bucket = table; |  | ||||||
|             } |  | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     bucket->hashValue = hashValue; |  | ||||||
|     bucket->index = aindex; |     bucket->index = aindex; | ||||||
|  |  | ||||||
|     return(INT_MAX); |     return(INT_MAX); | ||||||
| @@ -9570,7 +9484,7 @@ xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, | |||||||
|     unsigned attrHashSize = 0; |     unsigned attrHashSize = 0; | ||||||
|     int maxatts = ctxt->maxatts; |     int maxatts = ctxt->maxatts; | ||||||
|     int nratts, nbatts, nbdef, inputid; |     int nratts, nbatts, nbdef, inputid; | ||||||
|     int i, j, nbNs, attval, nsIndex; |     int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts; | ||||||
|     int alloc = 0; |     int alloc = 0; | ||||||
|  |  | ||||||
|     if (RAW != '<') return(NULL); |     if (RAW != '<') return(NULL); | ||||||
| @@ -9581,6 +9495,7 @@ xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, | |||||||
|     nratts = 0; |     nratts = 0; | ||||||
|     nbdef = 0; |     nbdef = 0; | ||||||
|     nbNs = 0; |     nbNs = 0; | ||||||
|  |     nbTotalDef = 0; | ||||||
|     attval = 0; |     attval = 0; | ||||||
|  |  | ||||||
|     if (xmlParserNsStartElement(ctxt->nsdb) < 0) { |     if (xmlParserNsStartElement(ctxt->nsdb) < 0) { | ||||||
| @@ -9848,7 +9763,9 @@ next_attr: | |||||||
|                     if (xmlParserNsPush(ctxt, &attr->name, &attr->value, |                     if (xmlParserNsPush(ctxt, &attr->name, &attr->value, | ||||||
|                                       NULL, 1) > 0) |                                       NULL, 1) > 0) | ||||||
|                         nbNs++; |                         nbNs++; | ||||||
| 		} | 		} else { | ||||||
|  |                     nbTotalDef += 1; | ||||||
|  |                 } | ||||||
| 	    } | 	    } | ||||||
| 	} | 	} | ||||||
|     } |     } | ||||||
| @@ -9882,52 +9799,78 @@ next_attr: | |||||||
|         atts[i+2] = (const xmlChar *) (ptrdiff_t) nsIndex; |         atts[i+2] = (const xmlChar *) (ptrdiff_t) nsIndex; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /* | ||||||
|  |      * Maximum number of attributes including default attributes. | ||||||
|  |      */ | ||||||
|  |     maxAtts = nratts + nbTotalDef; | ||||||
|  |  | ||||||
|     /* |     /* | ||||||
|      * Verify that attribute names are unique. |      * Verify that attribute names are unique. | ||||||
|      */ |      */ | ||||||
|     for (i = 0, j = 0; j < nratts; i += 5, j++) { |     if (maxAtts > 1) { | ||||||
|         const xmlChar *nsuri; |         attrHashSize = 4; | ||||||
|         unsigned hashValue, nameHashValue, uriHashValue; |         while (attrHashSize / 2 < (unsigned) maxAtts) | ||||||
|         int res; |             attrHashSize *= 2; | ||||||
|  |  | ||||||
|         attname = atts[i]; |         if (attrHashSize > ctxt->attrHashMax) { | ||||||
|         aprefix = atts[i+1]; |             xmlAttrHashBucket *tmp; | ||||||
|         nsIndex = (ptrdiff_t) atts[i+2]; |  | ||||||
|         /* Hash values always have bit 31 set, see dict.c */ |  | ||||||
|         nameHashValue = ctxt->attallocs[j] | 0x80000000; |  | ||||||
|  |  | ||||||
|         if (nsIndex == NS_INDEX_EMPTY) { |             tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0])); | ||||||
|             nsuri = NULL; |             if (tmp == NULL) { | ||||||
|             uriHashValue = URI_HASH_EMPTY; |                 xmlErrMemory(ctxt, NULL); | ||||||
|         } else if (nsIndex == NS_INDEX_XML) { |                 goto done; | ||||||
|             nsuri = ctxt->str_xml_ns; |             } | ||||||
|             uriHashValue = URI_HASH_XML; |  | ||||||
|         } else { |             ctxt->attrHash = tmp; | ||||||
|             nsuri = ctxt->nsTab[nsIndex * 2 + 1]; |             ctxt->attrHashMax = attrHashSize; | ||||||
|             uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue; |  | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         hashValue = xmlDictCombineHash(nameHashValue, uriHashValue); |         memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0])); | ||||||
|         res = xmlAttrHashInsert(ctxt, i, &attrHashSize, attname, nsuri, |  | ||||||
|                                 hashValue); |  | ||||||
|         if (res < 0) |  | ||||||
|             continue; |  | ||||||
|  |  | ||||||
| 	/* |         for (i = 0, j = 0; j < nratts; i += 5, j++) { | ||||||
| 	 * [ WFC: Unique Att Spec ] |             const xmlChar *nsuri; | ||||||
| 	 * No attribute name may appear more than once in the same |             unsigned hashValue, nameHashValue, uriHashValue; | ||||||
| 	 * start-tag or empty-element tag. |             int res; | ||||||
| 	 * As extended by the Namespace in XML REC. |  | ||||||
| 	 */ |             attname = atts[i]; | ||||||
|         if (res < INT_MAX) { |             aprefix = atts[i+1]; | ||||||
|             if (aprefix == atts[res+1]) { |             nsIndex = (ptrdiff_t) atts[i+2]; | ||||||
|                 xmlErrAttributeDup(ctxt, aprefix, attname); |             /* Hash values always have bit 31 set, see dict.c */ | ||||||
|  |             nameHashValue = ctxt->attallocs[j] | 0x80000000; | ||||||
|  |  | ||||||
|  |             if (nsIndex == NS_INDEX_EMPTY) { | ||||||
|  |                 nsuri = NULL; | ||||||
|  |                 uriHashValue = URI_HASH_EMPTY; | ||||||
|  |             } else if (nsIndex == NS_INDEX_XML) { | ||||||
|  |                 nsuri = ctxt->str_xml_ns; | ||||||
|  |                 uriHashValue = URI_HASH_XML; | ||||||
|             } else { |             } else { | ||||||
|                 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, |                 nsuri = ctxt->nsTab[nsIndex * 2 + 1]; | ||||||
|                          "Namespaced Attribute %s in '%s' redefined\n", |                 uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue; | ||||||
|                          attname, nsuri, NULL); |  | ||||||
|             } |             } | ||||||
| 	} |  | ||||||
|  |             hashValue = xmlDictCombineHash(nameHashValue, uriHashValue); | ||||||
|  |             res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri, | ||||||
|  |                                     hashValue, i); | ||||||
|  |             if (res < 0) | ||||||
|  |                 continue; | ||||||
|  |  | ||||||
|  |             /* | ||||||
|  |              * [ WFC: Unique Att Spec ] | ||||||
|  |              * No attribute name may appear more than once in the same | ||||||
|  |              * start-tag or empty-element tag. | ||||||
|  |              * As extended by the Namespace in XML REC. | ||||||
|  |              */ | ||||||
|  |             if (res < INT_MAX) { | ||||||
|  |                 if (aprefix == atts[res+1]) { | ||||||
|  |                     xmlErrAttributeDup(ctxt, aprefix, attname); | ||||||
|  |                 } else { | ||||||
|  |                     xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, | ||||||
|  |                              "Namespaced Attribute %s in '%s' redefined\n", | ||||||
|  |                              attname, nsuri, NULL); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /* |     /* | ||||||
| @@ -9979,17 +9922,20 @@ next_attr: | |||||||
|                 /* |                 /* | ||||||
|                  * Check whether the attribute exists |                  * Check whether the attribute exists | ||||||
|                  */ |                  */ | ||||||
|                 hashValue = xmlDictCombineHash(attr->name.hashValue, uriHashValue); |                 if (maxAtts > 1) { | ||||||
|                 res = xmlAttrHashInsert(ctxt, nbatts, &attrHashSize, attname, |                     hashValue = xmlDictCombineHash(attr->name.hashValue, | ||||||
|                                         nsuri, hashValue); |                                                    uriHashValue); | ||||||
|                 if (res < 0) |                     res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri, | ||||||
|                     continue; |                                             hashValue, nbatts); | ||||||
|                 if (res < INT_MAX) { |                     if (res < 0) | ||||||
|                     if (aprefix == atts[res+1]) |  | ||||||
|                         continue; |                         continue; | ||||||
|                     xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, |                     if (res < INT_MAX) { | ||||||
|                              "Namespaced Attribute %s in '%s' redefined\n", |                         if (aprefix == atts[res+1]) | ||||||
|                              attname, nsuri, NULL); |                             continue; | ||||||
|  |                         xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, | ||||||
|  |                                  "Namespaced Attribute %s in '%s' redefined\n", | ||||||
|  |                                  attname, nsuri, NULL); | ||||||
|  |                     } | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
|                 xmlParserEntityCheck(ctxt, attr->expandedSize); |                 xmlParserEntityCheck(ctxt, attr->expandedSize); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user