1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-26 00:37:43 +03:00

parser: Use hash tables to avoid quadratic behavior

Use a hash table to lookup namespaces by prefix. The hash table stores
an index into the namespace table. Auxiliary data for namespaces is
stored in a separate array along the main namespace table.

Use a hash table to verify attribute uniqueness. The hash table stores
an index into the attribute table.

Reuse hash value from the dictionary to avoid computing them twice.

See #346.
This commit is contained in:
Nick Wellnhofer
2023-09-29 00:18:44 +02:00
parent e48f3d8e0a
commit e0dd330b8f
5 changed files with 1051 additions and 374 deletions

View File

@@ -172,6 +172,8 @@ typedef enum {
} xmlParserMode;
typedef struct _xmlStartTag xmlStartTag;
typedef struct _xmlParserNsData xmlParserNsData;
typedef struct _xmlAttrHashBucket xmlAttrHashBucket;
/**
* xmlParserCtxt:
@@ -282,7 +284,7 @@ struct _xmlParserCtxt {
int nsNr; /* the number of inherited namespaces */
int nsMax; /* the size of the arrays */
const xmlChar * *nsTab; /* the array of prefix/namespace name */
int *attallocs; /* which attribute were allocated */
unsigned *attallocs; /* which attribute were allocated */
xmlStartTag *pushTab; /* array of data for push */
xmlHashTablePtr attsDefault; /* defaulted attributes if any */
xmlHashTablePtr attsSpecial; /* non-CDATA attributes if any */
@@ -319,6 +321,10 @@ struct _xmlParserCtxt {
unsigned short nbErrors; /* number of errors */
unsigned short nbWarnings; /* number of warnings */
unsigned maxAmpl; /* maximum amplification factor */
xmlParserNsData *nsdb; /* namespace database */
unsigned attrHashMax; /* allocated size */
xmlAttrHashBucket *attrHash; /* atttribute hash table */
};
/**