diff --git a/SAX2.c b/SAX2.c index e9cd21b3..e20ec886 100644 --- a/SAX2.c +++ b/SAX2.c @@ -1870,8 +1870,12 @@ xmlSAX2AttributeNs(xmlParserCtxtPtr ctxt, /* * Note: if prefix == NULL, the attribute is not in the default namespace */ - if (prefix != NULL) - namespace = xmlSearchNs(ctxt->myDoc, ctxt->node, prefix); + if (prefix != NULL) { + namespace = xmlParserNsLookupSax(ctxt, prefix); + if ((namespace == NULL) && (xmlStrEqual(prefix, BAD_CAST "xml"))) { + namespace = xmlSearchNs(ctxt->myDoc, ctxt->node, prefix); + } + } /* * allocate the node @@ -2201,6 +2205,9 @@ xmlSAX2StartElementNs(void *ctx, */ continue; } + + xmlParserNsUpdateSax(ctxt, pref, ns); + #ifdef LIBXML_VALID_ENABLED if ((!ctxt->html) && ctxt->validate && ctxt->wellFormed && ctxt->myDoc && ctxt->myDoc->intSubset) { @@ -2242,7 +2249,7 @@ xmlSAX2StartElementNs(void *ctx, * Note that, if prefix is NULL, this searches for the default Ns */ if ((URI != NULL) && (ret->ns == NULL)) { - ret->ns = xmlSearchNs(ctxt->myDoc, parent, prefix); + ret->ns = xmlParserNsLookupSax(ctxt, prefix); if ((ret->ns == NULL) && (xmlStrEqual(prefix, BAD_CAST "xml"))) { ret->ns = xmlSearchNs(ctxt->myDoc, ret, prefix); } diff --git a/include/libxml/parser.h b/include/libxml/parser.h index 03ebd503..a848dccf 100644 --- a/include/libxml/parser.h +++ b/include/libxml/parser.h @@ -172,6 +172,8 @@ typedef enum { } xmlParserMode; typedef struct _xmlStartTag xmlStartTag; +typedef struct _xmlParserNsData xmlParserNsData; +typedef struct _xmlAttrHashBucket xmlAttrHashBucket; /** * xmlParserCtxt: @@ -282,7 +284,7 @@ struct _xmlParserCtxt { int nsNr; /* the number of inherited namespaces */ int nsMax; /* the size of the arrays */ const xmlChar * *nsTab; /* the array of prefix/namespace name */ - int *attallocs; /* which attribute were allocated */ + unsigned *attallocs; /* which attribute were allocated */ xmlStartTag *pushTab; /* array of data for push */ xmlHashTablePtr attsDefault; /* defaulted attributes if any */ xmlHashTablePtr attsSpecial; /* non-CDATA attributes if any */ @@ -319,6 +321,10 @@ struct _xmlParserCtxt { unsigned short nbErrors; /* number of errors */ unsigned short nbWarnings; /* number of warnings */ unsigned maxAmpl; /* maximum amplification factor */ + + xmlParserNsData *nsdb; /* namespace database */ + unsigned attrHashMax; /* allocated size */ + xmlAttrHashBucket *attrHash; /* atttribute hash table */ }; /** diff --git a/include/private/parser.h b/include/private/parser.h index 50cf2187..67495d11 100644 --- a/include/private/parser.h +++ b/include/private/parser.h @@ -49,4 +49,16 @@ xmlDetectEncoding(xmlParserCtxtPtr ctxt); XML_HIDDEN void xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding); +/* + * These functions allow SAX handlers to attach extra data to namespaces + * efficiently and should be made public. + */ +void +xmlParserNsFree(xmlParserNsData *nsdb); +XML_HIDDEN int +xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix, + void *saxData); +XML_HIDDEN void * +xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix); + #endif /* XML_PARSER_H_PRIVATE__ */ diff --git a/parser.c b/parser.c index 20b7df49..02c880f0 100644 --- a/parser.c +++ b/parser.c @@ -74,6 +74,11 @@ #include "private/io.h" #include "private/parser.h" +#define NS_INDEX_EMPTY INT_MAX +#define NS_INDEX_XML (INT_MAX - 1) +#define URI_HASH_EMPTY 0xD943A04E +#define URI_HASH_XML 0xF0451F02 + struct _xmlStartTag { const xmlChar *prefix; const xmlChar *URI; @@ -81,6 +86,35 @@ struct _xmlStartTag { int nsNr; }; +typedef struct { + void *saxData; + unsigned prefixHashValue; + unsigned uriHashValue; + unsigned elementId; + int oldIndex; +} xmlParserNsExtra; + +typedef struct { + unsigned hashValue; + int index; +} xmlParserNsBucket; + +struct _xmlParserNsData { + xmlParserNsExtra *extra; + + unsigned hashSize; + unsigned hashElems; + xmlParserNsBucket *hash; + + unsigned elementId; + int defaultNsIndex; +}; + +struct _xmlAttrHashBucket { + unsigned hashValue; + int index; +}; + static xmlParserCtxtPtr xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData, const xmlChar *URL, const xmlChar *ID, const xmlChar *base, @@ -130,7 +164,6 @@ unsigned int xmlParserMaxDepth = 256; -#define SAX2 1 #define XML_PARSER_BIG_BUFFER_SIZE 300 #define XML_PARSER_BUFFER_SIZE 100 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" @@ -846,6 +879,14 @@ xmlDetectSAX2(xmlParserCtxtPtr ctxt) { } } +typedef struct { + xmlHashedString prefix; + xmlHashedString name; + xmlHashedString value; + const xmlChar *valueEnd; + int external; +} xmlDefAttr; + typedef struct _xmlDefAttrs xmlDefAttrs; typedef xmlDefAttrs *xmlDefAttrsPtr; struct _xmlDefAttrs { @@ -853,9 +894,9 @@ struct _xmlDefAttrs { int maxAttrs; /* the size of the array */ #if __STDC_VERSION__ >= 199901L /* Using a C99 flexible array member avoids UBSan errors. */ - const xmlChar *values[]; /* array of localname/prefix/values/external */ + xmlDefAttr attrs[]; /* array of localname/prefix/values/external */ #else - const xmlChar *values[5]; + xmlDefAttr attrs[1]; #endif }; @@ -972,9 +1013,12 @@ xmlAddDefAttrs(xmlParserCtxtPtr ctxt, const xmlChar *fullattr, const xmlChar *value) { xmlDefAttrsPtr defaults; + xmlDefAttr *attr; int len; - const xmlChar *name; - const xmlChar *prefix; + xmlHashedString name; + xmlHashedString prefix; + xmlHashedString hvalue; + const xmlChar *localname; /* * Allows to detect attribute redefinitions @@ -994,41 +1038,38 @@ xmlAddDefAttrs(xmlParserCtxtPtr ctxt, * split the element name into prefix:localname , the string found * are within the DTD and then not associated to namespace names. */ - name = xmlSplitQName3(fullname, &len); - if (name == NULL) { - name = xmlDictLookup(ctxt->dict, fullname, -1); - prefix = NULL; + localname = xmlSplitQName3(fullname, &len); + if (localname == NULL) { + name = xmlDictLookupHashed(ctxt->dict, fullname, -1); + prefix.name = NULL; } else { - name = xmlDictLookup(ctxt->dict, name, -1); - prefix = xmlDictLookup(ctxt->dict, fullname, len); + name = xmlDictLookupHashed(ctxt->dict, localname, -1); + prefix = xmlDictLookupHashed(ctxt->dict, fullname, len); + if (prefix.name == NULL) + goto mem_error; } + if (name.name == NULL) + goto mem_error; /* * make sure there is some storage */ - defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); - if (defaults == NULL) { - defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + - (4 * 5) * sizeof(const xmlChar *)); - if (defaults == NULL) - goto mem_error; - defaults->nbAttrs = 0; - defaults->maxAttrs = 4; - if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, - defaults, NULL) < 0) { - xmlFree(defaults); - goto mem_error; - } - } else if (defaults->nbAttrs >= defaults->maxAttrs) { + defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name); + if ((defaults == NULL) || + (defaults->nbAttrs >= defaults->maxAttrs)) { xmlDefAttrsPtr temp; + int newSize; - temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + - (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); + newSize = (defaults != NULL) ? 2 * defaults->maxAttrs : 4; + temp = xmlRealloc(defaults, + sizeof(*defaults) + newSize * sizeof(xmlDefAttr)); if (temp == NULL) goto mem_error; - defaults = temp; - defaults->maxAttrs *= 2; - if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, + if (defaults == NULL) + temp->nbAttrs = 0; + temp->maxAttrs = newSize; + defaults = temp; + if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name, defaults, NULL) < 0) { xmlFree(defaults); goto mem_error; @@ -1036,32 +1077,34 @@ xmlAddDefAttrs(xmlParserCtxtPtr ctxt, } /* - * Split the element name into prefix:localname , the string found + * Split the attribute name into prefix:localname , the string found * are within the DTD and hen not associated to namespace names. */ - name = xmlSplitQName3(fullattr, &len); - if (name == NULL) { - name = xmlDictLookup(ctxt->dict, fullattr, -1); - prefix = NULL; + localname = xmlSplitQName3(fullattr, &len); + if (localname == NULL) { + name = xmlDictLookupHashed(ctxt->dict, fullattr, -1); + prefix.name = NULL; } else { - name = xmlDictLookup(ctxt->dict, name, -1); - prefix = xmlDictLookup(ctxt->dict, fullattr, len); + name = xmlDictLookupHashed(ctxt->dict, localname, -1); + prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len); + if (prefix.name == NULL) + goto mem_error; } + if (name.name == NULL) + goto mem_error; - defaults->values[5 * defaults->nbAttrs] = name; - defaults->values[5 * defaults->nbAttrs + 1] = prefix; /* intern the string and precompute the end */ len = xmlStrlen(value); - value = xmlDictLookup(ctxt->dict, value, len); - if (value == NULL) + hvalue = xmlDictLookupHashed(ctxt->dict, value, len); + if (hvalue.name == NULL) goto mem_error; - defaults->values[5 * defaults->nbAttrs + 2] = value; - defaults->values[5 * defaults->nbAttrs + 3] = value + len; - if (ctxt->external) - defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; - else - defaults->values[5 * defaults->nbAttrs + 4] = NULL; - defaults->nbAttrs++; + + attr = &defaults->attrs[defaults->nbAttrs++]; + attr->name = name; + attr->prefix = prefix; + attr->value = hvalue; + attr->valueEnd = hvalue.name + len; + attr->external = ctxt->external; return; @@ -1347,93 +1390,455 @@ region_m49: static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str); -#ifdef SAX2 /** - * nsPush: - * @ctxt: an XML parser context - * @prefix: the namespace prefix or NULL - * @URL: the namespace name + * xmlParserNsCreate: * - * Pushes a new parser namespace on top of the ns stack + * Create a new namespace database. * - * Returns -1 in case of error, -2 if the namespace should be discarded - * and the index in the stack otherwise. + * Returns the new obejct. + */ +static xmlParserNsData * +xmlParserNsCreate(void) { + xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb)); + + if (nsdb == NULL) + return(NULL); + memset(nsdb, 0, sizeof(*nsdb)); + nsdb->defaultNsIndex = INT_MAX; + + return(nsdb); +} + +/** + * xmlParserNsFree: + * @nsdb: namespace database + * + * Free a namespace database. + */ +void +xmlParserNsFree(xmlParserNsData *nsdb) { + if (nsdb == NULL) + return; + + xmlFree(nsdb->extra); + xmlFree(nsdb->hash); + xmlFree(nsdb); +} + +/** + * xmlParserNsReset: + * @nsdb: namespace database + * + * Reset a namespace database. + */ +static void +xmlParserNsReset(xmlParserNsData *nsdb) { + if (nsdb == NULL) + return; + + nsdb->hashElems = 0; + nsdb->elementId = 0; + nsdb->defaultNsIndex = INT_MAX; + + if (nsdb->hash) + memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0])); +} + +/** + * xmlParserStartElement: + * @nsdb: namespace database + * + * Signal that a new element has started. + * + * Returns 0 on success, -1 if the element counter overflowed. */ static int -nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) -{ - if (ctxt->options & XML_PARSE_NSCLEAN) { - int i; - for (i = ctxt->nsNr - 2;i >= 0;i -= 2) { - if (ctxt->nsTab[i] == prefix) { - /* in scope */ - if (ctxt->nsTab[i + 1] == URL) - return(-2); - /* out of scope keep it */ - break; - } - } - } - if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { - ctxt->nsMax = 10; - ctxt->nsNr = 0; - ctxt->nsTab = (const xmlChar **) - xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); - if (ctxt->nsTab == NULL) { - xmlErrMemory(ctxt, NULL); - ctxt->nsMax = 0; - return (-1); - } - } else if (ctxt->nsNr >= ctxt->nsMax) { - const xmlChar ** tmp; - ctxt->nsMax *= 2; - tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, - ctxt->nsMax * sizeof(ctxt->nsTab[0])); - if (tmp == NULL) { - xmlErrMemory(ctxt, NULL); - ctxt->nsMax /= 2; - return (-1); - } - ctxt->nsTab = tmp; - } - ctxt->nsTab[ctxt->nsNr++] = prefix; - ctxt->nsTab[ctxt->nsNr++] = URL; - return (ctxt->nsNr); +xmlParserNsStartElement(xmlParserNsData *nsdb) { + if (nsdb->elementId == UINT_MAX) + return(-1); + nsdb->elementId++; + + return(0); } + /** - * nsPop: + * xmlParserNsLookup: + * @ctxt: parser context + * @prefix: namespace prefix + * @bucketPtr: optional bucket (return value) + * + * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will + * be set to the matching bucket, or the first empty bucket if no match + * was found. + * + * Returns the namespace index on success, INT_MAX if no namespace was + * found. + */ +static int +xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix, + xmlParserNsBucket **bucketPtr) { + xmlParserNsBucket *bucket; + unsigned index, hashValue; + + if (prefix->name == NULL) + return(ctxt->nsdb->defaultNsIndex); + + if (ctxt->nsdb->hashSize == 0) + return(INT_MAX); + + hashValue = prefix->hashValue; + index = hashValue & (ctxt->nsdb->hashSize - 1); + bucket = &ctxt->nsdb->hash[index]; + + while (bucket->hashValue) { + if ((bucket->hashValue == hashValue) && + (bucket->index != INT_MAX)) { + if (ctxt->nsTab[bucket->index * 2] == prefix->name) { + if (bucketPtr != NULL) + *bucketPtr = bucket; + return(bucket->index); + } + } + + index++; + bucket++; + if (index == ctxt->nsdb->hashSize) { + index = 0; + bucket = ctxt->nsdb->hash; + } + } + + if (bucketPtr != NULL) + *bucketPtr = bucket; + return(INT_MAX); +} + +/** + * xmlParserNsLookupUri: + * @ctxt: parser context + * @prefix: namespace prefix + * + * Lookup namespace URI with given prefix. + * + * Returns the namespace URI on success, NULL if no namespace was found. + */ +static const xmlChar * +xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) { + const xmlChar *ret; + int nsIndex; + + if (prefix->name == ctxt->str_xml) + return(ctxt->str_xml_ns); + + nsIndex = xmlParserNsLookup(ctxt, prefix, NULL); + if (nsIndex == INT_MAX) + return(NULL); + + ret = ctxt->nsTab[nsIndex * 2 + 1]; + if (ret[0] == 0) + ret = NULL; + return(ret); +} + +/** + * xmlParserNsLookupSax: + * @ctxt: parser context + * @prefix: namespace prefix + * + * Lookup extra data for the given prefix. This returns data stored + * with xmlParserNsUdpateSax. + * + * Returns the data on success, NULL if no namespace was found. + */ +void * +xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { + xmlHashedString hprefix; + int nsIndex; + + if (prefix == ctxt->str_xml) + return(NULL); + + hprefix.name = prefix; + if (prefix != NULL) + hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix); + else + hprefix.hashValue = 0; + nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL); + if (nsIndex == INT_MAX) + return(NULL); + + return(ctxt->nsdb->extra[nsIndex].saxData); +} + +/** + * xmlParserNsUpdateSax: + * @ctxt: parser context + * @prefix: namespace prefix + * @saxData: extra data for SAX handler + * + * Sets or updates extra data for the given prefix. This value will be + * returned by xmlParserNsLookupSax as long as the namespace with the + * given prefix is in scope. + * + * Returns the data on success, NULL if no namespace was found. + */ +int +xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix, + void *saxData) { + xmlHashedString hprefix; + int nsIndex; + + if (prefix == ctxt->str_xml) + return(-1); + + hprefix.name = prefix; + if (prefix != NULL) + hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix); + else + hprefix.hashValue = 0; + nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL); + if (nsIndex == INT_MAX) + return(-1); + + ctxt->nsdb->extra[nsIndex].saxData = saxData; + return(0); +} + +/** + * xmlParserNsGrow: + * @ctxt: parser context + * + * Grows the namespace tables. + * + * Returns 0 on success, -1 if a memory allocation failed. + */ +static int +xmlParserNsGrow(xmlParserCtxtPtr ctxt) { + const xmlChar **table; + xmlParserNsExtra *extra; + int newSize; + + if (ctxt->nsMax > INT_MAX / 2) + goto error; + newSize = ctxt->nsMax ? ctxt->nsMax * 2 : 16; + + table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0])); + if (table == NULL) + goto error; + ctxt->nsTab = table; + + extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0])); + if (extra == NULL) + goto error; + ctxt->nsdb->extra = extra; + + ctxt->nsMax = newSize; + return(0); + +error: + xmlErrMemory(ctxt, NULL); + return(-1); +} + +/** + * xmlParserNsPush: + * @ctxt: parser context + * @prefix: prefix with hash value + * @uri: uri with hash value + * @saxData: extra data for SAX handler + * @defAttr: whether the namespace comes from a default attribute + * + * Push a new namespace on the table. + * + * Returns 1 if the namespace was pushed, 0 if the namespace was ignored, + * -1 if a memory allocation failed. + */ +static int +xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix, + const xmlHashedString *uri, void *saxData, int defAttr) { + xmlParserNsBucket *bucket = NULL; + xmlParserNsExtra *extra; + const xmlChar **ns; + unsigned hashValue, nsIndex, oldIndex; + + if ((prefix != NULL) && (prefix->name == ctxt->str_xml)) + return(0); + + if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) { + xmlErrMemory(ctxt, NULL); + return(-1); + } + + /* + * Default namespace and 'xml' namespace + */ + if ((prefix == NULL) || (prefix->name == NULL)) { + oldIndex = ctxt->nsdb->defaultNsIndex; + + if (oldIndex != INT_MAX) { + if (defAttr != 0) + return(0); + + extra = &ctxt->nsdb->extra[oldIndex]; + + if (extra->elementId == ctxt->nsdb->elementId) { + xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns"); + return(0); + } + } + + ctxt->nsdb->defaultNsIndex = ctxt->nsNr; + goto populate_entry; + } + + /* + * Hash table lookup + */ + oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket); + if (oldIndex != INT_MAX) { + extra = &ctxt->nsdb->extra[oldIndex]; + + if (defAttr != 0) + return(0); + + /* + * Check for duplicate definitions on the same element. + */ + if (extra->elementId == ctxt->nsdb->elementId) { + xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name); + return(0); + } + + bucket->index = ctxt->nsNr; + goto populate_entry; + } + + /* + * Insert new bucket + */ + + hashValue = prefix->hashValue; + + /* + * Grow hash table, 50% fill factor + */ + if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) { + xmlParserNsBucket *newHash; + unsigned newSize, i, index; + + if (ctxt->nsdb->hashSize > UINT_MAX / 2) { + xmlErrMemory(ctxt, NULL); + return(-1); + } + newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16; + newHash = xmlMalloc(newSize * sizeof(newHash[0])); + if (newHash == NULL) { + xmlErrMemory(ctxt, NULL); + return(-1); + } + memset(newHash, 0, newSize * sizeof(newHash[0])); + + for (i = 0; i < ctxt->nsdb->hashSize; i++) { + unsigned hv = ctxt->nsdb->hash[i].hashValue; + unsigned newIndex; + + if (hv == 0) + continue; + newIndex = hv & (newSize - 1); + + while (newHash[newIndex].hashValue != 0) { + newIndex++; + if (newIndex == newSize) + newIndex = 0; + } + + newHash[newIndex] = ctxt->nsdb->hash[i]; + } + + xmlFree(ctxt->nsdb->hash); + ctxt->nsdb->hash = newHash; + ctxt->nsdb->hashSize = newSize; + + /* + * Relookup + */ + index = hashValue & (newSize - 1); + + while (newHash[index].hashValue != 0) { + index++; + if (index == newSize) + index = 0; + } + + bucket = &newHash[index]; + } + + bucket->hashValue = hashValue; + bucket->index = ctxt->nsNr; + ctxt->nsdb->hashElems++; + oldIndex = INT_MAX; + +populate_entry: + nsIndex = ctxt->nsNr; + + ns = &ctxt->nsTab[nsIndex * 2]; + ns[0] = prefix ? prefix->name : NULL; + ns[1] = uri->name; + + extra = &ctxt->nsdb->extra[nsIndex]; + extra->saxData = saxData; + extra->prefixHashValue = prefix ? prefix->hashValue : 0; + extra->uriHashValue = uri->hashValue; + extra->elementId = ctxt->nsdb->elementId; + extra->oldIndex = oldIndex; + + ctxt->nsNr++; + + return(1); +} + +/** + * xmlParserNsPop: * @ctxt: an XML parser context * @nr: the number to pop * - * Pops the top @nr parser prefix/namespace from the ns stack + * Pops the top @nr namespaces and restores the hash table. * - * Returns the number of namespaces removed + * Returns the number of namespaces popped. */ static int -nsPop(xmlParserCtxtPtr ctxt, int nr) +xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr) { int i; - if (ctxt->nsTab == NULL) return(0); - if (ctxt->nsNr < nr) { - xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); - nr = ctxt->nsNr; - } - if (ctxt->nsNr <= 0) - return (0); + /* assert(nr <= ctxt->nsNr); */ - for (i = 0;i < nr;i++) { - ctxt->nsNr--; - ctxt->nsTab[ctxt->nsNr] = NULL; + for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) { + const xmlChar *prefix = ctxt->nsTab[i * 2]; + xmlParserNsExtra *extra = &ctxt->nsdb->extra[i]; + + if (prefix == NULL) { + ctxt->nsdb->defaultNsIndex = extra->oldIndex; + } else { + xmlHashedString hprefix; + xmlParserNsBucket *bucket = NULL; + + hprefix.name = prefix; + hprefix.hashValue = extra->prefixHashValue; + xmlParserNsLookup(ctxt, &hprefix, &bucket); + /* assert(bucket && bucket->hashValue); */ + bucket->index = extra->oldIndex; + } } + + ctxt->nsNr -= nr; return(nr); } -#endif static int xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { const xmlChar **atts; - int *attallocs; + unsigned *attallocs; int maxatts; if (nr + 5 > ctxt->maxatts) { @@ -1441,8 +1846,8 @@ xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { atts = (const xmlChar **) xmlMalloc( maxatts * sizeof(const xmlChar *)); if (atts == NULL) goto mem_error; - attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, - (maxatts / 5) * sizeof(int)); + attallocs = xmlRealloc(ctxt->attallocs, + (maxatts / 5) * sizeof(attallocs[0])); if (attallocs == NULL) { xmlFree(atts); goto mem_error; @@ -8406,30 +8811,6 @@ xmlParseEndTag(xmlParserCtxtPtr ctxt) { * * ************************************************************************/ -/* - * xmlGetNamespace: - * @ctxt: an XML parser context - * @prefix: the prefix to lookup - * - * Lookup the namespace name for the @prefix (which ca be NULL) - * The prefix must come from the @ctxt->dict dictionary - * - * Returns the namespace name or NULL if not bound - */ -static const xmlChar * -xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { - int i; - - if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); - for (i = ctxt->nsNr - 2;i >= 0;i-=2) - if (ctxt->nsTab[i] == prefix) { - if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) - return(NULL); - return(ctxt->nsTab[i + 1]); - } - return(NULL); -} - /** * xmlParseQNameHashed: * @ctxt: an XML parser context @@ -8778,24 +9159,30 @@ need_complex: * Returns the attribute name, and the value in *value, . */ -static const xmlChar * +static xmlHashedString xmlParseAttribute2(xmlParserCtxtPtr ctxt, const xmlChar * pref, const xmlChar * elem, - const xmlChar ** prefix, xmlChar ** value, + xmlHashedString * hprefix, xmlChar ** value, int *len, int *alloc) { - const xmlChar *name; + xmlHashedString hname; + const xmlChar *prefix, *name; xmlChar *val, *internal_val = NULL; int normalize = 0; *value = NULL; GROW; - name = xmlParseQName(ctxt, prefix); - if (name == NULL) { + hname = xmlParseQNameHashed(ctxt, hprefix); + if (hname.name == NULL) { xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, "error parsing attribute name\n"); - return (NULL); + return(hname); } + name = hname.name; + if (hprefix->name != NULL) + prefix = hprefix->name; + else + prefix = NULL; /* * get the type if needed @@ -8804,7 +9191,8 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt, int type; type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial, - pref, elem, *prefix, name); + pref, elem, + prefix, name); if (type != 0) normalize = 1; } @@ -8817,8 +9205,10 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt, NEXT; SKIP_BLANKS; val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); - if (val == NULL) - return (NULL); + if (val == NULL) { + hname.name = NULL; + return(hname); + } if (normalize) { /* * Sometimes a second normalisation pass for spaces is needed @@ -8841,10 +9231,10 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt, xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, "Specification mandates value for attribute %s\n", name); - return (name); + return(hname); } - if (*prefix == ctxt->str_xml) { + if (prefix == ctxt->str_xml) { /* * Check that xml:lang conforms to the specification * No more registered as an error, just generate a warning now @@ -8880,8 +9270,149 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt, } *value = val; - return (name); + return (hname); } + +ATTRIBUTE_NO_SANITIZE_INTEGER +static unsigned +xmlCombineHash(unsigned v1, unsigned v2) { + return(HASH_ROL(v1, 15) ^ v2); +} + +/** + * xmlAttrHashInsert: + * @ctxt: parser context + * @aindex: attribute index (this is a multiple of 5) + * @sizePtr: size of the hash table (input/output value) + * @name: attribute name + * @uri: namespace uri + * @hashValue: combined hash value of name and uri + * + * Inserts a new attribute into the hash table. + * + * Returns INT_MAX if no existing attribute was found, the attribute + * index if an attribute was found, -1 if a memory allocation failed. + */ +static int +xmlAttrHashInsert(xmlParserCtxtPtr ctxt, int aindex, unsigned *sizePtr, + const xmlChar *name, const xmlChar *uri, + unsigned hashValue) { + xmlAttrHashBucket *table = ctxt->attrHash; + xmlAttrHashBucket *bucket; + unsigned hindex; + unsigned size = *sizePtr; + + if (size > 0) { + hindex = hashValue & (size - 1); + bucket = &table[hindex]; + + while (bucket->hashValue != 0) { + const xmlChar **atts = &ctxt->atts[bucket->index]; + + if (name == atts[0]) { + int nsIndex = (int) (ptrdiff_t) atts[2]; + + if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) : + (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml) : + (uri == ctxt->nsTab[nsIndex * 2 + 1])) + return(bucket->index); + } + + hindex++; + bucket++; + if (hindex >= size) { + hindex = 0; + bucket = table; + } + } + } + + /* + * Grow hash table + */ + if ((unsigned) aindex / 5 >= size / 2) { + xmlAttrHashBucket *newTable; + unsigned newSize, i, nindex; + + newSize = size ? size * 2 : 8; + + if (newSize > ctxt->attrHashMax) { + newTable = xmlRealloc(table, newSize * sizeof(newTable[0])); + if (newTable == NULL) { + xmlErrMemory(ctxt, NULL); + return(-1); + } + + table = newTable; + ctxt->attrHash = newTable; + ctxt->attrHashMax = newSize; + } + + memset(&table[size], 0, (newSize - size) * sizeof(table[0])); + + if (size > 0) { + /* + * We must search for the start of a probe sequence to make + * in-place operation work. + */ + hindex = 0; + bucket = table; + while (bucket->hashValue != 0) { + hindex++; + bucket++; + } + + for (i = 0; i < size; i++) { + if (bucket->hashValue != 0) { + nindex = bucket->hashValue & (newSize - 1); + + while (nindex != hindex) { + if (table[nindex].hashValue == 0) { + table[nindex] = *bucket; + bucket->hashValue = 0; + break; + } + + nindex++; + if (nindex >= newSize) + nindex = 0; + } + } + + hindex++; + bucket++; + if (hindex >= size) { + hindex = 0; + bucket = table; + } + } + } + + size = newSize; + *sizePtr = newSize; + + /* + * Relookup + */ + hindex = hashValue & (size - 1); + bucket = &table[hindex]; + + while (bucket->hashValue != 0) { + hindex++; + bucket++; + if (hindex >= size) { + hindex = 0; + bucket = table; + } + } + } + + bucket->hashValue = hashValue; + bucket->index = aindex; + + return(INT_MAX); +} + /** * xmlParseStartTag2: * @ctxt: an XML parser context @@ -8913,40 +9444,55 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt, static const xmlChar * xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, - const xmlChar **URI, int *tlen) { + const xmlChar **URI, int *nbNsPtr) { + xmlHashedString hlocalname; + xmlHashedString hprefix; + xmlHashedString hattname; + xmlHashedString haprefix; const xmlChar *localname; const xmlChar *prefix; const xmlChar *attname; const xmlChar *aprefix; - const xmlChar *nsname; - xmlChar *attvalue; + const xmlChar *uri; + xmlChar *attvalue = NULL; const xmlChar **atts = ctxt->atts; + unsigned attrHashSize = 0; int maxatts = ctxt->maxatts; int nratts, nbatts, nbdef, inputid; - int i, j, nbNs, attval; - size_t cur; - int nsNr = ctxt->nsNr; + int i, j, nbNs, attval, nsIndex; + int alloc = 0; if (RAW != '<') return(NULL); NEXT1; - cur = ctxt->input->cur - ctxt->input->base; inputid = ctxt->input->id; nbatts = 0; nratts = 0; nbdef = 0; nbNs = 0; attval = 0; - /* Forget any namespaces added during an earlier parse of this element. */ - ctxt->nsNr = nsNr; - localname = xmlParseQName(ctxt, &prefix); - if (localname == NULL) { + if (ctxt->nsdb == NULL) { + ctxt->nsdb = xmlParserNsCreate(); + if (ctxt->nsdb == NULL) { + xmlErrMemory(ctxt, NULL); + return(NULL); + } + } + + if (xmlParserNsStartElement(ctxt->nsdb) < 0) { + xmlErrMemory(ctxt, NULL); + return(NULL); + } + + hlocalname = xmlParseQNameHashed(ctxt, &hprefix); + if (hlocalname.name == NULL) { xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, "StartTag: invalid element name\n"); return(NULL); } - *tlen = ctxt->input->cur - ctxt->input->base - cur; + localname = hlocalname.name; + prefix = hprefix.name; /* * Now parse the attributes, it ends up with the ending @@ -8956,48 +9502,74 @@ xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, SKIP_BLANKS; GROW; + /* + * The ctxt->atts array will be ultimately passed to the SAX callback + * containing five xmlChar pointers for each attribute: + * + * [0] attribute name + * [1] attribute prefix + * [2] namespace URI + * [3] attribute value + * [4] end of attribute value + * + * To save memory, we reuse this array temporarily and store integers + * in these pointer variables. + * + * [0] attribute name + * [1] attribute prefix + * [2] hash value of attribute prefix, and later namespace index + * [3] for non-allocated values: ptrdiff_t offset into input buffer + * [4] for non-allocated values: ptrdiff_t offset into input buffer + * + * The ctxt->attallocs array contains an additional unsigned int for + * each attribute, containing the hash value of the attribute name + * and the alloc flag in bit 31. + */ + while (((RAW != '>') && ((RAW != '/') || (NXT(1) != '>')) && (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { - int len = -1, alloc = 0; + int len = -1; - attname = xmlParseAttribute2(ctxt, prefix, localname, - &aprefix, &attvalue, &len, &alloc); - if (attname == NULL) { + hattname = xmlParseAttribute2(ctxt, prefix, localname, + &haprefix, &attvalue, &len, + &alloc); + if (hattname.name == NULL) { xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "xmlParseStartTag: problem parsing attributes\n"); break; } if (attvalue == NULL) goto next_attr; + attname = hattname.name; + aprefix = haprefix.name; if (len < 0) len = xmlStrlen(attvalue); if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { - const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); - xmlURIPtr uri; + xmlHashedString huri; + xmlURIPtr parsedUri; - if (URL == NULL) { - xmlErrMemory(ctxt, "dictionary allocation failure"); - if ((attvalue != NULL) && (alloc != 0)) - xmlFree(attvalue); - localname = NULL; - goto done; + huri = xmlDictLookupHashed(ctxt->dict, attvalue, len); + uri = huri.name; + if (uri == NULL) { + xmlErrMemory(ctxt, NULL); + goto next_attr; } - if (*URL != 0) { - uri = xmlParseURI((const char *) URL); - if (uri == NULL) { + if (*uri != 0) { + parsedUri = xmlParseURI((const char *) uri); + if (parsedUri == NULL) { xmlNsErr(ctxt, XML_WAR_NS_URI, "xmlns: '%s' is not a valid URI\n", - URL, NULL, NULL); + uri, NULL, NULL); } else { - if (uri->scheme == NULL) { + if (parsedUri->scheme == NULL) { xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, "xmlns: URI %s is not absolute\n", - URL, NULL, NULL); + uri, NULL, NULL); } - xmlFreeURI(uri); + xmlFreeURI(parsedUri); } - if (URL == ctxt->str_xml_ns) { + if (uri == ctxt->str_xml_ns) { if (attname != ctxt->str_xml) { xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, "xml namespace URI cannot be the default namespace\n", @@ -9006,7 +9578,7 @@ xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, goto next_attr; } if ((len == 29) && - (xmlStrEqual(URL, + (xmlStrEqual(uri, BAD_CAST "http://www.w3.org/2000/xmlns/"))) { xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, "reuse of the xmlns namespace name is forbidden\n", @@ -9014,23 +9586,22 @@ xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, goto next_attr; } } - /* - * check that it's not a defined namespace - */ - for (j = 1;j <= nbNs;j++) - if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) - break; - if (j <= nbNs) - xmlErrAttributeDup(ctxt, NULL, attname); - else - if (nsPush(ctxt, NULL, URL) > 0) nbNs++; + if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0) + nbNs++; } else if (aprefix == ctxt->str_xmlns) { - const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); - xmlURIPtr uri; + xmlHashedString huri; + xmlURIPtr parsedUri; + + huri = xmlDictLookupHashed(ctxt->dict, attvalue, len); + uri = huri.name; + if (uri == NULL) { + xmlErrMemory(ctxt, NULL); + goto next_attr; + } if (attname == ctxt->str_xml) { - if (URL != ctxt->str_xml_ns) { + if (uri != ctxt->str_xml_ns) { xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, "xml namespace prefix mapped to wrong URI\n", NULL, NULL, NULL); @@ -9040,7 +9611,7 @@ xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, */ goto next_attr; } - if (URL == ctxt->str_xml_ns) { + if (uri == ctxt->str_xml_ns) { if (attname != ctxt->str_xml) { xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, "xml namespace URI mapped to wrong prefix\n", @@ -9055,48 +9626,40 @@ xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, goto next_attr; } if ((len == 29) && - (xmlStrEqual(URL, + (xmlStrEqual(uri, BAD_CAST "http://www.w3.org/2000/xmlns/"))) { xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, "reuse of the xmlns namespace name is forbidden\n", NULL, NULL, NULL); goto next_attr; } - if ((URL == NULL) || (URL[0] == 0)) { + if ((uri == NULL) || (uri[0] == 0)) { xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, "xmlns:%s: Empty XML namespace is not allowed\n", attname, NULL, NULL); goto next_attr; } else { - uri = xmlParseURI((const char *) URL); - if (uri == NULL) { + parsedUri = xmlParseURI((const char *) uri); + if (parsedUri == NULL) { xmlNsErr(ctxt, XML_WAR_NS_URI, "xmlns:%s: '%s' is not a valid URI\n", - attname, URL, NULL); + attname, uri, NULL); } else { - if ((ctxt->pedantic) && (uri->scheme == NULL)) { + if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) { xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, "xmlns:%s: URI %s is not absolute\n", - attname, URL, NULL); + attname, uri, NULL); } - xmlFreeURI(uri); + xmlFreeURI(parsedUri); } } - /* - * check that it's not a defined namespace - */ - for (j = 1;j <= nbNs;j++) - if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) - break; - if (j <= nbNs) - xmlErrAttributeDup(ctxt, aprefix, attname); - else - if (nsPush(ctxt, attname, URL) > 0) nbNs++; - + if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0) + nbNs++; } else { /* - * Add the pair to atts + * Populate attributes array, see above for repurposing + * of xmlChar pointers. */ if ((atts == NULL) || (nbatts + 5 > maxatts)) { if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { @@ -9105,10 +9668,11 @@ xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, maxatts = ctxt->maxatts; atts = ctxt->atts; } - ctxt->attallocs[nratts++] = alloc; + ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) | + ((unsigned) alloc << 31); atts[nbatts++] = attname; atts[nbatts++] = aprefix; - atts[nbatts++] = NULL; + atts[nbatts++] = (const xmlChar *) (size_t) haprefix.hashValue; if (alloc) { atts[nbatts++] = attvalue; attvalue += len; @@ -9156,169 +9720,249 @@ next_attr: goto done; } - /* Reconstruct attribute value pointers. */ - for (i = 0, j = 0; j < nratts; i += 5, j++) { - if (ctxt->attallocs[j] == 0) { - atts[i+3] = BASE_PTR + (ptrdiff_t) atts[i+3]; /* value */ - atts[i+4] = BASE_PTR + (ptrdiff_t) atts[i+4]; /* valuend */ - } - } - /* - * The attributes defaulting + * Namespaces from default attributes */ if (ctxt->attsDefault != NULL) { xmlDefAttrsPtr defaults; defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); if (defaults != NULL) { - for (i = 0;i < defaults->nbAttrs;i++) { - attname = defaults->values[5 * i]; - aprefix = defaults->values[5 * i + 1]; + for (i = 0; i < defaults->nbAttrs; i++) { + xmlDefAttr *attr = &defaults->attrs[i]; + + attname = attr->name.name; + aprefix = attr->prefix.name; - /* - * special work for namespaces defaulted defs - */ if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { - /* - * check that it's not a defined namespace - */ - for (j = 1;j <= nbNs;j++) - if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) - break; - if (j <= nbNs) continue; - - nsname = xmlGetNamespace(ctxt, NULL); - if (nsname != defaults->values[5 * i + 2]) { - if (nsPush(ctxt, NULL, - defaults->values[5 * i + 2]) > 0) - nbNs++; - } + if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0) + nbNs++; } else if (aprefix == ctxt->str_xmlns) { - /* - * check that it's not a defined namespace - */ - for (j = 1;j <= nbNs;j++) - if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) - break; - if (j <= nbNs) continue; - - nsname = xmlGetNamespace(ctxt, attname); - if (nsname != defaults->values[5 * i + 2]) { - if (nsPush(ctxt, attname, - defaults->values[5 * i + 2]) > 0) - nbNs++; - } - } else { - /* - * check that it's not a defined attribute - */ - for (j = 0;j < nbatts;j+=5) { - if ((attname == atts[j]) && (aprefix == atts[j+1])) - break; - } - if (j < nbatts) continue; - - if ((atts == NULL) || (nbatts + 5 > maxatts)) { - if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { - localname = NULL; - goto done; - } - maxatts = ctxt->maxatts; - atts = ctxt->atts; - } - atts[nbatts++] = attname; - atts[nbatts++] = aprefix; - if (aprefix == NULL) - atts[nbatts++] = NULL; - else - atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); - atts[nbatts++] = defaults->values[5 * i + 2]; - atts[nbatts++] = defaults->values[5 * i + 3]; - if ((ctxt->standalone == 1) && - (defaults->values[5 * i + 4] != NULL)) { - xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, - "standalone: attribute %s on %s defaulted from external subset\n", - attname, localname); - } - nbdef++; + if (xmlParserNsPush(ctxt, &attr->name, &attr->value, + NULL, 1) > 0) + nbNs++; } } } } /* - * The attributes checkings + * Resolve attribute namespaces */ - for (i = 0; i < nbatts;i += 5) { + for (i = 0; i < nbatts; i += 5) { + attname = atts[i]; + aprefix = atts[i+1]; + /* * The default namespace does not apply to attribute names. */ - if (atts[i + 1] != NULL) { - nsname = xmlGetNamespace(ctxt, atts[i + 1]); - if (nsname == NULL) { - xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, + if (aprefix == NULL) { + nsIndex = NS_INDEX_EMPTY; + } else if (aprefix == ctxt->str_xml) { + nsIndex = NS_INDEX_XML; + } else { + haprefix.name = aprefix; + haprefix.hashValue = (size_t) atts[i+2]; + nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL); + if (nsIndex == INT_MAX) { + xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, "Namespace prefix %s for %s on %s is not defined\n", - atts[i + 1], atts[i], localname); - } - atts[i + 2] = nsname; - } else - nsname = NULL; + aprefix, attname, localname); + nsIndex = NS_INDEX_EMPTY; + } + } + + atts[i+2] = (const xmlChar *) (ptrdiff_t) nsIndex; + } + + /* + * Verify that attribute names are unique. + */ + for (i = 0, j = 0; j < nratts; i += 5, j++) { + const xmlChar *nsuri; + unsigned hashValue, nameHashValue, uriHashValue; + int res; + + attname = atts[i]; + aprefix = atts[i+1]; + nsIndex = (ptrdiff_t) atts[i+2]; + /* Hash values always have bit 31 set, see dict.c */ + nameHashValue = ctxt->attallocs[j] | 0x80000000; + + if (nsIndex == NS_INDEX_EMPTY) { + nsuri = NULL; + uriHashValue = URI_HASH_EMPTY; + } else if (nsIndex == NS_INDEX_XML) { + nsuri = ctxt->str_xml_ns; + uriHashValue = URI_HASH_XML; + } else { + nsuri = ctxt->nsTab[nsIndex * 2 + 1]; + uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue; + } + + hashValue = xmlCombineHash(nameHashValue, uriHashValue); + res = xmlAttrHashInsert(ctxt, i, &attrHashSize, attname, nsuri, + hashValue); + if (res < 0) + continue; + /* * [ WFC: Unique Att Spec ] * No attribute name may appear more than once in the same * start-tag or empty-element tag. * As extended by the Namespace in XML REC. */ - for (j = 0; j < i;j += 5) { - if (atts[i] == atts[j]) { - if (atts[i+1] == atts[j+1]) { - xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); - break; - } - if ((nsname != NULL) && (atts[j + 2] == nsname)) { - xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, - "Namespaced Attribute %s in '%s' redefined\n", - atts[i], nsname, NULL); - break; - } + if (res < INT_MAX) { + if (aprefix == atts[res+1]) { + xmlErrAttributeDup(ctxt, aprefix, attname); + } else { + xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, + "Namespaced Attribute %s in '%s' redefined\n", + attname, nsuri, NULL); + } + } + } + + /* + * Default attributes + */ + if (ctxt->attsDefault != NULL) { + xmlDefAttrsPtr defaults; + + defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); + if (defaults != NULL) { + for (i = 0; i < defaults->nbAttrs; i++) { + xmlDefAttr *attr = &defaults->attrs[i]; + const xmlChar *nsuri; + unsigned hashValue, uriHashValue; + int res; + + attname = attr->name.name; + aprefix = attr->prefix.name; + + if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) + continue; + if (aprefix == ctxt->str_xmlns) + continue; + + if (aprefix == NULL) { + nsIndex = NS_INDEX_EMPTY; + nsuri = NULL; + uriHashValue = URI_HASH_EMPTY; + } if (aprefix == ctxt->str_xml) { + nsIndex = NS_INDEX_XML; + nsuri = ctxt->str_xml_ns; + uriHashValue = URI_HASH_XML; + } else if (aprefix != NULL) { + nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL); + if (nsIndex == INT_MAX) { + xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, + "Namespace prefix %s for %s on %s is not " + "defined\n", + aprefix, attname, localname); + nsIndex = NS_INDEX_EMPTY; + nsuri = NULL; + uriHashValue = URI_HASH_EMPTY; + } else { + nsuri = ctxt->nsTab[nsIndex * 2 + 1]; + uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue; + } + } + + /* + * Check whether the attribute exists + */ + hashValue = xmlCombineHash(attr->name.hashValue, uriHashValue); + res = xmlAttrHashInsert(ctxt, nbatts, &attrHashSize, attname, + nsuri, hashValue); + if (res < 0) + continue; + if (res < INT_MAX) { + if (aprefix == atts[res+1]) + continue; + xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, + "Namespaced Attribute %s in '%s' redefined\n", + attname, nsuri, NULL); + } + + if ((atts == NULL) || (nbatts + 5 > maxatts)) { + if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { + localname = NULL; + goto done; + } + maxatts = ctxt->maxatts; + atts = ctxt->atts; + } + + atts[nbatts++] = attname; + atts[nbatts++] = aprefix; + atts[nbatts++] = (const xmlChar *) (ptrdiff_t) nsIndex; + atts[nbatts++] = attr->value.name; + atts[nbatts++] = attr->valueEnd; + if ((ctxt->standalone == 1) && (attr->external != 0)) { + xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, + "standalone: attribute %s on %s defaulted " + "from external subset\n", + attname, localname); + } + nbdef++; } } } - nsname = xmlGetNamespace(ctxt, prefix); - if ((prefix != NULL) && (nsname == NULL)) { + /* + * Reconstruct attribute pointers + */ + for (i = 0, j = 0; i < nbatts; i += 5, j++) { + /* namespace URI */ + nsIndex = (ptrdiff_t) atts[i+2]; + if (nsIndex == INT_MAX) + atts[i+2] = NULL; + else if (nsIndex == INT_MAX - 1) + atts[i+2] = ctxt->str_xml_ns; + else + atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1]; + + if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) { + atts[i+3] = BASE_PTR + (ptrdiff_t) atts[i+3]; /* value */ + atts[i+4] = BASE_PTR + (ptrdiff_t) atts[i+4]; /* valuend */ + } + } + + uri = xmlParserNsLookupUri(ctxt, &hprefix); + if ((prefix != NULL) && (uri == NULL)) { xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, "Namespace prefix %s on %s is not defined\n", prefix, localname, NULL); } *pref = prefix; - *URI = nsname; + *URI = uri; /* - * SAX: Start of Element ! + * SAX callback */ if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && (!ctxt->disableSAX)) { if (nbNs > 0) - ctxt->sax->startElementNs(ctxt->userData, localname, prefix, - nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], + ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri, + nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs), nbatts / 5, nbdef, atts); else - ctxt->sax->startElementNs(ctxt->userData, localname, prefix, - nsname, 0, NULL, nbatts / 5, nbdef, atts); + ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri, + 0, NULL, nbatts / 5, nbdef, atts); } done: /* - * Free up attribute allocated strings if needed + * Free allocated attribute values */ if (attval != 0) { - for (i = 3,j = 0; j < nratts;i += 5,j++) - if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) - xmlFree((xmlChar *) atts[i]); + for (i = 0, j = 0; j < nratts; i += 5, j++) + if (ctxt->attallocs[j] & 0x80000000) + xmlFree((xmlChar *) atts[i+3]); } + *nbNsPtr = nbNs; return(localname); } @@ -9388,7 +10032,7 @@ xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) { spacePop(ctxt); if (tag->nsNr != 0) - nsPop(ctxt, tag->nsNr); + xmlParserNsPop(ctxt, tag->nsNr); } /** @@ -9655,9 +10299,9 @@ xmlParseElementStart(xmlParserCtxtPtr ctxt) { const xmlChar *prefix = NULL; const xmlChar *URI = NULL; xmlParserNodeInfo node_info; - int line, tlen = 0; + int line; xmlNodePtr cur; - int nsNr = ctxt->nsNr; + int nbNs = 0; if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) && ((ctxt->options & XML_PARSE_HUGE) == 0)) { @@ -9686,7 +10330,7 @@ xmlParseElementStart(xmlParserCtxtPtr ctxt) { #ifdef LIBXML_SAX1_ENABLED if (ctxt->sax2) #endif /* LIBXML_SAX1_ENABLED */ - name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); + name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs); #ifdef LIBXML_SAX1_ENABLED else name = xmlParseStartTag(ctxt); @@ -9697,7 +10341,7 @@ xmlParseElementStart(xmlParserCtxtPtr ctxt) { spacePop(ctxt); return(-1); } - nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr); + nameNsPush(ctxt, name, prefix, URI, line, nbNs); cur = ctxt->node; #ifdef LIBXML_VALID_ENABLED @@ -9729,8 +10373,8 @@ xmlParseElementStart(xmlParserCtxtPtr ctxt) { } namePop(ctxt); spacePop(ctxt); - if (nsNr != ctxt->nsNr) - nsPop(ctxt, ctxt->nsNr - nsNr); + if (nbNs > 0) + xmlParserNsPop(ctxt, nbNs); if (cur != NULL && ctxt->record_info) { node_info.node = cur; node_info.end_pos = ctxt->input->consumed + @@ -9759,8 +10403,8 @@ xmlParseElementStart(xmlParserCtxtPtr ctxt) { nodePop(ctxt); namePop(ctxt); spacePop(ctxt); - if (nsNr != ctxt->nsNr) - nsPop(ctxt, ctxt->nsNr - nsNr); + if (nbNs > 0) + xmlParserNsPop(ctxt, nbNs); return(-1); } @@ -10886,7 +11530,6 @@ xmlCheckCdataPush(const xmlChar *utf, int len, int complete) { static int xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { int ret = 0; - int tlen; size_t avail; xmlChar cur, next; @@ -10981,7 +11624,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { const xmlChar *prefix = NULL; const xmlChar *URI = NULL; int line = ctxt->input->line; - int nsNr = ctxt->nsNr; + int nbNs; if ((!terminate) && (avail < 2)) goto done; @@ -11005,7 +11648,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { #ifdef LIBXML_SAX1_ENABLED if (ctxt->sax2) #endif /* LIBXML_SAX1_ENABLED */ - name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); + name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs); #ifdef LIBXML_SAX1_ENABLED else name = xmlParseStartTag(ctxt); @@ -11042,8 +11685,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { (!ctxt->disableSAX)) ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); - if (ctxt->nsNr - nsNr > 0) - nsPop(ctxt, ctxt->nsNr - nsNr); + if (nbNs > 0) + xmlParserNsPop(ctxt, nbNs); #ifdef LIBXML_SAX1_ENABLED } else { if ((ctxt->sax != NULL) && @@ -11071,7 +11714,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { nodePop(ctxt); spacePop(ctxt); } - nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr); + nameNsPush(ctxt, name, prefix, URI, line, nbNs); if (ctxt->instate == XML_PARSER_EOF) goto done; @@ -12240,9 +12883,7 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, xmlNodePtr content = NULL; xmlNodePtr last = NULL; xmlParserErrors ret = XML_ERR_OK; -#ifdef SAX2 - int i; -#endif + unsigned i; if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) || (oldctxt->depth > 100)) { @@ -12272,12 +12913,32 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); -#ifdef SAX2 /* propagate namespaces down the entity */ - for (i = 0;i < oldctxt->nsNr;i += 2) { - nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]); + if (oldctxt->nsdb != NULL) { + ctxt->nsdb = xmlParserNsCreate(); + if (ctxt->nsdb == NULL) + return(XML_ERR_NO_MEMORY); + for (i = 0; i < oldctxt->nsdb->hashSize; i++) { + xmlParserNsBucket *bucket = &oldctxt->nsdb->hash[i]; + xmlHashedString hprefix, huri; + const xmlChar **ns; + xmlParserNsExtra *extra; + unsigned nsIndex; + + if ((bucket->hashValue != 0) && + (bucket->index != INT_MAX)) { + nsIndex = bucket->index; + ns = &oldctxt->nsTab[nsIndex * 2]; + extra = &oldctxt->nsdb->extra[nsIndex]; + + hprefix.name = ns[0]; + hprefix.hashValue = bucket->hashValue; + huri.name = ns[1]; + huri.hashValue = extra->uriHashValue; + xmlParserNsPush(ctxt, &hprefix, &huri, extra->saxData, 0); + } + } } -#endif oldsax = ctxt->sax; ctxt->sax = oldctxt->sax; @@ -12428,7 +13089,6 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, xmlParserErrors xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, int options, xmlNodePtr *lst) { -#ifdef SAX2 xmlParserCtxtPtr ctxt; xmlDocPtr doc = NULL; xmlNodePtr fake, cur; @@ -12536,21 +13196,13 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, cur = node; while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) { xmlNsPtr ns = cur->nsDef; - const xmlChar *iprefix, *ihref; + xmlHashedString hprefix, huri; while (ns != NULL) { - if (ctxt->dict) { - iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1); - ihref = xmlDictLookup(ctxt->dict, ns->href, -1); - } else { - iprefix = ns->prefix; - ihref = ns->href; - } - - if (xmlGetNamespace(ctxt, iprefix) == NULL) { - nsPush(ctxt, iprefix, ihref); - nsnr++; - } + hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1); + huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1); + if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0) + nsnr++; ns = ns->next; } cur = cur->parent; @@ -12571,7 +13223,7 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, #endif xmlParseContent(ctxt); - nsPop(ctxt, nsnr); + xmlParserNsPop(ctxt, nsnr); if ((RAW == '<') && (NXT(1) == '/')) { xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); } else if (RAW != 0) { @@ -12625,9 +13277,6 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, xmlFreeParserCtxt(ctxt); return(ret); -#else /* !SAX2 */ - return(XML_ERR_INTERNAL_ERROR); -#endif } #ifdef LIBXML_SAX1_ENABLED @@ -13683,6 +14332,7 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt) ctxt->name = NULL; ctxt->nsNr = 0; + xmlParserNsReset(ctxt->nsdb); DICT_FREE(ctxt->version); ctxt->version = NULL; diff --git a/parserInternals.c b/parserInternals.c index 8d574fc0..693e598f 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -2061,7 +2061,9 @@ xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab); if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts); if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); - if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab); + if (ctxt->nsTab != NULL) xmlFree(ctxt->nsTab); + if (ctxt->nsdb != NULL) xmlParserNsFree(ctxt->nsdb); + if (ctxt->attrHash != NULL) xmlFree(ctxt->attrHash); if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab); if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs); if (ctxt->attsDefault != NULL)