mirror of
				https://gitlab.gnome.org/GNOME/libxml2.git
				synced 2025-10-26 00:37:43 +03:00 
			
		
		
		
	parser: Rework parsing of attribute and entity values
Don't use a separate function to handle "complex" attributes. Validate UTF-8 byte sequences without decoding. This should improve performance considerably when parsing multi-byte UTF-8 sequences. Use a string buffer to avoid unnecessary allocations and copying when expanding entities. Normalize attribute values in a single pass while expanding entities. Be more lenient in recovery mode. If no entity substitution was requested, validate entities without expanding. Fixes #596. Also fixes #655.
This commit is contained in:
		
							
								
								
									
										30
									
								
								SAX2.c
									
									
									
									
									
								
							
							
						
						
									
										30
									
								
								SAX2.c
									
									
									
									
									
								
							| @@ -970,10 +970,8 @@ xmlSAX2AttributeInternal(void *ctx, const xmlChar *fullname, | ||||
|         (void) nsret; | ||||
|  | ||||
|         if (!ctxt->replaceEntities) { | ||||
| 	    ctxt->depth++; | ||||
| 	    val = xmlStringDecodeEntities(ctxt, value, XML_SUBSTITUTE_REF, | ||||
| 		                          0,0,0); | ||||
| 	    ctxt->depth--; | ||||
|             /* TODO: normalize if needed */ | ||||
| 	    val = xmlExpandEntitiesInAttValue(ctxt, value, /* normalize */ 0); | ||||
| 	    if (val == NULL) { | ||||
| 	        xmlSAX2ErrMemory(ctxt); | ||||
| 		if (name != NULL) | ||||
| @@ -1038,10 +1036,8 @@ xmlSAX2AttributeInternal(void *ctx, const xmlChar *fullname, | ||||
|         (void) nsret; | ||||
|  | ||||
|         if (!ctxt->replaceEntities) { | ||||
| 	    ctxt->depth++; | ||||
| 	    val = xmlStringDecodeEntities(ctxt, value, XML_SUBSTITUTE_REF, | ||||
| 		                          0,0,0); | ||||
| 	    ctxt->depth--; | ||||
|             /* TODO: normalize if needed */ | ||||
| 	    val = xmlExpandEntitiesInAttValue(ctxt, value, /* normalize */ 0); | ||||
| 	    if (val == NULL) { | ||||
| 	        xmlSAX2ErrMemory(ctxt); | ||||
| 	        xmlFree(ns); | ||||
| @@ -1179,10 +1175,8 @@ xmlSAX2AttributeInternal(void *ctx, const xmlChar *fullname, | ||||
|         if (!ctxt->replaceEntities) { | ||||
| 	    xmlChar *val; | ||||
|  | ||||
| 	    ctxt->depth++; | ||||
| 	    val = xmlStringDecodeEntities(ctxt, value, XML_SUBSTITUTE_REF, | ||||
| 		                          0,0,0); | ||||
| 	    ctxt->depth--; | ||||
|             /* TODO: normalize if needed */ | ||||
| 	    val = xmlExpandEntitiesInAttValue(ctxt, value, /* normalize */ 0); | ||||
|  | ||||
| 	    if (val == NULL) | ||||
| 		ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt, | ||||
| @@ -1736,7 +1730,6 @@ static xmlChar * | ||||
| xmlSAX2DecodeAttrEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, | ||||
|                           const xmlChar *end) { | ||||
|     const xmlChar *in; | ||||
|     xmlChar *ret; | ||||
|  | ||||
|     in = str; | ||||
|     while (in < end) | ||||
| @@ -1744,11 +1737,12 @@ xmlSAX2DecodeAttrEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, | ||||
| 	    goto decode; | ||||
|     return(NULL); | ||||
| decode: | ||||
|     ctxt->depth++; | ||||
|     ret = xmlStringLenDecodeEntities(ctxt, str, end - str, | ||||
| 				     XML_SUBSTITUTE_REF, 0,0,0); | ||||
|     ctxt->depth--; | ||||
|     return(ret); | ||||
|     /* | ||||
|      * If the value contains '&', we can be sure it was allocated and is | ||||
|      * zero-terminated. | ||||
|      */ | ||||
|     /* TODO: normalize if needed */ | ||||
|     return(xmlExpandEntitiesInAttValue(ctxt, str, /* normalize */ 0)); | ||||
| } | ||||
| #endif /* LIBXML_VALID_ENABLED */ | ||||
|  | ||||
|   | ||||
| @@ -9,13 +9,17 @@ | ||||
|  * | ||||
|  * XML_ENT_PARSED: The entity was parsed and `children` points to the | ||||
|  * content. | ||||
|  * XML_ENT_CHECKED: The entity was checked for loops. | ||||
|  * | ||||
|  * XML_ENT_CHECKED: The entity was checked for loops and amplification. | ||||
|  * expandedSize was set. | ||||
|  * | ||||
|  * XML_ENT_VALIDATED: The entity contains a valid attribute value. | ||||
|  * Only used when entities aren't substituted. | ||||
|  */ | ||||
| #define XML_ENT_PARSED      (1<<0) | ||||
| #define XML_ENT_CHECKED     (1<<1) | ||||
| #define XML_ENT_EXPANDING   (1<<2) | ||||
| #define XML_ENT_CHECKED_LT  (1<<3) | ||||
| #define XML_ENT_CONTAINS_LT (1<<4) | ||||
| #define XML_ENT_PARSED      (1u << 0) | ||||
| #define XML_ENT_CHECKED     (1u << 1) | ||||
| #define XML_ENT_VALIDATED   (1u << 2) | ||||
| #define XML_ENT_EXPANDING   (1u << 3) | ||||
|  | ||||
| XML_HIDDEN xmlChar * | ||||
| xmlEncodeAttributeEntities(xmlDocPtr doc, const xmlChar *input); | ||||
|   | ||||
| @@ -87,4 +87,8 @@ XML_HIDDEN xmlParserInputPtr | ||||
| xmlNewInputPush(xmlParserCtxtPtr ctxt, const char *url, | ||||
|                 const char *chunk, int size, const char *encoding); | ||||
|  | ||||
| XML_HIDDEN xmlChar * | ||||
| xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str, | ||||
|                             int normalize); | ||||
|  | ||||
| #endif /* XML_PARSER_H_PRIVATE__ */ | ||||
|   | ||||
| @@ -1,9 +1,3 @@ | ||||
| ./test/errors/attr4.xml:1: parser error : invalid character in attribute value | ||||
| <ROOT attr="XY"/> | ||||
|               ^ | ||||
| ./test/errors/attr4.xml:1: parser error : attributes construct error | ||||
| <ROOT attr="XY"/> | ||||
|               ^ | ||||
| ./test/errors/attr4.xml:1: parser error : Couldn't find end of Start Tag ROOT line 1 | ||||
| <ROOT attr="XY"/> | ||||
|               ^ | ||||
|   | ||||
| @@ -1,9 +1,3 @@ | ||||
| ./test/errors/attr4.xml:1: parser error : invalid character in attribute value | ||||
| <ROOT attr="XY"/> | ||||
|               ^ | ||||
| ./test/errors/attr4.xml:1: parser error : attributes construct error | ||||
| <ROOT attr="XY"/> | ||||
|               ^ | ||||
| ./test/errors/attr4.xml:1: parser error : Couldn't find end of Start Tag ROOT line 1 | ||||
| <ROOT attr="XY"/> | ||||
|               ^ | ||||
|   | ||||
| @@ -1,10 +1,4 @@ | ||||
| ./test/errors/attr4.xml:1: parser error : invalid character in attribute value | ||||
| <ROOT attr="XY"/> | ||||
|               ^ | ||||
| ./test/errors/attr4.xml:1: parser error : attributes construct error | ||||
| <ROOT attr="XY"/> | ||||
|               ^ | ||||
| ./test/errors/attr4.xml:1: parser error : Couldn't find end of Start Tag ROOT | ||||
| <ROOT attr="XY"/> | ||||
|               ^ | ||||
| ./test/errors/attr4.xml : failed to parse | ||||
|   | ||||
							
								
								
									
										5
									
								
								result/issue655.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								result/issue655.xml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,5 @@ | ||||
| <?xml version="1.0"?> | ||||
| <!DOCTYPE test [ | ||||
| <!ENTITY newline "&#xA;"> | ||||
| ]> | ||||
| <test newline="&newline;"/> | ||||
							
								
								
									
										2
									
								
								result/issue655.xml.rde
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								result/issue655.xml.rde
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,2 @@ | ||||
| 0 10 test 0 0 | ||||
| 0 1 test 1 0 | ||||
							
								
								
									
										2
									
								
								result/issue655.xml.rdr
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								result/issue655.xml.rdr
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,2 @@ | ||||
| 0 10 test 0 0 | ||||
| 0 1 test 1 0 | ||||
							
								
								
									
										10
									
								
								result/issue655.xml.sax
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								result/issue655.xml.sax
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,10 @@ | ||||
| SAX.setDocumentLocator() | ||||
| SAX.startDocument() | ||||
| SAX.internalSubset(test, , ) | ||||
| SAX.entityDecl(newline, 1, (null), (null), 
) | ||||
| SAX.getEntity(newline) | ||||
| SAX.externalSubset(test, , ) | ||||
| SAX.getEntity(newline) | ||||
| SAX.startElement(test, newline='&newline;') | ||||
| SAX.endElement(test) | ||||
| SAX.endDocument() | ||||
							
								
								
									
										10
									
								
								result/issue655.xml.sax2
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								result/issue655.xml.sax2
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,10 @@ | ||||
| SAX.setDocumentLocator() | ||||
| SAX.startDocument() | ||||
| SAX.internalSubset(test, , ) | ||||
| SAX.entityDecl(newline, 1, (null), (null), 
) | ||||
| SAX.getEntity(newline) | ||||
| SAX.externalSubset(test, , ) | ||||
| SAX.getEntity(newline) | ||||
| SAX.startElementNs(test, NULL, NULL, 0, 1, 0, newline='&new...', 9) | ||||
| SAX.endElementNs(test, NULL, NULL) | ||||
| SAX.endDocument() | ||||
							
								
								
									
										5
									
								
								result/noent/issue655.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								result/noent/issue655.xml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,5 @@ | ||||
| <?xml version="1.0"?> | ||||
| <!DOCTYPE test [ | ||||
| <!ENTITY newline "&#xA;"> | ||||
| ]> | ||||
| <test newline="
"/> | ||||
							
								
								
									
										11
									
								
								result/noent/issue655.xml.sax2
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								result/noent/issue655.xml.sax2
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,11 @@ | ||||
| SAX.setDocumentLocator() | ||||
| SAX.startDocument() | ||||
| SAX.internalSubset(test, , ) | ||||
| SAX.entityDecl(newline, 1, (null), (null), 
) | ||||
| SAX.getEntity(newline) | ||||
| SAX.externalSubset(test, , ) | ||||
| SAX.getEntity(newline) | ||||
| SAX.startElementNs(test, NULL, NULL, 0, 1, 0, newline=' | ||||
| ...', 1) | ||||
| SAX.endElementNs(test, NULL, NULL) | ||||
| SAX.endDocument() | ||||
							
								
								
									
										4
									
								
								test/issue655.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								test/issue655.xml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,4 @@ | ||||
| <!DOCTYPE test [ | ||||
|   <!ENTITY newline "&#xA;"> | ||||
| ]> | ||||
| <test newline="&newline;"/> | ||||
		Reference in New Issue
	
	Block a user