mirror of
				https://gitlab.gnome.org/GNOME/libxml2.git
				synced 2025-10-24 13:33:01 +03:00 
			
		
		
		
	Fix quadratic runtime when push parsing HTML start tags
Make sure that htmlParseStartTag doesn't terminate on characters for which IS_CHAR_CH is false like control chars. In htmlParseTryOrFinish, only switch to START_TAG if the next character starts a valid name. Otherwise, htmlParseStartTag might return without consuming all characters up to the final '>'. Found by OSS-Fuzz.
This commit is contained in:
		
							
								
								
									
										32
									
								
								HTMLparser.c
									
									
									
									
									
								
							
							
						
						
									
										32
									
								
								HTMLparser.c
									
									
									
									
									
								
							| @@ -3796,7 +3796,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { | |||||||
|  |  | ||||||
|  |  | ||||||
| 	/* Dump the bogus tag like browsers do */ | 	/* Dump the bogus tag like browsers do */ | ||||||
| 	while ((IS_CHAR_CH(CUR)) && (CUR != '>') && | 	while ((CUR != 0) && (CUR != '>') && | ||||||
|                (ctxt->instate != XML_PARSER_EOF)) |                (ctxt->instate != XML_PARSER_EOF)) | ||||||
| 	    NEXT; | 	    NEXT; | ||||||
|         return -1; |         return -1; | ||||||
| @@ -3852,7 +3852,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { | |||||||
|      * (S Attribute)* S? |      * (S Attribute)* S? | ||||||
|      */ |      */ | ||||||
|     SKIP_BLANKS; |     SKIP_BLANKS; | ||||||
|     while ((IS_CHAR_CH(CUR)) && |     while ((CUR != 0) && | ||||||
|            (CUR != '>') && |            (CUR != '>') && | ||||||
| 	   ((CUR != '/') || (NXT(1) != '>'))) { | 	   ((CUR != '/') || (NXT(1) != '>'))) { | ||||||
| 	long cons = ctxt->nbChars; | 	long cons = ctxt->nbChars; | ||||||
| @@ -3915,7 +3915,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { | |||||||
| 	        xmlFree(attvalue); | 	        xmlFree(attvalue); | ||||||
| 	    /* Dump the bogus attribute string up to the next blank or | 	    /* Dump the bogus attribute string up to the next blank or | ||||||
| 	     * the end of the tag. */ | 	     * the end of the tag. */ | ||||||
| 	    while ((IS_CHAR_CH(CUR)) && | 	    while ((CUR != 0) && | ||||||
| 	           !(IS_BLANK_CH(CUR)) && (CUR != '>') && | 	           !(IS_BLANK_CH(CUR)) && (CUR != '>') && | ||||||
| 		   ((CUR != '/') || (NXT(1) != '>'))) | 		   ((CUR != '/') || (NXT(1) != '>'))) | ||||||
| 		NEXT; | 		NEXT; | ||||||
| @@ -5475,7 +5475,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { | |||||||
| 		           (avail < 9)) { | 		           (avail < 9)) { | ||||||
| 		    goto done; | 		    goto done; | ||||||
| 		} else { | 		} else { | ||||||
| 		    ctxt->instate = XML_PARSER_START_TAG; | 		    ctxt->instate = XML_PARSER_CONTENT; | ||||||
| #ifdef DEBUG_PUSH | #ifdef DEBUG_PUSH | ||||||
| 		    xmlGenericError(xmlGenericErrorContext, | 		    xmlGenericError(xmlGenericErrorContext, | ||||||
| 			    "HPP: entering START_TAG\n"); | 			    "HPP: entering START_TAG\n"); | ||||||
| @@ -5518,7 +5518,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { | |||||||
| 		           (avail < 4)) { | 		           (avail < 4)) { | ||||||
| 		    goto done; | 		    goto done; | ||||||
| 		} else { | 		} else { | ||||||
| 		    ctxt->instate = XML_PARSER_START_TAG; | 		    ctxt->instate = XML_PARSER_CONTENT; | ||||||
| #ifdef DEBUG_PUSH | #ifdef DEBUG_PUSH | ||||||
| 		    xmlGenericError(xmlGenericErrorContext, | 		    xmlGenericError(xmlGenericErrorContext, | ||||||
| 			    "HPP: entering START_TAG\n"); | 			    "HPP: entering START_TAG\n"); | ||||||
| @@ -5831,12 +5831,34 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { | |||||||
| #endif | #endif | ||||||
| 			break; | 			break; | ||||||
| 		    } else if (cur == '<') { | 		    } else if (cur == '<') { | ||||||
|  |                         if ((!terminate) && (next == 0)) | ||||||
|  |                             goto done; | ||||||
|  |                         /* | ||||||
|  |                          * Only switch to START_TAG if the next character | ||||||
|  |                          * starts a valid name. Otherwise, htmlParseStartTag | ||||||
|  |                          * might return without consuming all characters | ||||||
|  |                          * up to the final '>'. | ||||||
|  |                          */ | ||||||
|  |                         if ((IS_ASCII_LETTER(next)) || | ||||||
|  |                             (next == '_') || (next == ':') || (next == '.')) { | ||||||
|                             ctxt->instate = XML_PARSER_START_TAG; |                             ctxt->instate = XML_PARSER_START_TAG; | ||||||
|                             ctxt->checkIndex = 0; |                             ctxt->checkIndex = 0; | ||||||
| #ifdef DEBUG_PUSH | #ifdef DEBUG_PUSH | ||||||
|                             xmlGenericError(xmlGenericErrorContext, |                             xmlGenericError(xmlGenericErrorContext, | ||||||
|                                     "HPP: entering START_TAG\n"); |                                     "HPP: entering START_TAG\n"); | ||||||
| #endif | #endif | ||||||
|  |                         } else { | ||||||
|  |                             htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, | ||||||
|  |                                          "htmlParseTryOrFinish: " | ||||||
|  |                                          "invalid element name\n", | ||||||
|  |                                          NULL, NULL); | ||||||
|  |                             htmlCheckParagraph(ctxt); | ||||||
|  |                             if ((ctxt->sax != NULL) && | ||||||
|  |                                 (ctxt->sax->characters != NULL)) | ||||||
|  |                                 ctxt->sax->characters(ctxt->userData, | ||||||
|  |                                                       in->cur, 1); | ||||||
|  |                             NEXT; | ||||||
|  |                         } | ||||||
| 			break; | 			break; | ||||||
| 		    } else { | 		    } else { | ||||||
| 		        /* | 		        /* | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user