mirror of
				https://gitlab.gnome.org/GNOME/libxml2.git
				synced 2025-10-24 13:33:01 +03:00 
			
		
		
		
	html: Rework htmlParseLookupSequence
Rename to htmlParseLookupString and use strstr for increased performance.
This commit is contained in:
		
							
								
								
									
										112
									
								
								HTMLparser.c
									
									
									
									
									
								
							
							
						
						
									
										112
									
								
								HTMLparser.c
									
									
									
									
									
								
							| @@ -5265,67 +5265,54 @@ htmlParseLookupGt(xmlParserCtxtPtr ctxt) { | |||||||
| } | } | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * htmlParseLookupSequence: |  * htmlParseLookupString: | ||||||
|  * @ctxt:  an HTML parser context |  * @ctxt:  an XML parser context | ||||||
|  * @first:  the first char to lookup |  * @startDelta: delta to apply at the start | ||||||
|  * @next:  the next char to lookup or zero |  * @str:  string | ||||||
|  * @third:  the next char to lookup or zero |  * @strLen:  length of string | ||||||
|  * |  * | ||||||
|  * Try to find if a sequence (first, next, third) or  just (first next) or |  * Check whether the input buffer contains a string. | ||||||
|  * (first) is available in the input stream. |  | ||||||
|  * This function has a side effect of (possibly) incrementing ctxt->checkIndex |  | ||||||
|  * to avoid rescanning sequences of bytes, it DOES change the state of the |  | ||||||
|  * parser, do not use liberally. |  | ||||||
|  * This is basically similar to xmlParseLookupSequence() |  | ||||||
|  * |  | ||||||
|  * Returns the index to the current parsing point if the full sequence |  | ||||||
|  *      is available, -1 otherwise. |  | ||||||
|  */ |  */ | ||||||
| static int | static int | ||||||
| htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first, | htmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta, | ||||||
|                         xmlChar next, xmlChar third) { |                       const char *str, size_t strLen) { | ||||||
|     size_t base, len; |     const xmlChar *cur, *term; | ||||||
|     htmlParserInputPtr in; |     int ret; | ||||||
|     const xmlChar *buf; |  | ||||||
|     int quote; |  | ||||||
|  |  | ||||||
|     in = ctxt->input; |     if (ctxt->checkIndex == 0) { | ||||||
|     if (in == NULL) |         cur = ctxt->input->cur + startDelta; | ||||||
|         return (-1); |     } else { | ||||||
|  |         cur = ctxt->input->cur + ctxt->checkIndex; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     base = ctxt->checkIndex; |     term = BAD_CAST strstr((const char *) cur, str); | ||||||
|     quote = ctxt->endCheckState; |     if (term == NULL) { | ||||||
|  |         const xmlChar *end = ctxt->input->end; | ||||||
|  |         size_t index; | ||||||
|  |  | ||||||
|     buf = in->cur; |         /* Rescan (strLen - 1) characters. */ | ||||||
|     len = in->end - in->cur; |         if ((size_t) (end - cur) < strLen) | ||||||
|  |             end = cur; | ||||||
|     /* take into account the sequence length */ |         else | ||||||
|     if (third) |             end -= strLen - 1; | ||||||
|         len -= 2; |         index = end - ctxt->input->cur; | ||||||
|     else if (next) |         if (index > INT_MAX / 2) { | ||||||
|         len--; |  | ||||||
|     for (; base < len; base++) { |  | ||||||
|         if (base >= INT_MAX / 2) { |  | ||||||
|             ctxt->checkIndex = 0; |             ctxt->checkIndex = 0; | ||||||
|             ctxt->endCheckState = 0; |             ret = INT_MAX / 2; | ||||||
|             return (base - 2); |         } else { | ||||||
|         } |             ctxt->checkIndex = index; | ||||||
|         if (buf[base] == first) { |             ret = -1; | ||||||
|             if (third != 0) { |  | ||||||
|                 if ((buf[base + 1] != next) || (buf[base + 2] != third)) |  | ||||||
|                     continue; |  | ||||||
|             } else if (next != 0) { |  | ||||||
|                 if (buf[base + 1] != next) |  | ||||||
|                     continue; |  | ||||||
|         } |         } | ||||||
|  |     } else { | ||||||
|         ctxt->checkIndex = 0; |         ctxt->checkIndex = 0; | ||||||
|             ctxt->endCheckState = 0; |  | ||||||
|             return (base); |         if (term - ctxt->input->cur > INT_MAX / 2) | ||||||
|  |             ret = INT_MAX / 2; | ||||||
|  |         else | ||||||
|  |             ret = term - ctxt->input->cur; | ||||||
|     } |     } | ||||||
|     } |  | ||||||
|     ctxt->checkIndex = base; |     return(ret); | ||||||
|     ctxt->endCheckState = quote; |  | ||||||
|     return (-1); |  | ||||||
| } | } | ||||||
|  |  | ||||||
| /** | /** | ||||||
| @@ -5338,7 +5325,6 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first, | |||||||
|  * This function has a side effect of (possibly) incrementing ctxt->checkIndex |  * This function has a side effect of (possibly) incrementing ctxt->checkIndex | ||||||
|  * to avoid rescanning sequences of bytes, it DOES change the state of the |  * to avoid rescanning sequences of bytes, it DOES change the state of the | ||||||
|  * parser, do not use liberally. |  * parser, do not use liberally. | ||||||
|  * This wraps to htmlParseLookupSequence() |  | ||||||
|  * |  * | ||||||
|  * Returns the index to the current parsing point if the full sequence is available, -1 otherwise. |  * Returns the index to the current parsing point if the full sequence is available, -1 otherwise. | ||||||
|  */ |  */ | ||||||
| @@ -5349,7 +5335,7 @@ htmlParseLookupCommentEnd(htmlParserCtxtPtr ctxt) | |||||||
|     int offset; |     int offset; | ||||||
|  |  | ||||||
|     while (1) { |     while (1) { | ||||||
| 	mark = htmlParseLookupSequence(ctxt, '-', '-', 0); | 	mark = htmlParseLookupString(ctxt, 2, "--", 2); | ||||||
| 	if (mark < 0) | 	if (mark < 0) | ||||||
|             break; |             break; | ||||||
|         if ((NXT(mark+2) == '>') || |         if ((NXT(mark+2) == '>') || | ||||||
| @@ -5457,7 +5443,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { | |||||||
| 		    (UPP(6) == 'Y') && (UPP(7) == 'P') && | 		    (UPP(6) == 'Y') && (UPP(7) == 'P') && | ||||||
| 		    (UPP(8) == 'E')) { | 		    (UPP(8) == 'E')) { | ||||||
| 		    if ((!terminate) && | 		    if ((!terminate) && | ||||||
| 		        (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) | 		        (htmlParseLookupString(ctxt, 9, ">", 1) < 0)) | ||||||
| 			goto done; | 			goto done; | ||||||
| 		    htmlParseDocTypeDecl(ctxt); | 		    htmlParseDocTypeDecl(ctxt); | ||||||
| 		    ctxt->instate = XML_PARSER_PROLOG; | 		    ctxt->instate = XML_PARSER_PROLOG; | ||||||
| @@ -5493,7 +5479,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { | |||||||
| 		    ctxt->instate = XML_PARSER_MISC; | 		    ctxt->instate = XML_PARSER_MISC; | ||||||
| 	        } else if ((cur == '<') && (next == '?')) { | 	        } else if ((cur == '<') && (next == '?')) { | ||||||
| 		    if ((!terminate) && | 		    if ((!terminate) && | ||||||
| 		        (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) | 		        (htmlParseLookupString(ctxt, 2, ">", 1) < 0)) | ||||||
| 			goto done; | 			goto done; | ||||||
| 		    htmlParsePI(ctxt); | 		    htmlParsePI(ctxt); | ||||||
| 		    ctxt->instate = XML_PARSER_MISC; | 		    ctxt->instate = XML_PARSER_MISC; | ||||||
| @@ -5503,7 +5489,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { | |||||||
| 		    (UPP(6) == 'Y') && (UPP(7) == 'P') && | 		    (UPP(6) == 'Y') && (UPP(7) == 'P') && | ||||||
| 		    (UPP(8) == 'E')) { | 		    (UPP(8) == 'E')) { | ||||||
| 		    if ((!terminate) && | 		    if ((!terminate) && | ||||||
| 		        (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) | 		        (htmlParseLookupString(ctxt, 9, ">", 1) < 0)) | ||||||
| 			goto done; | 			goto done; | ||||||
| 		    htmlParseDocTypeDecl(ctxt); | 		    htmlParseDocTypeDecl(ctxt); | ||||||
| 		    ctxt->instate = XML_PARSER_PROLOG; | 		    ctxt->instate = XML_PARSER_PROLOG; | ||||||
| @@ -5529,7 +5515,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { | |||||||
| 		    ctxt->instate = XML_PARSER_PROLOG; | 		    ctxt->instate = XML_PARSER_PROLOG; | ||||||
| 	        } else if ((cur == '<') && (next == '?')) { | 	        } else if ((cur == '<') && (next == '?')) { | ||||||
| 		    if ((!terminate) && | 		    if ((!terminate) && | ||||||
| 		        (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) | 		        (htmlParseLookupString(ctxt, 2, ">", 1) < 0)) | ||||||
| 			goto done; | 			goto done; | ||||||
| 		    htmlParsePI(ctxt); | 		    htmlParsePI(ctxt); | ||||||
| 		    ctxt->instate = XML_PARSER_PROLOG; | 		    ctxt->instate = XML_PARSER_PROLOG; | ||||||
| @@ -5560,7 +5546,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { | |||||||
| 		    ctxt->instate = XML_PARSER_EPILOG; | 		    ctxt->instate = XML_PARSER_EPILOG; | ||||||
| 	        } else if ((cur == '<') && (next == '?')) { | 	        } else if ((cur == '<') && (next == '?')) { | ||||||
| 		    if ((!terminate) && | 		    if ((!terminate) && | ||||||
| 		        (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) | 		        (htmlParseLookupString(ctxt, 2, ">", 1) < 0)) | ||||||
| 			goto done; | 			goto done; | ||||||
| 		    htmlParsePI(ctxt); | 		    htmlParsePI(ctxt); | ||||||
| 		    ctxt->instate = XML_PARSER_EPILOG; | 		    ctxt->instate = XML_PARSER_EPILOG; | ||||||
| @@ -5732,7 +5718,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { | |||||||
| 		        int idx; | 		        int idx; | ||||||
| 			xmlChar val; | 			xmlChar val; | ||||||
|  |  | ||||||
| 			idx = htmlParseLookupSequence(ctxt, '<', '/', 0); | 			idx = htmlParseLookupString(ctxt, 0, "</", 2); | ||||||
| 			if (idx < 0) | 			if (idx < 0) | ||||||
| 			    goto done; | 			    goto done; | ||||||
| 		        val = in->cur[idx + 2]; | 		        val = in->cur[idx + 2]; | ||||||
| @@ -5762,7 +5748,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { | |||||||
|                         (UPP(6) == 'Y') && (UPP(7) == 'P') && |                         (UPP(6) == 'Y') && (UPP(7) == 'P') && | ||||||
|                         (UPP(8) == 'E')) { |                         (UPP(8) == 'E')) { | ||||||
|                         if ((!terminate) && |                         if ((!terminate) && | ||||||
|                             (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) |                             (htmlParseLookupString(ctxt, 9, ">", 1) < 0)) | ||||||
|                             goto done; |                             goto done; | ||||||
|                         htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, |                         htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, | ||||||
|                                      "Misplaced DOCTYPE declaration\n", |                                      "Misplaced DOCTYPE declaration\n", | ||||||
| @@ -5776,13 +5762,13 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { | |||||||
|                         ctxt->instate = XML_PARSER_CONTENT; |                         ctxt->instate = XML_PARSER_CONTENT; | ||||||
|                     } else { |                     } else { | ||||||
|                         if ((!terminate) && |                         if ((!terminate) && | ||||||
|                             (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) |                             (htmlParseLookupString(ctxt, 2, ">", 1) < 0)) | ||||||
|                             goto done; |                             goto done; | ||||||
|                         htmlSkipBogusComment(ctxt); |                         htmlSkipBogusComment(ctxt); | ||||||
|                     } |                     } | ||||||
|                 } else if ((cur == '<') && (next == '?')) { |                 } else if ((cur == '<') && (next == '?')) { | ||||||
|                     if ((!terminate) && |                     if ((!terminate) && | ||||||
|                         (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) |                         (htmlParseLookupString(ctxt, 2, ">", 1) < 0)) | ||||||
|                         goto done; |                         goto done; | ||||||
|                     htmlParsePI(ctxt); |                     htmlParsePI(ctxt); | ||||||
|                     ctxt->instate = XML_PARSER_CONTENT; |                     ctxt->instate = XML_PARSER_CONTENT; | ||||||
| @@ -5810,7 +5796,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { | |||||||
|                      * data detection. |                      * data detection. | ||||||
|                      */ |                      */ | ||||||
|                     if ((!terminate) && |                     if ((!terminate) && | ||||||
|                         (htmlParseLookupSequence(ctxt, '<', 0, 0) < 0)) |                         (htmlParseLookupString(ctxt, 0, "<", 1) < 0)) | ||||||
|                         goto done; |                         goto done; | ||||||
|                     ctxt->checkIndex = 0; |                     ctxt->checkIndex = 0; | ||||||
|                     while ((PARSER_STOPPED(ctxt) == 0) && |                     while ((PARSER_STOPPED(ctxt) == 0) && | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user