mirror of
				https://gitlab.gnome.org/GNOME/libxml2.git
				synced 2025-10-26 00:37:43 +03:00 
			
		
		
		
	parser: Fix regression when switching input encodings
Revert some changes from commit 98840d40.
WebKit/Chromium can actually switch from ISO-8859-1 to UTF-16 in the
middle of parsing. This is a bad idea, but we have to keep supporting
this use case.
			
			
This commit is contained in:
		| @@ -1177,12 +1177,20 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     if (in->encoder != NULL) { |     if (in->encoder != NULL) { | ||||||
|  |         if (in->encoder == handler) | ||||||
|  |             return (0); | ||||||
|  |  | ||||||
|         /* |         /* | ||||||
|          * TODO: Detect encoding mismatch. We should start by comparing |          * Switching encodings during parsing is a really bad idea, | ||||||
|          * in->encoder->name and handler->name, but there are a few |          * but WebKit/Chromium switches from ISO-8859-1 to UTF-16 as soon as | ||||||
|          * compatible encodings like UTF-16 and UCS-2 or UTF-32 and UCS-4. |          * it finds Unicode characters with code points larger than 255. | ||||||
|  |          * | ||||||
|  |          * TODO: We should check whether the "raw" input buffer is empty and | ||||||
|  |          * convert the old content using the old encoder. | ||||||
|          */ |          */ | ||||||
|         xmlCharEncCloseFunc(handler); |  | ||||||
|  |         xmlCharEncCloseFunc(in->encoder); | ||||||
|  |         in->encoder = handler; | ||||||
|         return (0); |         return (0); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user