From 2dcb937a9ae59e9865a6957d40974db7a9033534 Mon Sep 17 00:00:00 2001 From: Daniel Veillard Date: Wed, 16 Jul 2003 21:18:19 +0000 Subject: [PATCH] patch from Dodji Seketeli about UTF16 BOM when using the push XML parser. * parserInternals.c: patch from Dodji Seketeli about UTF16 BOM when using the push XML parser. * result/utf16bom.xml result/noent/utf16bom.xml test/utf16bom.xml: added the test to the regression suite. Daniel --- ChangeLog | 7 +++++++ parserInternals.c | 17 +++++++++++++++++ result/noent/utf16bom.xml | Bin 0 -> 258 bytes result/utf16bom.xml | Bin 0 -> 258 bytes test/utf16bom.xml | Bin 0 -> 256 bytes 5 files changed, 24 insertions(+) create mode 100644 result/noent/utf16bom.xml create mode 100644 result/utf16bom.xml create mode 100644 test/utf16bom.xml diff --git a/ChangeLog b/ChangeLog index 26107dfc..8a8411fb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +Wed Jul 16 23:15:53 CEST 2003 Daniel Veillard + + * parserInternals.c: patch from Dodji Seketeli about UTF16 BOM + when using the push XML parser. + * result/utf16bom.xml result/noent/utf16bom.xml test/utf16bom.xml: + added the test to the regression suite. + Tue Jul 15 22:03:13 CEST 2003 Daniel Veillard * globals.c: add xmlThrDefMutex = NULL in xmlCleanupGlobals() diff --git a/parserInternals.c b/parserInternals.c index 8e57cdaa..9c71570a 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -1621,6 +1621,23 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) ctxt->input->cur += 3; } return(0); + case XML_CHAR_ENCODING_UTF16LE: + case XML_CHAR_ENCODING_UTF16BE: + /*The raw input characters are encoded + *in UTF-16. As we expect this function + *to be called after xmlCharEncInFunc, we expect + *ctxt->input->cur to contain UTF-8 encoded characters. + *So the raw UTF16 Byte Order Mark + *has also been converted into + *an UTF-8 BOM. Let's skip that BOM. + */ + if ((ctxt->input != NULL) && + (ctxt->input->cur[0] == 0xEF) && + (ctxt->input->cur[1] == 0xBB) && + (ctxt->input->cur[2] == 0xBF)) { + ctxt->input->cur += 3; + } + break ; default: break; } diff --git a/result/noent/utf16bom.xml b/result/noent/utf16bom.xml new file mode 100644 index 0000000000000000000000000000000000000000..6ea296e21c67717a65d9c68cd7d658a24c8e4a53 GIT binary patch literal 258 zcmZ9G!3x4K5JcbEuL$0?dht}-n_r-3kv53X#?l5cKd(+!Q1p^rW?p9Y`6#Km)6?O& z;Tf5@a3!#1&YC4FO(!SNNN?o^&gLH2vtuD@W6VV4B)8SuS%x<*>Gvd}rea2^4$mMz vH$}~nq=sL0l%6<>Xe}dJMpR0rgrJ7rltEjg&3&|d_m?h=FaFZso%xy<)}<~# literal 0 HcmV?d00001 diff --git a/result/utf16bom.xml b/result/utf16bom.xml new file mode 100644 index 0000000000000000000000000000000000000000..6ea296e21c67717a65d9c68cd7d658a24c8e4a53 GIT binary patch literal 258 zcmZ9G!3x4K5JcbEuL$0?dht}-n_r-3kv53X#?l5cKd(+!Q1p^rW?p9Y`6#Km)6?O& z;Tf5@a3!#1&YC4FO(!SNNN?o^&gLH2vtuD@W6VV4B)8SuS%x<*>Gvd}rea2^4$mMz vH$}~nq=sL0l%6<>Xe}dJMpR0rgrJ7rltEjg&3&|d_m?h=FaFZso%xy<)}<~# literal 0 HcmV?d00001 diff --git a/test/utf16bom.xml b/test/utf16bom.xml new file mode 100644 index 0000000000000000000000000000000000000000..1916dc1ee83b74ade66fd747d5f58a8ee414b08d GIT binary patch literal 256 zcmZ9G!3x4K5JcbEuL$0?dht}#n_r-3kv53X#*zlHKd(+#Q1p^rW?p9Y`6#Kl)6wF% z5l9SNxDweiXU&q8rjrw?r8jaTXLAqi*|89{F=ilhlH2HQEh88=>h~a`!ZV{(N1&HK uHD%4jsD@uPNgp|iXe=XJMplYfLR3R%O0TWf=04hV_m?h=FaFZsot_tZCoVMr literal 0 HcmV?d00001