diff --git a/python/meson.build b/python/meson.build index a261d850..e5b8fce2 100644 --- a/python/meson.build +++ b/python/meson.build @@ -95,6 +95,7 @@ if py.found() == true 'tstURI.py', 'tstmem.py', 'tstxpath.py', + 'unicode.py', 'validDTD.py', 'validRNG.py', 'validSchemas.py', diff --git a/python/tests/Makefile.am b/python/tests/Makefile.am index 10a5f9ce..83d5a0a6 100644 --- a/python/tests/Makefile.am +++ b/python/tests/Makefile.am @@ -45,7 +45,8 @@ PYTESTS= \ validRNG.py \ compareNodes.py \ xpathns.py \ - xpathleak.py + xpathleak.py \ + unicode.py XMLS= \ tst.xml \ diff --git a/python/tests/unicode.py b/python/tests/unicode.py new file mode 100755 index 00000000..5f2d34b4 --- /dev/null +++ b/python/tests/unicode.py @@ -0,0 +1,61 @@ +from io import BytesIO, StringIO +import libxml2 +import sys +from xml.sax.handler import ContentHandler +from xml.sax.xmlreader import InputSource +import xml.sax + + +# Test data: an XML file with 100,000 Unicode smileys, which expand +# into 400,000 bytes after UTF-8 encoding. 
+SMILEY = '\U0001f600'
+TEXT = 100_000 * SMILEY
+# The payload has to sit inside a root element for the document to be
+# well-formed.  The newline is kept outside that element so the only
+# character data the handler should receive is TEXT itself.
+XML_STRING = ('<?xml version="1.0" encoding="UTF-8"?>\n'
+              '<root>' + TEXT + '</root>')
+XML_BYTES = XML_STRING.encode('utf-8')
+
+
+def RunTest(test_name, source):
+    """Parse source with the drv_libxml2 SAX driver and verify its text.
+
+    All content passed to the handler's characters() callback is
+    collected and compared against TEXT.  On a mismatch, the lengths
+    and a 100-character prefix of both strings are printed and the
+    script exits with status 1.
+
+    test_name -- label printed when the comparison fails
+    source    -- xml.sax InputSource handed to the parser
+    """
+    expected = TEXT
+    # The parser may deliver the text in several pieces; gather them
+    # in a list and join once instead of growing a string repeatedly.
+    chunks = []
+
+    class TestHandler(ContentHandler):
+        def characters(self, content):
+            chunks.append(content)
+
+    reader = xml.sax.make_parser(['drv_libxml2'])
+    reader.setContentHandler(TestHandler())
+    reader.parse(source)
+    received = ''.join(chunks)
+    if expected != received:
+        print(test_name, 'failed!')
+        print('Expected length:', len(expected))
+        print('Received length:', len(received))
+        print('Expected text: (prefix only)', expected[:100])
+        print('Received text: (prefix only)', received[:100])
+        sys.exit(1)
+
+
+def TestBytesInput():
+    """Run the test with the document supplied as a UTF-8 byte stream."""
+    source = InputSource()
+    source.setByteStream(BytesIO(XML_BYTES))
+    RunTest('TestBytesInput', source)
+
+
+def TestStringInput():
+    """Run the test with the document supplied as a character stream."""
+    source = InputSource()
+    source.setCharacterStream(StringIO(XML_STRING))
+    RunTest('TestStringInput', source)
+
+
+# Memory debug specific: switch on libxml2 memory debugging before any
+# parsing happens, so the check at the end of the script is meaningful.
+libxml2.debugMemory(1)
+
+TestBytesInput()
+TestStringInput()
+
+# Memory debug specific: after parser cleanup the reported amount is
+# expected to be zero; anything else is reported as a leak.
+libxml2.cleanupParser()
+if libxml2.debugMemory(1) == 0:
+    print("OK")
+else:
+    print("Memory leak %d bytes" % (libxml2.debugMemory(1)))