1
0
mirror of https://github.com/ThunderEX/py-kms.git synced 2025-04-19 18:22:22 +03:00
py-kms/upy/codecs.py
2017-08-30 16:40:16 +08:00

86 lines
2.9 KiB
Python

__all__ = ['encode']
# from source of pypy:
# pypy / pypy / module / _codecs / interp_codecs.py
# pypy / rpython / rlib / runicode.py
import sys
# Byte order of the running interpreter, exactly as reported by sys.byteorder.
BYTEORDER = sys.byteorder
# Short suffix form: first letter of BYTEORDER followed by "e" ("le" or "be").
BYTEORDER2 = '{}e'.format(BYTEORDER[0])
# Import-time sanity check that the suffix is one of the two known forms.
assert BYTEORDER2 in ('le', 'be')
def _storechar(result, CH, byteorder):
hi = ((CH) >> 8) & 0xff
lo = (CH) & 0xff
if byteorder == 'little':
result.append(lo)
result.append(hi)
else:
result.append(hi)
result.append(lo)
def _default_encode_errorhandler(errors, encoding, msg, s, startpos, endpos):
    """Fallback error handler used when encode_utf_16 is given none.

    Returns ``(replacement_text, replacement_bytes, resume_pos)`` for the
    ``'replace'`` and ``'ignore'`` modes and raises for every other mode.
    """
    if errors == 'replace':
        return '?', None, endpos
    if errors == 'ignore':
        return '', None, endpos
    # UnicodeError (the base class) is raised so this module does not rely on
    # UnicodeEncodeError existing on reduced runtimes.
    # NOTE(review): confirm which exception types the target interpreter has.
    raise UnicodeError(
        "'{}' codec can't encode character at position {}: {}".format(
            encoding, startpos, msg))


def encode_utf_16(s, errors,
                  errorhandler=None,
                  allow_surrogates=True,
                  byteorder='little',
                  public_encoding_name='utf16'):
    """Encode the string ``s`` as UTF-16 and return the result as ``bytes``.

    Args:
        s: String to encode; it is iterated character by character.
        errors: Error-handling mode, passed through to ``errorhandler``.
            The built-in fallback handler honours ``'replace'`` and
            ``'ignore'`` and raises for anything else.
        errorhandler: Callable ``(errors, encoding, msg, s, start, end)``
            returning ``(replacement_text, replacement_bytes, resume_pos)``.
            It is consulted only for code points in 0xD800-0xDFFF when
            ``allow_surrogates`` is false.  ``resume_pos`` is ignored:
            encoding always continues with the next character of ``s``.
            ``None`` selects the built-in fallback handler.
        allow_surrogates: When true, code points in 0xD800-0xDFFF are
            written out unchanged as a single 16-bit unit.
        byteorder: ``'little'`` writes the low byte of each unit first;
            ``'native'`` first emits a 0xFEFF byte-order mark and then uses
            the module-level ``BYTEORDER``; any other value writes the high
            byte first.
        public_encoding_name: Encoding name reported to ``errorhandler``.

    Returns:
        The encoded data as ``bytes``.

    Raises:
        UnicodeError: From the fallback handler, when a disallowed surrogate
            is met and ``errors`` is neither ``'replace'`` nor ``'ignore'``.
    """
    if errorhandler is None:
        # Previously this was left as None, so the first disallowed surrogate
        # failed with "'NoneType' object is not callable".
        errorhandler = _default_encode_errorhandler

    result = bytearray()
    if byteorder == 'native':
        _storechar(result, 0xFEFF, BYTEORDER)
        byteorder = BYTEORDER

    msg = 'surrogates not allowed'
    for pos, c in enumerate(s):
        ch = ord(c)
        if ch >= 0x10000:
            # Outside the BMP: split into a high/low surrogate pair.
            offset = ch - 0x10000
            _storechar(result, 0xD800 | (offset >> 10), byteorder)
            _storechar(result, 0xDC00 | (offset & 0x3FF), byteorder)
            continue
        if ch < 0xD800 or ch >= 0xE000 or allow_surrogates:
            _storechar(result, ch, byteorder)
            continue

        # Lone surrogate that is not allowed.  ``pos`` is the index of the
        # offending character, so the reported range is [pos, pos + 1).
        start, end = pos, pos + 1
        ru, rs, _resume = errorhandler(errors, public_encoding_name,
                                       msg, s, start, end)
        if rs is not None:
            # The handler supplied raw bytes; they must form whole 16-bit
            # units, otherwise escalate through the handler in strict mode.
            if len(rs) % 2 != 0:
                errorhandler('strict', public_encoding_name,
                             msg, s, start, end)
            # extend, not append: bytearray.append accepts a single int only.
            result.extend(rs)
            continue
        for rch in ru:
            unit = ord(rch)
            if unit < 0xD800:
                _storechar(result, unit, byteorder)
            else:
                # Replacement text may not itself contain surrogates or
                # non-BMP characters.
                errorhandler('strict', public_encoding_name,
                             msg, s, start, end)
    return bytes(result)
def encode(obj, encoding='utf_8', errors='strict'):
    """Encode the string ``obj`` and return the resulting bytes.

    Args:
        obj: String to encode.
        encoding: One of ``'utf_8'``, ``'utf_16'`` (byte-order mark followed
            by native byte order), ``'utf_16_be'`` or ``'utf_16_le'``.  The
            name is matched exactly; no alias normalisation is performed.
        errors: Error-handling mode.  It is handed to ``str.encode`` for
            ``'utf_8'`` and to ``encode_utf_16`` for the UTF-16 variants.
            The UTF-16 calls allow surrogates, so ``encode_utf_16`` never
            reaches its error branch from here.

    Returns:
        The encoded data as ``bytes``.

    Raises:
        NotImplementedError: If ``encoding`` is not one of the names above.
    """
    if encoding == 'utf_8':
        return obj.encode(encoding, errors)

    if encoding == 'utf_16':
        byteorder, public_name = 'native', 'utf-16-' + BYTEORDER2
    elif encoding == 'utf_16_be':
        byteorder, public_name = 'big', 'utf-16-be'
    elif encoding == 'utf_16_le':
        byteorder, public_name = 'little', 'utf-16-le'
    else:
        raise NotImplementedError('Encoding of {} not implemented'.format(encoding))

    # Forward the caller's ``errors`` instead of a hard-coded placeholder so
    # the argument is no longer silently discarded for UTF-16.
    return encode_utf_16(obj, errors, None, True, byteorder, public_name)