Python codecs 模块,c() 实例源码
我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用codecs.c()。
def test_xmlcharnamereplace(self):
# This time use a named character entity for unencodable
# characters,if one is available.
def xmlcharnamereplace(exc):
if not isinstance(exc, UnicodeEncodeError):
raise TypeError("don't kNow how to handle %r" % exc)
l = []
for c in exc.object[exc.start:exc.end]:
try:
l.append("&%s;" % html.entities.codepoint2name[ord(c)])
except KeyError:
l.append("&#%d;" % ord(c))
return ("".join(l), exc.end)
codecs.register_error(
"test.xmlcharnamereplace", xmlcharnamereplace)
sin = "\xab\u211c\xbb = \u2329\u1234\u20ac\u232a"
sout = b"«ℜ» = ⟨ሴ€⟩"
self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout)
sout = b"\xabℜ\xbb = ⟨ሴ€⟩"
self.assertEqual(sin.encode("latin-1", sout)
sout = b"\xabℜ\xbb = ⟨ሴ\xa4⟩"
self.assertEqual(sin.encode("iso-8859-15", sout)
def test_charmapencode(self):
# For charmap encodings the replacement string will be
# mapped through the encoding again. This means,that
# to be able to use e.g. the "replace" handler,the
# charmap has to have a mapping for "?".
charmap = dict((ord(c), bytes(2*c.upper(), 'ascii')) for c in "abcdefgh")
sin = "abc"
sout = b"AABBCC"
self.assertEqual(codecs.charmap_encode(sin, "strict", charmap)[0], sout)
sin = "abcA"
self.assertRaises(UnicodeError, codecs.charmap_encode, sin, charmap)
charmap[ord("?")] = b"XYZ"
sin = "abcDEF"
sout = b"AABBCCXYZXYZXYZ"
self.assertEqual(codecs.charmap_encode(sin, "replace", sout)
charmap[ord("?")] = "XYZ" # wrong type in mapping
self.assertRaises(TypeError, charmap)
def test_xmlcharnamereplace(self):
# This time use a named character entity for unencodable
# characters, UnicodeEncodeError):
raise TypeError("don't kNow how to handle %r" % exc)
l = []
for c in exc.object[exc.start:exc.end]:
try:
l.append(u"&%s;" % htmlentitydefs.codepoint2name[ord(c)])
except KeyError:
l.append(u"&#%d;" % ord(c))
return (u"".join(l), xmlcharnamereplace)
sin = u"\xab\u211c\xbb = \u2329\u1234\u20ac\u232a"
sout = "«ℜ» = ⟨ሴ€⟩"
self.assertEqual(sin.encode("ascii", sout)
sout = "\xabℜ\xbb = ⟨ሴ€⟩"
self.assertEqual(sin.encode("latin-1", sout)
sout = "\xabℜ\xbb = ⟨ሴ\xa4⟩"
self.assertEqual(sin.encode("iso-8859-15", sout)
def test_charmapencode(self):
# For charmap encodings the replacement string will be
# mapped through the encoding again. This means,the
# charmap has to have a mapping for "?".
charmap = dict([ (ord(c), 2*c.upper()) for c in "abcdefgh"])
sin = u"abc"
sout = "AABBCC"
self.assertEqual(codecs.charmap_encode(sin, sout)
sin = u"abcA"
self.assertRaises(UnicodeError, charmap)
charmap[ord("?")] = "XYZ"
sin = u"abcDEF"
sout = "AABBCCXYZXYZXYZ"
self.assertEqual(codecs.charmap_encode(sin, sout)
charmap[ord("?")] = u"XYZ"
self.assertRaises(TypeError, charmap)
charmap[ord("?")] = u"XYZ"
self.assertRaises(TypeError, charmap)
def test_charmapencode(self):
# For charmap encodings the replacement string will be
# mapped through the encoding again. This means, charmap)
def test_charmapencode(self):
# For charmap encodings the replacement string will be
# mapped through the encoding again. This means, charmap)
def test_charmapencode(self):
# For charmap encodings the replacement string will be
# mapped through the encoding again. This means, charmap)
def test_charmapencode(self):
# For charmap encodings the replacement string will be
# mapped through the encoding again. This means, charmap)
def test_charmapencode(self):
# For charmap encodings the replacement string will be
# mapped through the encoding again. This means, charmap)
def test_charmapencode(self):
# For charmap encodings the replacement string will be
# mapped through the encoding again. This means, charmap)
def replace_surrogate_encode(mystring):
"""
Returns a (unicode) string,not the more logical bytes,because the codecs
register_error functionality expects this.
"""
decoded = []
for ch in mystring:
# if PY3:
# code = ch
# else:
code = ord(ch)
# The following magic comes from Py3.3's Python/codecs.c file:
if not 0xD800 <= code <= 0xDCFF:
# Not a surrogate. Fail with the original exception.
raise exc
# mybytes = [0xe0 | (code >> 12),
# 0x80 | ((code >> 6) & 0x3f),
# 0x80 | (code & 0x3f)]
# Is this a good idea?
if 0xDC00 <= code <= 0xDC7F:
decoded.append(_unichr(code - 0xDC00))
elif code <= 0xDCFF:
decoded.append(_unichr(code - 0xDC00))
else:
raise NotASurrogateError
return str().join(decoded)
def replace_surrogate_encode(mystring):
"""
Returns a (unicode) string,because the codecs
register_error functionality expects this.
"""
decoded = []
for ch in mystring:
# if utils.PY3:
# code = ch
# else:
code = ord(ch)
# The following magic comes from Py3.3's Python/codecs.c file:
if not 0xD800 <= code <= 0xDCFF:
# Not a surrogate. Fail with the original exception.
raise exc
# mybytes = [0xe0 | (code >> 12),
# 0x80 | (code & 0x3f)]
# Is this a good idea?
if 0xDC00 <= code <= 0xDC7F:
decoded.append(_unichr(code - 0xDC00))
elif code <= 0xDCFF:
decoded.append(_unichr(code - 0xDC00))
else:
raise NotASurrogateError
return str().join(decoded)
def test_uninamereplace(self):
# We're using the names from the unicode database this time,
# and we're doing "Syntax highlighting" here,i.e. we include
# the replaced text in ANSI escape sequences. For this it is
# useful that the error handler is not called for every single
# unencodable character,but for a complete sequence of
# unencodable characters,otherwise we would output many
# unnecessary escape sequences.
def uninamereplace(exc):
if not isinstance(exc, UnicodeEncodeError):
raise TypeError("don't kNow how to handle %r" % exc)
l = []
for c in exc.object[exc.start:exc.end]:
l.append(unicodedata.name(c, "0x%x" % ord(c)))
return ("\033[1m%s\033[0m" % ",".join(l), exc.end)
codecs.register_error(
"test.uninamereplace", uninamereplace)
sin = "\xac\u1234\u20ac\u8000"
sout = b"\033[1mNOT SIGN,ETHIOPIC SYLLABLE SEE,EURO SIGN,CJK UNIFIED IDEOGRAPH-8000\033[0m"
self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout)
sout = b"\xac\033[1mETHIOPIC SYLLABLE SEE,CJK UNIFIED IDEOGRAPH-8000\033[0m"
self.assertEqual(sin.encode("latin-1", sout)
sout = b"\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m"
self.assertEqual(sin.encode("iso-8859-15", sout)
def test_badandgoodxmlcharrefreplaceexceptions(self):
# "xmlcharrefreplace" complains about a non-exception passed in
self.assertRaises(
TypeError,
codecs.xmlcharrefreplace_errors,
42
)
# "xmlcharrefreplace" complains about the wrong exception types
self.assertRaises(
TypeError,
UnicodeError("ouch")
)
# "xmlcharrefreplace" can only be used for encoding
self.assertRaises(
TypeError,
codecs.xmlcharrefreplace_errors,
UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")
)
self.assertRaises(
TypeError,
UnicodeTranslateError("\u3042", "ouch")
)
# Use the correct exception
cs = (0, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0x3042)
s = "".join(chr(c) for c in cs)
self.assertEqual(
codecs.xmlcharrefreplace_errors(
UnicodeEncodeError("ascii", s, len(s), "ouch")
),
("".join("&#%d;" % ord(c) for c in s), len(s))
)
def test_badregistercall(self):
# enhance coverage of:
# Modules/_codecsmodule.c::register_error()
# Python/codecs.c::PyCodec_RegisterError()
self.assertRaises(TypeError, codecs.register_error, 42)
self.assertRaises(TypeError, "test.dummy", 42)
def test_xmlcharrefvalues(self):
# enhance coverage of:
# Python/codecs.c::PyCodec_XMLCharRefReplaceErrors()
# and inline implementations
v = (1, 5, 50, 500, 5000, 50000)
if sys.maxunicode>=100000:
v += (100000, 500000, 1000000)
s = "".join([chr(x) for x in v])
codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors)
for enc in ("ascii", "iso-8859-15"):
for err in ("xmlcharrefreplace", "test.xmlcharrefreplace"):
s.encode(enc, err)
def test_translatehelper(self):
# enhance coverage of:
# Objects/unicodeobject.c::unicode_encode_call_errorhandler()
# and callers
# (Unfortunately the errors argument is not directly accessible
# from Python,so we can't test that much)
class D(dict):
def __getitem__(self, key):
raise ValueError
#self.assertRaises(ValueError,"\xff".translate,D())
self.assertRaises(TypeError, "\xff".translate, {0xff: sys.maxunicode+1})
self.assertRaises(TypeError, {0xff: ()})
def test_uninamereplace(self):
# We're using the names from the unicode database this time, u"0x%x" % ord(c)))
return (u"\033[1m%s\033[0m" % u", uninamereplace)
sin = u"\xac\u1234\u20ac\u8000"
sout = "\033[1mNOT SIGN, sout)
sout = "\xac\033[1mETHIOPIC SYLLABLE SEE, sout)
sout = "\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m"
self.assertEqual(sin.encode("iso-8859-15", sout)
def test_badandgoodxmlcharrefreplaceexceptions(self):
# "xmlcharrefreplace" complains about a non-exception passed in
self.assertRaises(
TypeError, "\xff",
UnicodeTranslateError(u"\u3042", 10000)
cs += (0xdfff, 0xd800)
s = u"".join(unichr(c) for c in cs)
s += u"\U0001869f\U000186a0\U000f423f\U000f4240"
cs += (99999, 100000, 999999, 1000000)
self.assertEqual(
codecs.xmlcharrefreplace_errors(
UnicodeEncodeError("ascii", u"a" + s + u"b",
1, 1 + len(s),
(u"".join(u"&#%d;" % c for c in cs), 1 + len(s))
)
def test_badregistercall(self):
# enhance coverage of:
# Modules/_codecsmodule.c::register_error()
# Python/codecs.c::PyCodec_RegisterError()
self.assertRaises(TypeError, 42)
def test_xmlcharrefvalues(self):
# enhance coverage of:
# Python/codecs.c::PyCodec_XMLCharRefReplaceErrors()
# and inline implementations
v = (1, 1000000)
s = u"".join([unichr(x) for x in v])
codecs.register_error("test.xmlcharrefreplace", err)
def test_translatehelper(self):
# enhance coverage of:
# Objects/unicodeobject.c::unicode_encode_call_errorhandler()
# and callers
# (Unfortunately the errors argument is not directly accessible
# from Python, key):
raise ValueError
self.assertRaises(ValueError, u"\xff".translate, D())
self.assertRaises(TypeError, {0xff: ()})
def test_uninamereplace(self):
# We're using the names from the unicode database this time, sout)
def test_badandgoodxmlcharrefreplaceexceptions(self):
# "xmlcharrefreplace" complains about a non-exception passed in
self.assertRaises(
TypeError, 1 + len(s))
)
def test_badregistercall(self):
# enhance coverage of:
# Modules/_codecsmodule.c::register_error()
# Python/codecs.c::PyCodec_RegisterError()
self.assertRaises(TypeError, 42)
def test_xmlcharrefvalues(self):
# enhance coverage of:
# Python/codecs.c::PyCodec_XMLCharRefReplaceErrors()
# and inline implementations
v = (1, err)
def test_translatehelper(self):
# enhance coverage of:
# Objects/unicodeobject.c::unicode_encode_call_errorhandler()
# and callers
# (Unfortunately the errors argument is not directly accessible
# from Python, {0xff: ()})
def replace_surrogate_encode(mystring):
"""
Returns a (unicode) string,
# 0x80 | (code & 0x3f)]
# Is this a good idea?
if 0xDC00 <= code <= 0xDC7F:
decoded.append(_unichr(code - 0xDC00))
elif code <= 0xDCFF:
decoded.append(_unichr(code - 0xDC00))
else:
raise NotASurrogateError
return str().join(decoded)
def replace_surrogate_encode(mystring):
"""
Returns a (unicode) string,
# 0x80 | (code & 0x3f)]
# Is this a good idea?
if 0xDC00 <= code <= 0xDC7F:
decoded.append(_unichr(code - 0xDC00))
elif code <= 0xDCFF:
decoded.append(_unichr(code - 0xDC00))
else:
raise NotASurrogateError
return str().join(decoded)
def test_uninamereplace(self):
# We're using the names from the unicode database this time, sout)
def test_badandgoodxmlcharrefreplaceexceptions(self):
# "xmlcharrefreplace" complains about a non-exception passed in
self.assertRaises(
TypeError, len(s))
)
def test_badregistercall(self):
# enhance coverage of:
# Modules/_codecsmodule.c::register_error()
# Python/codecs.c::PyCodec_RegisterError()
self.assertRaises(TypeError, 42)
def test_xmlcharrefvalues(self):
# enhance coverage of:
# Python/codecs.c::PyCodec_XMLCharRefReplaceErrors()
# and inline implementations
v = (1, 50000)
if SIZEOF_WCHAR_T == 4:
v += (100000, err)
def test_translatehelper(self):
# enhance coverage of:
# Objects/unicodeobject.c::unicode_encode_call_errorhandler()
# and callers
# (Unfortunately the errors argument is not directly accessible
# from Python, {0xff: ()})
def replace_surrogate_encode(mystring):
"""
Returns a (unicode) string,
# 0x80 | (code & 0x3f)]
# Is this a good idea?
if 0xDC00 <= code <= 0xDC7F:
decoded.append(_unichr(code - 0xDC00))
elif code <= 0xDCFF:
decoded.append(_unichr(code - 0xDC00))
else:
raise NotASurrogateError
return str().join(decoded)
def replace_surrogate_encode(mystring):
"""
Returns a (unicode) string,
# 0x80 | (code & 0x3f)]
# Is this a good idea?
if 0xDC00 <= code <= 0xDC7F:
decoded.append(_unichr(code - 0xDC00))
elif code <= 0xDCFF:
decoded.append(_unichr(code - 0xDC00))
else:
raise NotASurrogateError
return str().join(decoded)
def replace_surrogate_encode(mystring):
"""
Returns a (unicode) string,
# 0x80 | (code & 0x3f)]
# Is this a good idea?
if 0xDC00 <= code <= 0xDC7F:
decoded.append(_unichr(code - 0xDC00))
elif code <= 0xDCFF:
decoded.append(_unichr(code - 0xDC00))
else:
raise NotASurrogateError
return str().join(decoded)
def test_uninamereplace(self):
# We're using the names from the unicode database this time, sout)
def test_badandgoodxmlcharrefreplaceexceptions(self):
# "xmlcharrefreplace" complains about a non-exception passed in
self.assertRaises(
TypeError, 0x3042)
s = "".join(unichr(c) for c in cs)
self.assertEqual(
codecs.xmlcharrefreplace_errors(
UnicodeEncodeError("ascii",
(u"".join(u"&#%d;" % ord(c) for c in s), len(s))
)
def test_badregistercall(self):
# enhance coverage of:
# Modules/_codecsmodule.c::register_error()
# Python/codecs.c::PyCodec_RegisterError()
self.assertRaises(TypeError, 42)
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。