home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2011 July / maximum-cd-2011-07.iso / DiscContents / LibO_3.3.2_Win_x86_install_multi.exe / libreoffice1.cab / test_codeccallbacks.py < prev    next >
Encoding:
Python Source  |  2011-03-15  |  30.4 KB  |  803 lines

  1. import test.test_support, unittest
  2. import sys, codecs, htmlentitydefs, unicodedata
  3.  
  4. class PosReturn:
  5.     # this can be used for configurable callbacks
  6.  
  7.     def __init__(self):
  8.         self.pos = 0
  9.  
  10.     def handle(self, exc):
  11.         oldpos = self.pos
  12.         realpos = oldpos
  13.         if realpos<0:
  14.             realpos = len(exc.object) + realpos
  15.         # if we don't advance this time, terminate on the next call
  16.         # otherwise we'd get an endless loop
  17.         if realpos <= exc.start:
  18.             self.pos = len(exc.object)
  19.         return (u"<?>", oldpos)
  20.  
  21. # A UnicodeEncodeError object with a bad start attribute
  22. class BadStartUnicodeEncodeError(UnicodeEncodeError):
  23.     def __init__(self):
  24.         UnicodeEncodeError.__init__(self, "ascii", u"", 0, 1, "bad")
  25.         self.start = []
  26.  
  27. # A UnicodeEncodeError object with a bad object attribute
  28. class BadObjectUnicodeEncodeError(UnicodeEncodeError):
  29.     def __init__(self):
  30.         UnicodeEncodeError.__init__(self, "ascii", u"", 0, 1, "bad")
  31.         self.object = []
  32.  
  33. # A UnicodeDecodeError object without an end attribute
  34. class NoEndUnicodeDecodeError(UnicodeDecodeError):
  35.     def __init__(self):
  36.         UnicodeDecodeError.__init__(self, "ascii", "", 0, 1, "bad")
  37.         del self.end
  38.  
  39. # A UnicodeDecodeError object with a bad object attribute
  40. class BadObjectUnicodeDecodeError(UnicodeDecodeError):
  41.     def __init__(self):
  42.         UnicodeDecodeError.__init__(self, "ascii", "", 0, 1, "bad")
  43.         self.object = []
  44.  
  45. # A UnicodeTranslateError object without a start attribute
  46. class NoStartUnicodeTranslateError(UnicodeTranslateError):
  47.     def __init__(self):
  48.         UnicodeTranslateError.__init__(self, u"", 0, 1, "bad")
  49.         del self.start
  50.  
  51. # A UnicodeTranslateError object without an end attribute
  52. class NoEndUnicodeTranslateError(UnicodeTranslateError):
  53.     def __init__(self):
  54.         UnicodeTranslateError.__init__(self,  u"", 0, 1, "bad")
  55.         del self.end
  56.  
  57. # A UnicodeTranslateError object without an object attribute
  58. class NoObjectUnicodeTranslateError(UnicodeTranslateError):
  59.     def __init__(self):
  60.         UnicodeTranslateError.__init__(self, u"", 0, 1, "bad")
  61.         del self.object
  62.  
  63. class CodecCallbackTest(unittest.TestCase):
  64.  
  65.     def test_xmlcharrefreplace(self):
  66.         # replace unencodable characters which numeric character entities.
  67.         # For ascii, latin-1 and charmaps this is completely implemented
  68.         # in C and should be reasonably fast.
  69.         s = u"\u30b9\u30d1\u30e2 \xe4nd eggs"
  70.         self.assertEqual(
  71.             s.encode("ascii", "xmlcharrefreplace"),
  72.             "スパモ änd eggs"
  73.         )
  74.         self.assertEqual(
  75.             s.encode("latin-1", "xmlcharrefreplace"),
  76.             "スパモ \xe4nd eggs"
  77.         )
  78.  
  79.     def test_xmlcharnamereplace(self):
  80.         # This time use a named character entity for unencodable
  81.         # characters, if one is available.
  82.  
  83.         def xmlcharnamereplace(exc):
  84.             if not isinstance(exc, UnicodeEncodeError):
  85.                 raise TypeError("don't know how to handle %r" % exc)
  86.             l = []
  87.             for c in exc.object[exc.start:exc.end]:
  88.                 try:
  89.                     l.append(u"&%s;" % htmlentitydefs.codepoint2name[ord(c)])
  90.                 except KeyError:
  91.                     l.append(u"&#%d;" % ord(c))
  92.             return (u"".join(l), exc.end)
  93.  
  94.         codecs.register_error(
  95.             "test.xmlcharnamereplace", xmlcharnamereplace)
  96.  
  97.         sin = u"\xab\u211c\xbb = \u2329\u1234\u20ac\u232a"
  98.         sout = "«ℜ» = ⟨ሴ€⟩"
  99.         self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout)
  100.         sout = "\xabℜ\xbb = ⟨ሴ€⟩"
  101.         self.assertEqual(sin.encode("latin-1", "test.xmlcharnamereplace"), sout)
  102.         sout = "\xabℜ\xbb = ⟨ሴ\xa4⟩"
  103.         self.assertEqual(sin.encode("iso-8859-15", "test.xmlcharnamereplace"), sout)
  104.  
  105.     def test_uninamereplace(self):
  106.         # We're using the names from the unicode database this time,
  107.         # and we're doing "syntax highlighting" here, i.e. we include
  108.         # the replaced text in ANSI escape sequences. For this it is
  109.         # useful that the error handler is not called for every single
  110.         # unencodable character, but for a complete sequence of
  111.         # unencodable characters, otherwise we would output many
  112.         # unneccessary escape sequences.
  113.  
  114.         def uninamereplace(exc):
  115.             if not isinstance(exc, UnicodeEncodeError):
  116.                 raise TypeError("don't know how to handle %r" % exc)
  117.             l = []
  118.             for c in exc.object[exc.start:exc.end]:
  119.                 l.append(unicodedata.name(c, u"0x%x" % ord(c)))
  120.             return (u"\033[1m%s\033[0m" % u", ".join(l), exc.end)
  121.  
  122.         codecs.register_error(
  123.             "test.uninamereplace", uninamereplace)
  124.  
  125.         sin = u"\xac\u1234\u20ac\u8000"
  126.         sout = "\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
  127.         self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout)
  128.  
  129.         sout = "\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
  130.         self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout)
  131.  
  132.         sout = "\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m"
  133.         self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout)
  134.  
  135.     def test_backslashescape(self):
  136.         # Does the same as the "unicode-escape" encoding, but with different
  137.         # base encodings.
  138.         sin = u"a\xac\u1234\u20ac\u8000"
  139.         if sys.maxunicode > 0xffff:
  140.             sin += unichr(sys.maxunicode)
  141.         sout = "a\\xac\\u1234\\u20ac\\u8000"
  142.         if sys.maxunicode > 0xffff:
  143.             sout += "\\U%08x" % sys.maxunicode
  144.         self.assertEqual(sin.encode("ascii", "backslashreplace"), sout)
  145.  
  146.         sout = "a\xac\\u1234\\u20ac\\u8000"
  147.         if sys.maxunicode > 0xffff:
  148.             sout += "\\U%08x" % sys.maxunicode
  149.         self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout)
  150.  
  151.         sout = "a\xac\\u1234\xa4\\u8000"
  152.         if sys.maxunicode > 0xffff:
  153.             sout += "\\U%08x" % sys.maxunicode
  154.         self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
  155.  
  156.     def test_decoderelaxedutf8(self):
  157.         # This is the test for a decoding callback handler,
  158.         # that relaxes the UTF-8 minimal encoding restriction.
  159.         # A null byte that is encoded as "\xc0\x80" will be
  160.         # decoded as a null byte. All other illegal sequences
  161.         # will be handled strictly.
  162.         def relaxedutf8(exc):
  163.             if not isinstance(exc, UnicodeDecodeError):
  164.                 raise TypeError("don't know how to handle %r" % exc)
  165.             if exc.object[exc.start:exc.end].startswith("\xc0\x80"):
  166.                 return (u"\x00", exc.start+2) # retry after two bytes
  167.             else:
  168.                 raise exc
  169.  
  170.         codecs.register_error(
  171.             "test.relaxedutf8", relaxedutf8)
  172.  
  173.         sin = "a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80"
  174.         sout = u"a\x00b\x00c\xfc\x00\x00"
  175.         self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout)
  176.         sin = "\xc0\x80\xc0\x81"
  177.         self.assertRaises(UnicodeError, sin.decode, "utf-8", "test.relaxedutf8")
  178.  
  179.     def test_charmapencode(self):
  180.         # For charmap encodings the replacement string will be
  181.         # mapped through the encoding again. This means, that
  182.         # to be able to use e.g. the "replace" handler, the
  183.         # charmap has to have a mapping for "?".
  184.         charmap = dict([ (ord(c), 2*c.upper()) for c in "abcdefgh"])
  185.         sin = u"abc"
  186.         sout = "AABBCC"
  187.         self.assertEquals(codecs.charmap_encode(sin, "strict", charmap)[0], sout)
  188.  
  189.         sin = u"abcA"
  190.         self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap)
  191.  
  192.         charmap[ord("?")] = "XYZ"
  193.         sin = u"abcDEF"
  194.         sout = "AABBCCXYZXYZXYZ"
  195.         self.assertEquals(codecs.charmap_encode(sin, "replace", charmap)[0], sout)
  196.  
  197.         charmap[ord("?")] = u"XYZ"
  198.         self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
  199.  
  200.         charmap[ord("?")] = u"XYZ"
  201.         self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
  202.  
  203.     def test_decodeunicodeinternal(self):
  204.         self.assertRaises(
  205.             UnicodeDecodeError,
  206.             "\x00\x00\x00\x00\x00".decode,
  207.             "unicode-internal",
  208.         )
  209.         if sys.maxunicode > 0xffff:
  210.             def handler_unicodeinternal(exc):
  211.                 if not isinstance(exc, UnicodeDecodeError):
  212.                     raise TypeError("don't know how to handle %r" % exc)
  213.                 return (u"\x01", 1)
  214.  
  215.             self.assertEqual(
  216.                 "\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
  217.                 u"\u0000"
  218.             )
  219.  
  220.             self.assertEqual(
  221.                 "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"),
  222.                 u"\u0000\ufffd"
  223.             )
  224.  
  225.             codecs.register_error("test.hui", handler_unicodeinternal)
  226.  
  227.             self.assertEqual(
  228.                 "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"),
  229.                 u"\u0000\u0001\u0000"
  230.             )
  231.  
  232.     def test_callbacks(self):
  233.         def handler1(exc):
  234.             if not isinstance(exc, UnicodeEncodeError) \
  235.                and not isinstance(exc, UnicodeDecodeError):
  236.                 raise TypeError("don't know how to handle %r" % exc)
  237.             l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
  238.             return (u"[%s]" % u"".join(l), exc.end)
  239.  
  240.         codecs.register_error("test.handler1", handler1)
  241.  
  242.         def handler2(exc):
  243.             if not isinstance(exc, UnicodeDecodeError):
  244.                 raise TypeError("don't know how to handle %r" % exc)
  245.             l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
  246.             return (u"[%s]" % u"".join(l), exc.end+1) # skip one character
  247.  
  248.         codecs.register_error("test.handler2", handler2)
  249.  
  250.         s = "\x00\x81\x7f\x80\xff"
  251.  
  252.         self.assertEqual(
  253.             s.decode("ascii", "test.handler1"),
  254.             u"\x00[<129>]\x7f[<128>][<255>]"
  255.         )
  256.         self.assertEqual(
  257.             s.decode("ascii", "test.handler2"),
  258.             u"\x00[<129>][<128>]"
  259.         )
  260.  
  261.         self.assertEqual(
  262.             "\\u3042\u3xxx".decode("unicode-escape", "test.handler1"),
  263.             u"\u3042[<92><117><51><120>]xx"
  264.         )
  265.  
  266.         self.assertEqual(
  267.             "\\u3042\u3xx".decode("unicode-escape", "test.handler1"),
  268.             u"\u3042[<92><117><51><120><120>]"
  269.         )
  270.  
  271.         self.assertEqual(
  272.             codecs.charmap_decode("abc", "test.handler1", {ord("a"): u"z"})[0],
  273.             u"z[<98>][<99>]"
  274.         )
  275.  
  276.         self.assertEqual(
  277.             u"g\xfc\xdfrk".encode("ascii", "test.handler1"),
  278.             u"g[<252><223>]rk"
  279.         )
  280.  
  281.         self.assertEqual(
  282.             u"g\xfc\xdf".encode("ascii", "test.handler1"),
  283.             u"g[<252><223>]"
  284.         )
  285.  
  286.     def test_longstrings(self):
  287.         # test long strings to check for memory overflow problems
  288.         errors = [ "strict", "ignore", "replace", "xmlcharrefreplace",
  289.                    "backslashreplace"]
  290.         # register the handlers under different names,
  291.         # to prevent the codec from recognizing the name
  292.         for err in errors:
  293.             codecs.register_error("test." + err, codecs.lookup_error(err))
  294.         l = 1000
  295.         errors += [ "test." + err for err in errors ]
  296.         for uni in [ s*l for s in (u"x", u"\u3042", u"a\xe4") ]:
  297.             for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15",
  298.                         "utf-8", "utf-7", "utf-16", "utf-32"):
  299.                 for err in errors:
  300.                     try:
  301.                         uni.encode(enc, err)
  302.                     except UnicodeError:
  303.                         pass
  304.  
  305.     def check_exceptionobjectargs(self, exctype, args, msg):
  306.         # Test UnicodeError subclasses: construction, attribute assignment and __str__ conversion
  307.         # check with one missing argument
  308.         self.assertRaises(TypeError, exctype, *args[:-1])
  309.         # check with one argument too much
  310.         self.assertRaises(TypeError, exctype, *(args + ["too much"]))
  311.         # check with one argument of the wrong type
  312.         wrongargs = [ "spam", u"eggs", 42, 1.0, None ]
  313.         for i in xrange(len(args)):
  314.             for wrongarg in wrongargs:
  315.                 if type(wrongarg) is type(args[i]):
  316.                     continue
  317.                 # build argument array
  318.                 callargs = []
  319.                 for j in xrange(len(args)):
  320.                     if i==j:
  321.                         callargs.append(wrongarg)
  322.                     else:
  323.                         callargs.append(args[i])
  324.                 self.assertRaises(TypeError, exctype, *callargs)
  325.  
  326.         # check with the correct number and type of arguments
  327.         exc = exctype(*args)
  328.         self.assertEquals(str(exc), msg)
  329.  
  330.     def test_unicodeencodeerror(self):
  331.         self.check_exceptionobjectargs(
  332.             UnicodeEncodeError,
  333.             ["ascii", u"g\xfcrk", 1, 2, "ouch"],
  334.             "'ascii' codec can't encode character u'\\xfc' in position 1: ouch"
  335.         )
  336.         self.check_exceptionobjectargs(
  337.             UnicodeEncodeError,
  338.             ["ascii", u"g\xfcrk", 1, 4, "ouch"],
  339.             "'ascii' codec can't encode characters in position 1-3: ouch"
  340.         )
  341.         self.check_exceptionobjectargs(
  342.             UnicodeEncodeError,
  343.             ["ascii", u"\xfcx", 0, 1, "ouch"],
  344.             "'ascii' codec can't encode character u'\\xfc' in position 0: ouch"
  345.         )
  346.         self.check_exceptionobjectargs(
  347.             UnicodeEncodeError,
  348.             ["ascii", u"\u0100x", 0, 1, "ouch"],
  349.             "'ascii' codec can't encode character u'\\u0100' in position 0: ouch"
  350.         )
  351.         self.check_exceptionobjectargs(
  352.             UnicodeEncodeError,
  353.             ["ascii", u"\uffffx", 0, 1, "ouch"],
  354.             "'ascii' codec can't encode character u'\\uffff' in position 0: ouch"
  355.         )
  356.         if sys.maxunicode > 0xffff:
  357.             self.check_exceptionobjectargs(
  358.                 UnicodeEncodeError,
  359.                 ["ascii", u"\U00010000x", 0, 1, "ouch"],
  360.                 "'ascii' codec can't encode character u'\\U00010000' in position 0: ouch"
  361.             )
  362.  
  363.     def test_unicodedecodeerror(self):
  364.         self.check_exceptionobjectargs(
  365.             UnicodeDecodeError,
  366.             ["ascii", "g\xfcrk", 1, 2, "ouch"],
  367.             "'ascii' codec can't decode byte 0xfc in position 1: ouch"
  368.         )
  369.         self.check_exceptionobjectargs(
  370.             UnicodeDecodeError,
  371.             ["ascii", "g\xfcrk", 1, 3, "ouch"],
  372.             "'ascii' codec can't decode bytes in position 1-2: ouch"
  373.         )
  374.  
  375.     def test_unicodetranslateerror(self):
  376.         self.check_exceptionobjectargs(
  377.             UnicodeTranslateError,
  378.             [u"g\xfcrk", 1, 2, "ouch"],
  379.             "can't translate character u'\\xfc' in position 1: ouch"
  380.         )
  381.         self.check_exceptionobjectargs(
  382.             UnicodeTranslateError,
  383.             [u"g\u0100rk", 1, 2, "ouch"],
  384.             "can't translate character u'\\u0100' in position 1: ouch"
  385.         )
  386.         self.check_exceptionobjectargs(
  387.             UnicodeTranslateError,
  388.             [u"g\uffffrk", 1, 2, "ouch"],
  389.             "can't translate character u'\\uffff' in position 1: ouch"
  390.         )
  391.         if sys.maxunicode > 0xffff:
  392.             self.check_exceptionobjectargs(
  393.                 UnicodeTranslateError,
  394.                 [u"g\U00010000rk", 1, 2, "ouch"],
  395.                 "can't translate character u'\\U00010000' in position 1: ouch"
  396.             )
  397.         self.check_exceptionobjectargs(
  398.             UnicodeTranslateError,
  399.             [u"g\xfcrk", 1, 3, "ouch"],
  400.             "can't translate characters in position 1-2: ouch"
  401.         )
  402.  
  403.     def test_badandgoodstrictexceptions(self):
  404.         # "strict" complains about a non-exception passed in
  405.         self.assertRaises(
  406.             TypeError,
  407.             codecs.strict_errors,
  408.             42
  409.         )
  410.         # "strict" complains about the wrong exception type
  411.         self.assertRaises(
  412.             Exception,
  413.             codecs.strict_errors,
  414.             Exception("ouch")
  415.         )
  416.  
  417.         # If the correct exception is passed in, "strict" raises it
  418.         self.assertRaises(
  419.             UnicodeEncodeError,
  420.             codecs.strict_errors,
  421.             UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")
  422.         )
  423.  
  424.     def test_badandgoodignoreexceptions(self):
  425.         # "ignore" complains about a non-exception passed in
  426.         self.assertRaises(
  427.            TypeError,
  428.            codecs.ignore_errors,
  429.            42
  430.         )
  431.         # "ignore" complains about the wrong exception type
  432.         self.assertRaises(
  433.            TypeError,
  434.            codecs.ignore_errors,
  435.            UnicodeError("ouch")
  436.         )
  437.         # If the correct exception is passed in, "ignore" returns an empty replacement
  438.         self.assertEquals(
  439.             codecs.ignore_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
  440.             (u"", 1)
  441.         )
  442.         self.assertEquals(
  443.             codecs.ignore_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
  444.             (u"", 1)
  445.         )
  446.         self.assertEquals(
  447.             codecs.ignore_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
  448.             (u"", 1)
  449.         )
  450.  
  451.     def test_badandgoodreplaceexceptions(self):
  452.         # "replace" complains about a non-exception passed in
  453.         self.assertRaises(
  454.            TypeError,
  455.            codecs.replace_errors,
  456.            42
  457.         )
  458.         # "replace" complains about the wrong exception type
  459.         self.assertRaises(
  460.            TypeError,
  461.            codecs.replace_errors,
  462.            UnicodeError("ouch")
  463.         )
  464.         self.assertRaises(
  465.             TypeError,
  466.             codecs.replace_errors,
  467.             BadObjectUnicodeEncodeError()
  468.         )
  469.         self.assertRaises(
  470.             TypeError,
  471.             codecs.replace_errors,
  472.             BadObjectUnicodeDecodeError()
  473.         )
  474.         # With the correct exception, "replace" returns an "?" or u"\ufffd" replacement
  475.         self.assertEquals(
  476.             codecs.replace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
  477.             (u"?", 1)
  478.         )
  479.         self.assertEquals(
  480.             codecs.replace_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
  481.             (u"\ufffd", 1)
  482.         )
  483.         self.assertEquals(
  484.             codecs.replace_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
  485.             (u"\ufffd", 1)
  486.         )
  487.  
  488.     def test_badandgoodxmlcharrefreplaceexceptions(self):
  489.         # "xmlcharrefreplace" complains about a non-exception passed in
  490.         self.assertRaises(
  491.            TypeError,
  492.            codecs.xmlcharrefreplace_errors,
  493.            42
  494.         )
  495.         # "xmlcharrefreplace" complains about the wrong exception types
  496.         self.assertRaises(
  497.            TypeError,
  498.            codecs.xmlcharrefreplace_errors,
  499.            UnicodeError("ouch")
  500.         )
  501.         # "xmlcharrefreplace" can only be used for encoding
  502.         self.assertRaises(
  503.             TypeError,
  504.             codecs.xmlcharrefreplace_errors,
  505.             UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
  506.         )
  507.         self.assertRaises(
  508.             TypeError,
  509.             codecs.xmlcharrefreplace_errors,
  510.             UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
  511.         )
  512.         # Use the correct exception
  513.         cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0x3042)
  514.         s = "".join(unichr(c) for c in cs)
  515.         self.assertEquals(
  516.             codecs.xmlcharrefreplace_errors(
  517.                 UnicodeEncodeError("ascii", s, 0, len(s), "ouch")
  518.             ),
  519.             (u"".join(u"&#%d;" % ord(c) for c in s), len(s))
  520.         )
  521.  
  522.     def test_badandgoodbackslashreplaceexceptions(self):
  523.         # "backslashreplace" complains about a non-exception passed in
  524.         self.assertRaises(
  525.            TypeError,
  526.            codecs.backslashreplace_errors,
  527.            42
  528.         )
  529.         # "backslashreplace" complains about the wrong exception types
  530.         self.assertRaises(
  531.            TypeError,
  532.            codecs.backslashreplace_errors,
  533.            UnicodeError("ouch")
  534.         )
  535.         # "backslashreplace" can only be used for encoding
  536.         self.assertRaises(
  537.             TypeError,
  538.             codecs.backslashreplace_errors,
  539.             UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
  540.         )
  541.         self.assertRaises(
  542.             TypeError,
  543.             codecs.backslashreplace_errors,
  544.             UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
  545.         )
  546.         # Use the correct exception
  547.         self.assertEquals(
  548.             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
  549.             (u"\\u3042", 1)
  550.         )
  551.         self.assertEquals(
  552.             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\x00", 0, 1, "ouch")),
  553.             (u"\\x00", 1)
  554.         )
  555.         self.assertEquals(
  556.             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\xff", 0, 1, "ouch")),
  557.             (u"\\xff", 1)
  558.         )
  559.         self.assertEquals(
  560.             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u0100", 0, 1, "ouch")),
  561.             (u"\\u0100", 1)
  562.         )
  563.         self.assertEquals(
  564.             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\uffff", 0, 1, "ouch")),
  565.             (u"\\uffff", 1)
  566.         )
  567.         if sys.maxunicode>0xffff:
  568.             self.assertEquals(
  569.                 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U00010000", 0, 1, "ouch")),
  570.                 (u"\\U00010000", 1)
  571.             )
  572.             self.assertEquals(
  573.                 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U0010ffff", 0, 1, "ouch")),
  574.                 (u"\\U0010ffff", 1)
  575.             )
  576.  
  577.     def test_badhandlerresults(self):
  578.         results = ( 42, u"foo", (1,2,3), (u"foo", 1, 3), (u"foo", None), (u"foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
  579.         encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")
  580.  
  581.         for res in results:
  582.             codecs.register_error("test.badhandler", lambda: res)
  583.             for enc in encs:
  584.                 self.assertRaises(
  585.                     TypeError,
  586.                     u"\u3042".encode,
  587.                     enc,
  588.                     "test.badhandler"
  589.                 )
  590.             for (enc, bytes) in (
  591.                 ("ascii", "\xff"),
  592.                 ("utf-8", "\xff"),
  593.                 ("utf-7", "+x-"),
  594.                 ("unicode-internal", "\x00"),
  595.             ):
  596.                 self.assertRaises(
  597.                     TypeError,
  598.                     bytes.decode,
  599.                     enc,
  600.                     "test.badhandler"
  601.                 )
  602.  
  603.     def test_lookup(self):
  604.         self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict"))
  605.         self.assertEquals(codecs.ignore_errors, codecs.lookup_error("ignore"))
  606.         self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict"))
  607.         self.assertEquals(
  608.             codecs.xmlcharrefreplace_errors,
  609.             codecs.lookup_error("xmlcharrefreplace")
  610.         )
  611.         self.assertEquals(
  612.             codecs.backslashreplace_errors,
  613.             codecs.lookup_error("backslashreplace")
  614.         )
  615.  
  616.     def test_unencodablereplacement(self):
  617.         def unencrepl(exc):
  618.             if isinstance(exc, UnicodeEncodeError):
  619.                 return (u"\u4242", exc.end)
  620.             else:
  621.                 raise TypeError("don't know how to handle %r" % exc)
  622.         codecs.register_error("test.unencreplhandler", unencrepl)
  623.         for enc in ("ascii", "iso-8859-1", "iso-8859-15"):
  624.             self.assertRaises(
  625.                 UnicodeEncodeError,
  626.                 u"\u4242".encode,
  627.                 enc,
  628.                 "test.unencreplhandler"
  629.             )
  630.  
  631.     def test_badregistercall(self):
  632.         # enhance coverage of:
  633.         # Modules/_codecsmodule.c::register_error()
  634.         # Python/codecs.c::PyCodec_RegisterError()
  635.         self.assertRaises(TypeError, codecs.register_error, 42)
  636.         self.assertRaises(TypeError, codecs.register_error, "test.dummy", 42)
  637.  
  638.     def test_badlookupcall(self):
  639.         # enhance coverage of:
  640.         # Modules/_codecsmodule.c::lookup_error()
  641.         self.assertRaises(TypeError, codecs.lookup_error)
  642.  
  643.     def test_unknownhandler(self):
  644.         # enhance coverage of:
  645.         # Modules/_codecsmodule.c::lookup_error()
  646.         self.assertRaises(LookupError, codecs.lookup_error, "test.unknown")
  647.  
  648.     def test_xmlcharrefvalues(self):
  649.         # enhance coverage of:
  650.         # Python/codecs.c::PyCodec_XMLCharRefReplaceErrors()
  651.         # and inline implementations
  652.         v = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000)
  653.         if sys.maxunicode>=100000:
  654.             v += (100000, 500000, 1000000)
  655.         s = u"".join([unichr(x) for x in v])
  656.         codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors)
  657.         for enc in ("ascii", "iso-8859-15"):
  658.             for err in ("xmlcharrefreplace", "test.xmlcharrefreplace"):
  659.                 s.encode(enc, err)
  660.  
  661.     def test_decodehelper(self):
  662.         # enhance coverage of:
  663.         # Objects/unicodeobject.c::unicode_decode_call_errorhandler()
  664.         # and callers
  665.         self.assertRaises(LookupError, "\xff".decode, "ascii", "test.unknown")
  666.  
  667.         def baddecodereturn1(exc):
  668.             return 42
  669.         codecs.register_error("test.baddecodereturn1", baddecodereturn1)
  670.         self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn1")
  671.         self.assertRaises(TypeError, "\\".decode, "unicode-escape", "test.baddecodereturn1")
  672.         self.assertRaises(TypeError, "\\x0".decode, "unicode-escape", "test.baddecodereturn1")
  673.         self.assertRaises(TypeError, "\\x0y".decode, "unicode-escape", "test.baddecodereturn1")
  674.         self.assertRaises(TypeError, "\\Uffffeeee".decode, "unicode-escape", "test.baddecodereturn1")
  675.         self.assertRaises(TypeError, "\\uyyyy".decode, "raw-unicode-escape", "test.baddecodereturn1")
  676.  
  677.         def baddecodereturn2(exc):
  678.             return (u"?", None)
  679.         codecs.register_error("test.baddecodereturn2", baddecodereturn2)
  680.         self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn2")
  681.  
  682.         handler = PosReturn()
  683.         codecs.register_error("test.posreturn", handler.handle)
  684.  
  685.         # Valid negative position
  686.         handler.pos = -1
  687.         self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?>0")
  688.  
  689.         # Valid negative position
  690.         handler.pos = -2
  691.         self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?><?>")
  692.  
  693.         # Negative position out of bounds
  694.         handler.pos = -3
  695.         self.assertRaises(IndexError, "\xff0".decode, "ascii", "test.posreturn")
  696.  
  697.         # Valid positive position
  698.         handler.pos = 1
  699.         self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?>0")
  700.  
  701.         # Largest valid positive position (one beyond end of input)
  702.         handler.pos = 2
  703.         self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?>")
  704.  
  705.         # Invalid positive position
  706.         handler.pos = 3
  707.         self.assertRaises(IndexError, "\xff0".decode, "ascii", "test.posreturn")
  708.  
  709.         # Restart at the "0"
  710.         handler.pos = 6
  711.         self.assertEquals("\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), u"<?>0")
  712.  
  713.         class D(dict):
  714.             def __getitem__(self, key):
  715.                 raise ValueError
  716.         self.assertRaises(UnicodeError, codecs.charmap_decode, "\xff", "strict", {0xff: None})
  717.         self.assertRaises(ValueError, codecs.charmap_decode, "\xff", "strict", D())
  718.         self.assertRaises(TypeError, codecs.charmap_decode, "\xff", "strict", {0xff: sys.maxunicode+1})
  719.  
  720.     def test_encodehelper(self):
  721.         # enhance coverage of:
  722.         # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
  723.         # and callers
  724.         self.assertRaises(LookupError, u"\xff".encode, "ascii", "test.unknown")
  725.  
  726.         def badencodereturn1(exc):
  727.             return 42
  728.         codecs.register_error("test.badencodereturn1", badencodereturn1)
  729.         self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn1")
  730.  
  731.         def badencodereturn2(exc):
  732.             return (u"?", None)
  733.         codecs.register_error("test.badencodereturn2", badencodereturn2)
  734.         self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn2")
  735.  
  736.         handler = PosReturn()
  737.         codecs.register_error("test.posreturn", handler.handle)
  738.  
  739.         # Valid negative position
  740.         handler.pos = -1
  741.         self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?>0")
  742.  
  743.         # Valid negative position
  744.         handler.pos = -2
  745.         self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?><?>")
  746.  
  747.         # Negative position out of bounds
  748.         handler.pos = -3
  749.         self.assertRaises(IndexError, u"\xff0".encode, "ascii", "test.posreturn")
  750.  
  751.         # Valid positive position
  752.         handler.pos = 1
  753.         self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?>0")
  754.  
  755.         # Largest valid positive position (one beyond end of input
  756.         handler.pos = 2
  757.         self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?>")
  758.  
  759.         # Invalid positive position
  760.         handler.pos = 3
  761.         self.assertRaises(IndexError, u"\xff0".encode, "ascii", "test.posreturn")
  762.  
  763.         handler.pos = 0
  764.  
  765.         class D(dict):
  766.             def __getitem__(self, key):
  767.                 raise ValueError
  768.         for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"):
  769.             self.assertRaises(UnicodeError, codecs.charmap_encode, u"\xff", err, {0xff: None})
  770.             self.assertRaises(ValueError, codecs.charmap_encode, u"\xff", err, D())
  771.             self.assertRaises(TypeError, codecs.charmap_encode, u"\xff", err, {0xff: 300})
  772.  
  773.     def test_translatehelper(self):
  774.         # enhance coverage of:
  775.         # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
  776.         # and callers
  777.         # (Unfortunately the errors argument is not directly accessible
  778.         # from Python, so we can't test that much)
  779.         class D(dict):
  780.             def __getitem__(self, key):
  781.                 raise ValueError
  782.         self.assertRaises(ValueError, u"\xff".translate, D())
  783.         self.assertRaises(TypeError, u"\xff".translate, {0xff: sys.maxunicode+1})
  784.         self.assertRaises(TypeError, u"\xff".translate, {0xff: ()})
  785.  
  786.     def test_bug828737(self):
  787.         charmap = {
  788.             ord("&"): u"&",
  789.             ord("<"): u"<",
  790.             ord(">"): u">",
  791.             ord('"'): u""",
  792.         }
  793.  
  794.         for n in (1, 10, 100, 1000):
  795.             text = u'abc<def>ghi'*n
  796.             text.translate(charmap)
  797.  
  798. def test_main():
  799.     test.test_support.run_unittest(CodecCallbackTest)
  800.  
  801. if __name__ == "__main__":
  802.     test_main()
  803.