home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2011 June / maximum-cd-2011-06.iso / DiscContents / LibO_3.3.1_Win_x86_install_multi.exe / libreoffice1.cab / test_normalization.py < prev    next >
Encoding:
Python Source  |  2011-02-15  |  3.1 KB  |  104 lines

  1. from test.test_support import run_unittest, open_urlresource
  2. import unittest
  3.  
  4. import sys
  5. import os
  6. from unicodedata import normalize, unidata_version
  7.  
  8. TESTDATAFILE = "NormalizationTest" + os.extsep + "txt"
  9. TESTDATAURL = "http://www.unicode.org/Public/" + unidata_version + "/ucd/" + TESTDATAFILE
  10.  
  11. if os.path.exists(TESTDATAFILE):
  12.     f = open(TESTDATAFILE)
  13.     l = f.readline()
  14.     f.close()
  15.     if not unidata_version in l:
  16.         os.unlink(TESTDATAFILE)
  17.  
  18. class RangeError(Exception):
  19.     pass
  20.  
  21. def NFC(str):
  22.     return normalize("NFC", str)
  23.  
  24. def NFKC(str):
  25.     return normalize("NFKC", str)
  26.  
  27. def NFD(str):
  28.     return normalize("NFD", str)
  29.  
  30. def NFKD(str):
  31.     return normalize("NFKD", str)
  32.  
  33. def unistr(data):
  34.     data = [int(x, 16) for x in data.split(" ")]
  35.     for x in data:
  36.         if x > sys.maxunicode:
  37.             raise RangeError
  38.     return u"".join([unichr(x) for x in data])
  39.  
  40. class NormalizationTest(unittest.TestCase):
  41.     def test_main(self):
  42.         part1_data = {}
  43.         for line in open_urlresource(TESTDATAURL):
  44.             if '#' in line:
  45.                 line = line.split('#')[0]
  46.             line = line.strip()
  47.             if not line:
  48.                 continue
  49.             if line.startswith("@Part"):
  50.                 part = line.split()[0]
  51.                 continue
  52.             if part == "@Part3":
  53.                 # XXX we don't support PRI #29 yet, so skip these tests for now
  54.                 continue
  55.             try:
  56.                 c1,c2,c3,c4,c5 = [unistr(x) for x in line.split(';')[:-1]]
  57.             except RangeError:
  58.                 # Skip unsupported characters;
  59.                 # try atleast adding c1 if we are in part1
  60.                 if part == "@Part1":
  61.                     try:
  62.                         c1 = unistr(line.split(';')[0])
  63.                     except RangeError:
  64.                         pass
  65.                     else:
  66.                         part1_data[c1] = 1
  67.                 continue
  68.  
  69.             # Perform tests
  70.             self.failUnless(c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3), line)
  71.             self.failUnless(c4 ==  NFC(c4) ==  NFC(c5), line)
  72.             self.failUnless(c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3), line)
  73.             self.failUnless(c5 ==  NFD(c4) ==  NFD(c5), line)
  74.             self.failUnless(c4 == NFKC(c1) == NFKC(c2) == \
  75.                             NFKC(c3) == NFKC(c4) == NFKC(c5),
  76.                             line)
  77.             self.failUnless(c5 == NFKD(c1) == NFKD(c2) == \
  78.                             NFKD(c3) == NFKD(c4) == NFKD(c5),
  79.                             line)
  80.  
  81.             # Record part 1 data
  82.             if part == "@Part1":
  83.                 part1_data[c1] = 1
  84.  
  85.         # Perform tests for all other data
  86.         for c in range(sys.maxunicode+1):
  87.             X = unichr(c)
  88.             if X in part1_data:
  89.                 continue
  90.             self.failUnless(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c)
  91.  
  92.     def test_bug_834676(self):
  93.         # Check for bug 834676
  94.         normalize('NFC', u'\ud55c\uae00')
  95.  
  96.  
  97. def test_main():
  98.     # Hit the exception early
  99.     open_urlresource(TESTDATAURL)
  100.     run_unittest(NormalizationTest)
  101.  
  102. if __name__ == "__main__":
  103.     test_main()
  104.