home *** CD-ROM | disk | FTP | other *** search
"""Convert Cyrillic from iso-8859-1 Unicode-encoded to KOI8-R-encoded

This script is used during the build process of the Russian translation
of "Dive Into Python" (http://diveintopython.org/).

It takes one argument, which can be either an HTML file or a directory.
If a file, it converts the file in place; if a directory, it converts
every HTML file in the immediate directory (but not recursively).

Safe but pointless to run more than once on the same file or directory.
"""
-
# Module metadata.  The "$Revision$" / "$Date$" values look like
# version-control keyword expansions and are kept verbatim.
__author__ = "Mark Pilgrim (mark@diveintopython.org)"
__version__ = "$Revision: 1.2 $"
__date__ = "$Date: 2004/05/05 21:57:19 $"
__copyright__ = "Copyright (c) 2001 Mark Pilgrim"
__license__ = "Python"
-
import os
import re
import sys
-
# Maps each Cyrillic HTML numeric character reference (decimal Unicode code
# point, "&#NNNN;") to the single byte used for the same letter in KOI8-R.
# Keys and values are byte strings because the script rewrites raw file
# bytes; under Python 2 these literals are identical to plain str literals.
unicodeToKOI8R = {
    b'&#1025;': b'\xb3',
    b'&#1040;': b'\xe1',
    b'&#1041;': b'\xe2',
    b'&#1042;': b'\xf7',
    b'&#1043;': b'\xe7',
    b'&#1044;': b'\xe4',
    b'&#1045;': b'\xe5',
    b'&#1046;': b'\xf6',
    b'&#1047;': b'\xfa',
    b'&#1048;': b'\xe9',
    b'&#1049;': b'\xea',
    b'&#1050;': b'\xeb',
    b'&#1051;': b'\xec',
    b'&#1052;': b'\xed',
    b'&#1053;': b'\xee',
    b'&#1054;': b'\xef',
    b'&#1055;': b'\xf0',
    b'&#1056;': b'\xf2',
    b'&#1057;': b'\xf3',
    b'&#1058;': b'\xf4',
    b'&#1059;': b'\xf5',
    b'&#1060;': b'\xe6',
    b'&#1061;': b'\xe8',
    b'&#1062;': b'\xe3',
    b'&#1063;': b'\xfe',
    b'&#1064;': b'\xfb',
    b'&#1065;': b'\xfd',
    b'&#1066;': b'\xff',
    b'&#1067;': b'\xf9',
    b'&#1068;': b'\xf8',
    b'&#1069;': b'\xfc',
    b'&#1070;': b'\xe0',
    b'&#1071;': b'\xf1',
    b'&#1072;': b'\xc1',
    b'&#1073;': b'\xc2',
    b'&#1074;': b'\xd7',
    b'&#1075;': b'\xc7',
    b'&#1076;': b'\xc4',
    b'&#1077;': b'\xc5',
    b'&#1078;': b'\xd6',
    b'&#1079;': b'\xda',
    b'&#1080;': b'\xc9',
    b'&#1081;': b'\xca',
    b'&#1082;': b'\xcb',
    b'&#1083;': b'\xcc',
    b'&#1084;': b'\xcd',
    b'&#1085;': b'\xce',
    b'&#1086;': b'\xcf',
    b'&#1087;': b'\xd0',
    b'&#1088;': b'\xd2',
    b'&#1089;': b'\xd3',
    b'&#1090;': b'\xd4',
    b'&#1091;': b'\xd5',
    b'&#1092;': b'\xc6',
    b'&#1093;': b'\xc8',
    b'&#1094;': b'\xc3',
    b'&#1095;': b'\xde',
    b'&#1096;': b'\xdb',
    b'&#1097;': b'\xdd',
    b'&#1098;': b'\xdf',
    b'&#1099;': b'\xd9',
    b'&#1100;': b'\xd8',
    b'&#1101;': b'\xdc',
    b'&#1102;': b'\xc0',
    b'&#1103;': b'\xd1',
    b'&#1105;': b'\xa3',
}

# Matches one four-digit decimal character reference such as "&#1040;";
# the full match text is what gets looked up in unicodeToKOI8R.
unicodePattern = re.compile(br'&#[0-9]{4,4};')
# Matches the source charset name in any letter case so it can be rewritten.
charsetPattern = re.compile(br'ISO-8859-1', re.IGNORECASE)
-
def translateMatch(match):
    """Return the replacement text for one regular-expression match.

    match is a match object; its full matched text is looked up in
    unicodeToKOI8R.  When the table has an entry, the mapped value is
    returned; otherwise the matched text is returned unchanged.
    """
    entity = match.group(0)
    # dict.get replaces the has_key() test, which Python 3 dictionaries lack.
    return unicodeToKOI8R.get(entity, entity)
-
def translateBuffer(buffer):
    """Return a translated copy of buffer.

    Every substring matched by unicodePattern is passed through
    translateMatch, then every match of charsetPattern is replaced with
    the charset name KOI8-R.  buffer is the raw content of a file (a byte
    string); the input object itself is not modified.
    """
    translated = unicodePattern.sub(translateMatch, buffer)
    # Bytes literal so the replacement has the same type as the file data
    # on Python 3; on Python 2 it is an ordinary str.
    return charsetPattern.sub(b'KOI8-R', translated)
-
def translateFile(filename, outfilename=None):
    """Translate the file at filename and write the result.

    filename is read in full, passed through translateBuffer, and the
    result is written to outfilename.  When outfilename is omitted (or
    otherwise false), the input file is overwritten in place.
    """
    if not outfilename:
        outfilename = filename
    # Read and write in binary mode so the bytes pass through untouched:
    # no newline translation, and no str/bytes mismatch on Python 3.
    with open(filename, 'rb') as fsock:
        buffer = fsock.read()
    buffer = translateBuffer(buffer)
    # The input is fully read and closed before the output is opened, which
    # is what makes the in-place case safe.
    with open(outfilename, 'wb') as fsock:
        fsock.write(buffer)
-
def htmlFilter(filename):
    """Return True when filename has exactly the extension '.html'.

    The comparison is case-sensitive, so names ending in '.HTML' or '.htm'
    are rejected.
    """
    return os.path.splitext(filename)[1] == '.html'
-
def translateDirectory(directoryname, filterFunc=htmlFilter):
    """Translate every accepted entry directly inside directoryname.

    Each name returned by os.listdir is joined with directoryname and
    offered to filterFunc; the paths it accepts are passed to
    translateFile one by one.  Subdirectories are not descended into.
    """
    candidates = [os.path.join(directoryname, entry)
                  for entry in os.listdir(directoryname)]
    selected = [path for path in candidates if filterFunc(path)]
    # An explicit loop is required: map() is lazy on Python 3, so using it
    # purely for side effects would translate nothing.
    for path in selected:
        translateFile(path)
-
if __name__ == "__main__":
    # Exit with a usage message instead of an IndexError traceback when the
    # required path argument is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: %s <html-file-or-directory>" % sys.argv[0])
    name = sys.argv[1]
    # A directory has its immediate HTML files converted; anything else is
    # treated as a single file to convert in place.
    if os.path.isdir(name):
        translateDirectory(name)
    else:
        translateFile(name)
-