home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_973 (.txt) < prev    next >
Encoding:
Python Compiled Bytecode  |  2010-08-06  |  6.1 KB  |  215 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyc (Python 2.6)
  3.  
  4. __license__ = 'GPL v3'
  5. __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
  6. import re
  7. import cStringIO
  8. import codecs
  9. from calibre.ebooks.metadata import MetaInformation, string_to_authors
  10. title_pat = re.compile('\\{\\\\info.*?\\{\\\\title(.*?)(?<!\\\\)\\}', re.DOTALL)
  11. author_pat = re.compile('\\{\\\\info.*?\\{\\\\author(.*?)(?<!\\\\)\\}', re.DOTALL)
  12. comment_pat = re.compile('\\{\\\\info.*?\\{\\\\subject(.*?)(?<!\\\\)\\}', re.DOTALL)
  13. category_pat = re.compile('\\{\\\\info.*?\\{\\\\category(.*?)(?<!\\\\)\\}', re.DOTALL)
  14.  
  15. def get_document_info(stream):
  16.     block_size = 4096
  17.     stream.seek(0)
  18.     found = False
  19.     block = ''
  20.     while not found:
  21.         prefix = block[-6:]
  22.         block = prefix + stream.read(block_size)
  23.         actual_block_size = len(block) - len(prefix)
  24.         if len(block) == len(prefix):
  25.             break
  26.         
  27.         idx = block.find('{\\info')
  28.         if idx >= 0:
  29.             found = True
  30.             pos = (stream.tell() - actual_block_size) + idx - len(prefix)
  31.             stream.seek(pos)
  32.             continue
  33.         if block.find('\\sect') > -1:
  34.             break
  35.             continue
  36.     if not found:
  37.         return (None, 0)
  38.     data = cStringIO.StringIO()
  39.     count = 0
  40.     pos = stream.tell()
  41.     while True:
  42.         ch = stream.read(1)
  43.         if ch == '\\':
  44.             data.write(ch + stream.read(1))
  45.             continue
  46.         
  47.         if ch == '{':
  48.             count += 1
  49.         elif ch == '}':
  50.             count -= 1
  51.         
  52.         data.write(ch)
  53.         if count == 0:
  54.             break
  55.             continue
  56.     return (data.getvalue(), pos)
  57.  
  58.  
  59. def detect_codepage(stream):
  60.     pat = re.compile('\\\\ansicpg(\\d+)')
  61.     match = pat.search(stream.read(512))
  62.     if match is not None:
  63.         num = match.group(1)
  64.         if num == '0':
  65.             num = '1250'
  66.         
  67.         codec = 'cp' + num
  68.         
  69.         try:
  70.             codecs.lookup(codec)
  71.             return codec
  72.  
  73.     
  74.  
  75.  
  76. def decode(raw, codec):
  77.     if codec is not None:
  78.         
  79.         def codepage(match):
  80.             return chr(int(match.group(1), 16))
  81.  
  82.         raw = re.sub("\\\\'([a-fA-F0-9]{2})", codepage, raw)
  83.         raw = raw.decode(codec)
  84.     
  85.     
  86.     def uni(match):
  87.         return unichr(int(match.group(1)))
  88.  
  89.     raw = re.sub('\\\\u([0-9]{4}).', uni, raw)
  90.     return raw
  91.  
  92.  
  93. def get_metadata(stream):
  94.     (title, author, comment, category) = (None, None, None, None)
  95.     stream.seek(0)
  96.     if stream.read(5) != '{\\rtf':
  97.         return MetaInformation(None, None)
  98.     block = get_document_info(stream)[0]
  99.     if not block:
  100.         return MetaInformation(None, None)
  101.     stream.seek(0)
  102.     cpg = detect_codepage(stream)
  103.     stream.seek(0)
  104.     title_match = title_pat.search(block)
  105.     author_match = author_pat.search(block)
  106.     if author_match:
  107.         author = decode(author_match.group(1).strip(), cpg)
  108.     
  109.     comment_match = comment_pat.search(block)
  110.     if comment_match:
  111.         comment = decode(comment_match.group(1).strip(), cpg)
  112.     
  113.     category_match = category_pat.search(block)
  114.     if category_match:
  115.         category = decode(category_match.group(1).strip(), cpg)
  116.     
  117.     mi = MetaInformation(title, author)
  118.     if author:
  119.         mi.authors = string_to_authors(author)
  120.     
  121.     mi.comments = comment
  122.     mi.category = category
  123.     return mi
  124.  
  125.  
  126. def create_metadata(stream, options):
  127.     md = '{\\info'
  128.     if options.title:
  129.         title = options.title.encode('ascii', 'ignore')
  130.         md += '{\\title %s}' % (title,)
  131.     
  132.     if options.authors:
  133.         au = options.authors
  134.         if not isinstance(au, basestring):
  135.             au = u', '.join(au)
  136.         
  137.         author = au.encode('ascii', 'ignore')
  138.         md += '{\\author %s}' % (author,)
  139.     
  140.     if options.category:
  141.         category = options.category.encode('ascii', 'ignore')
  142.         md += '{\\category %s}' % (category,)
  143.     
  144.     comp = None if hasattr(options, 'comment') else options.comments
  145.     if comp:
  146.         comment = comp.encode('ascii', 'ignore')
  147.         md += '{\\subject %s}' % (comment,)
  148.     
  149.     if len(md) > 6:
  150.         md += '}'
  151.         stream.seek(0)
  152.         src = stream.read()
  153.         ans = src[:6] + md + src[6:]
  154.         stream.seek(0)
  155.         stream.write(ans)
  156.     
  157.  
  158.  
  159. def set_metadata(stream, options):
  160.     
  161.     def add_metadata_item(src, name, val):
  162.         index = src.rindex('}')
  163.         return src[:index] + '{\\ '[:-1] + name + ' ' + val + '}}'
  164.  
  165.     (src, pos) = get_document_info(stream)
  166.     if not src:
  167.         create_metadata(stream, options)
  168.     else:
  169.         olen = len(src)
  170.         base_pat = '\\{\\\\name(.*?)(?<!\\\\)\\}'
  171.         title = options.title
  172.         if title != None:
  173.             title = title.encode('ascii', 'replace')
  174.             pat = re.compile(base_pat.replace('name', 'title'), re.DOTALL)
  175.             if pat.search(src):
  176.                 src = pat.sub('{\\\\title ' + title + '}', src)
  177.             else:
  178.                 src = add_metadata_item(src, 'title', title)
  179.         
  180.         comment = options.comments
  181.         if comment != None:
  182.             comment = comment.encode('ascii', 'replace')
  183.             pat = re.compile(base_pat.replace('name', 'subject'), re.DOTALL)
  184.             if pat.search(src):
  185.                 src = pat.sub('{\\\\subject ' + comment + '}', src)
  186.             else:
  187.                 src = add_metadata_item(src, 'subject', comment)
  188.         
  189.         author = options.authors
  190.         if author != None:
  191.             author = ', '.join(author)
  192.             author = author.encode('ascii', 'ignore')
  193.             pat = re.compile(base_pat.replace('name', 'author'), re.DOTALL)
  194.             if pat.search(src):
  195.                 src = pat.sub('{\\\\author ' + author + '}', src)
  196.             else:
  197.                 src = add_metadata_item(src, 'author', author)
  198.         
  199.         category = options.category
  200.         if category != None:
  201.             category = category.encode('ascii', 'replace')
  202.             pat = re.compile(base_pat.replace('name', 'category'), re.DOTALL)
  203.             if pat.search(src):
  204.                 src = pat.sub('{\\\\category ' + category + '}', src)
  205.             else:
  206.                 src = add_metadata_item(src, 'category', category)
  207.         
  208.         stream.seek(pos + olen)
  209.         after = stream.read()
  210.         stream.seek(pos)
  211.         stream.truncate()
  212.         stream.write(src)
  213.         stream.write(after)
  214.  
  215.