home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_338 (.txt) < prev    next >
Encoding:
Python Compiled Bytecode  |  2010-08-06  |  10.4 KB  |  389 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyc (Python 2.6)
  3.  
  4. import re
  5. from functools import reduce
  6. from _csv import Error, __version__, writer, reader, register_dialect, unregister_dialect, get_dialect, list_dialects, field_size_limit, QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE, __doc__
  7. from _csv import Dialect as _Dialect
  8.  
  9. try:
  10.     from cStringIO import StringIO
  11. except ImportError:
  12.     from StringIO import StringIO
  13.  
  14. __all__ = [
  15.     'QUOTE_MINIMAL',
  16.     'QUOTE_ALL',
  17.     'QUOTE_NONNUMERIC',
  18.     'QUOTE_NONE',
  19.     'Error',
  20.     'Dialect',
  21.     '__doc__',
  22.     'excel',
  23.     'excel_tab',
  24.     'field_size_limit',
  25.     'reader',
  26.     'writer',
  27.     'register_dialect',
  28.     'get_dialect',
  29.     'list_dialects',
  30.     'Sniffer',
  31.     'unregister_dialect',
  32.     '__version__',
  33.     'DictReader',
  34.     'DictWriter']
  35.  
  36. class Dialect:
  37.     _name = ''
  38.     _valid = False
  39.     delimiter = None
  40.     quotechar = None
  41.     escapechar = None
  42.     doublequote = None
  43.     skipinitialspace = None
  44.     lineterminator = None
  45.     quoting = None
  46.     
  47.     def __init__(self):
  48.         if self.__class__ != Dialect:
  49.             self._valid = True
  50.         
  51.         self._validate()
  52.  
  53.     
  54.     def _validate(self):
  55.         
  56.         try:
  57.             _Dialect(self)
  58.         except TypeError:
  59.             e = None
  60.             raise Error(str(e))
  61.  
  62.  
  63.  
  64.  
  65. class excel(Dialect):
  66.     delimiter = ','
  67.     quotechar = '"'
  68.     doublequote = True
  69.     skipinitialspace = False
  70.     lineterminator = '\r\n'
  71.     quoting = QUOTE_MINIMAL
  72.  
  73. register_dialect('excel', excel)
  74.  
  75. class excel_tab(excel):
  76.     delimiter = '\t'
  77.  
  78. register_dialect('excel-tab', excel_tab)
  79.  
  80. class DictReader:
  81.     
  82.     def __init__(self, f, fieldnames = None, restkey = None, restval = None, dialect = 'excel', *args, **kwds):
  83.         self._fieldnames = fieldnames
  84.         self.restkey = restkey
  85.         self.restval = restval
  86.         self.reader = reader(f, dialect, *args, **kwds)
  87.         self.dialect = dialect
  88.         self.line_num = 0
  89.  
  90.     
  91.     def __iter__(self):
  92.         return self
  93.  
  94.     
  95.     def fieldnames(self):
  96.         if self._fieldnames is None:
  97.             
  98.             try:
  99.                 self._fieldnames = self.reader.next()
  100.             except StopIteration:
  101.                 pass
  102.             except:
  103.                 None<EXCEPTION MATCH>StopIteration
  104.             
  105.  
  106.         None<EXCEPTION MATCH>StopIteration
  107.         self.line_num = self.reader.line_num
  108.         return self._fieldnames
  109.  
  110.     fieldnames = property(fieldnames)
  111.     
  112.     def fieldnames(self, value):
  113.         self._fieldnames = value
  114.  
  115.     fieldnames = fieldnames.setter(fieldnames)
  116.     
  117.     def next(self):
  118.         if self.line_num == 0:
  119.             self.fieldnames
  120.         
  121.         row = self.reader.next()
  122.         self.line_num = self.reader.line_num
  123.         while row == []:
  124.             row = self.reader.next()
  125.         d = dict(zip(self.fieldnames, row))
  126.         lf = len(self.fieldnames)
  127.         lr = len(row)
  128.         if lf < lr:
  129.             d[self.restkey] = row[lf:]
  130.         elif lf > lr:
  131.             for key in self.fieldnames[lr:]:
  132.                 d[key] = self.restval
  133.             
  134.         
  135.         return d
  136.  
  137.  
  138.  
  139. class DictWriter:
  140.     
  141.     def __init__(self, f, fieldnames, restval = '', extrasaction = 'raise', dialect = 'excel', *args, **kwds):
  142.         self.fieldnames = fieldnames
  143.         self.restval = restval
  144.         if extrasaction.lower() not in ('raise', 'ignore'):
  145.             raise ValueError, "extrasaction (%s) must be 'raise' or 'ignore'" % extrasaction
  146.         extrasaction.lower() not in ('raise', 'ignore')
  147.         self.extrasaction = extrasaction
  148.         self.writer = writer(f, dialect, *args, **kwds)
  149.  
  150.     
  151.     def _dict_to_list(self, rowdict):
  152.         return [ rowdict.get(key, self.restval) for key in self.fieldnames ]
  153.  
  154.     
  155.     def writerow(self, rowdict):
  156.         return self.writer.writerow(self._dict_to_list(rowdict))
  157.  
  158.     
  159.     def writerows(self, rowdicts):
  160.         rows = []
  161.         for rowdict in rowdicts:
  162.             rows.append(self._dict_to_list(rowdict))
  163.         
  164.         return self.writer.writerows(rows)
  165.  
  166.  
  167.  
  168. try:
  169.     complex
  170. except NameError:
  171.     complex = float
  172.  
  173.  
  174. class Sniffer:
  175.     
  176.     def __init__(self):
  177.         self.preferred = [
  178.             ',',
  179.             '\t',
  180.             ';',
  181.             ' ',
  182.             ':']
  183.  
  184.     
  185.     def sniff(self, sample, delimiters = None):
  186.         (quotechar, delimiter, skipinitialspace) = self._guess_quote_and_delimiter(sample, delimiters)
  187.         if not delimiter:
  188.             (delimiter, skipinitialspace) = self._guess_delimiter(sample, delimiters)
  189.         
  190.         if not delimiter:
  191.             raise Error, 'Could not determine delimiter'
  192.         delimiter
  193.         
  194.         class dialect(Dialect):
  195.             _name = 'sniffed'
  196.             lineterminator = '\r\n'
  197.             quoting = QUOTE_MINIMAL
  198.             doublequote = False
  199.  
  200.         dialect.delimiter = delimiter
  201.         if not quotechar:
  202.             pass
  203.         dialect.quotechar = '"'
  204.         dialect.skipinitialspace = skipinitialspace
  205.         return dialect
  206.  
  207.     
  208.     def _guess_quote_and_delimiter(self, data, delimiters):
  209.         matches = []
  210.         for restr in ('(?P<delim>[^\\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)', '(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\\w\n"\'])(?P<space> ?)', '(?P<delim>>[^\\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)', '(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)'):
  211.             regexp = re.compile(restr, re.DOTALL | re.MULTILINE)
  212.             matches = regexp.findall(data)
  213.             if matches:
  214.                 break
  215.                 continue
  216.         
  217.         if not matches:
  218.             return ('', None, 0)
  219.         quotes = { }
  220.         delims = { }
  221.         spaces = 0
  222.         for m in matches:
  223.             n = regexp.groupindex['quote'] - 1
  224.             key = m[n]
  225.             if key:
  226.                 quotes[key] = quotes.get(key, 0) + 1
  227.             
  228.             
  229.             try:
  230.                 n = regexp.groupindex['delim'] - 1
  231.                 key = m[n]
  232.             except KeyError:
  233.                 continue
  234.  
  235.             if key:
  236.                 if delimiters is None or key in delimiters:
  237.                     delims[key] = delims.get(key, 0) + 1
  238.                 
  239.             
  240.             try:
  241.                 n = regexp.groupindex['space'] - 1
  242.             except KeyError:
  243.                 continue
  244.  
  245.             if m[n]:
  246.                 spaces += 1
  247.                 continue
  248.         
  249.         quotechar = reduce((lambda a, b, quotes = quotes: if not quotes[a] > quotes[b] or a:
  250. passb), quotes.keys())
  251.         if delims:
  252.             delim = reduce((lambda a, b, delims = delims: if not delims[a] > delims[b] or a:
  253. passb), delims.keys())
  254.             skipinitialspace = delims[delim] == spaces
  255.             if delim == '\n':
  256.                 delim = ''
  257.             
  258.         else:
  259.             delim = ''
  260.             skipinitialspace = 0
  261.         return (quotechar, delim, skipinitialspace)
  262.  
  263.     
  264.     def _guess_delimiter(self, data, delimiters):
  265.         data = filter(None, data.split('\n'))
  266.         ascii = [ chr(c) for c in range(127) ]
  267.         chunkLength = min(10, len(data))
  268.         iteration = 0
  269.         charFrequency = { }
  270.         modes = { }
  271.         delims = { }
  272.         start = 0
  273.         end = min(chunkLength, len(data))
  274.         while start < len(data):
  275.             iteration += 1
  276.             for line in data[start:end]:
  277.                 for char in ascii:
  278.                     metaFrequency = charFrequency.get(char, { })
  279.                     freq = line.count(char)
  280.                     metaFrequency[freq] = metaFrequency.get(freq, 0) + 1
  281.                     charFrequency[char] = metaFrequency
  282.                 
  283.             
  284.             for char in charFrequency.keys():
  285.                 items = charFrequency[char].items()
  286.                 if len(items) > 1:
  287.                     modes[char] = reduce((lambda a, b: if not a[1] > b[1] or a:
  288. passb), items)
  289.                     items.remove(modes[char])
  290.                     modes[char] = (modes[char][0], modes[char][1] - reduce((lambda a, b: (0, a[1] + b[1])), items)[1])
  291.                     continue
  292.                 None if len(items) == 1 and items[0][0] == 0 else []
  293.                 modes[char] = items[0]
  294.             
  295.             modeList = modes.items()
  296.             total = float(chunkLength * iteration)
  297.             consistency = 1
  298.             threshold = 0.9
  299.             while len(delims) == 0 and consistency >= threshold:
  300.                 for k, v in modeList:
  301.                     if v[0] > 0 and v[1] > 0:
  302.                         if v[1] / total >= consistency:
  303.                             pass
  304.                         None if delimiters is None or k in delimiters else k in delimiters
  305.                         continue
  306.                 
  307.                 consistency -= 0.01
  308.             if len(delims) == 1:
  309.                 delim = delims.keys()[0]
  310.                 skipinitialspace = data[0].count(delim) == data[0].count('%c ' % delim)
  311.                 return (delim, skipinitialspace)
  312.             start = end
  313.             end += chunkLength
  314.             continue
  315.             len(delims) == 1
  316.         if not delims:
  317.             return ('', 0)
  318.         items = [ (v, k) for k, v in delims.items() ]
  319.         items.sort()
  320.         delim = items[-1][1]
  321.         skipinitialspace = data[0].count(delim) == data[0].count('%c ' % delim)
  322.         return (delim, skipinitialspace)
  323.  
  324.     
  325.     def has_header(self, sample):
  326.         rdr = reader(StringIO(sample), self.sniff(sample))
  327.         header = rdr.next()
  328.         columns = len(header)
  329.         columnTypes = { }
  330.         for i in range(columns):
  331.             columnTypes[i] = None
  332.         
  333.         checked = 0
  334.         for row in rdr:
  335.             if checked > 20:
  336.                 break
  337.             
  338.             checked += 1
  339.             if len(row) != columns:
  340.                 continue
  341.             
  342.             for col in columnTypes.keys():
  343.                 for thisType in [
  344.                     int,
  345.                     long,
  346.                     float,
  347.                     complex]:
  348.                     
  349.                     try:
  350.                         thisType(row[col])
  351.                     continue
  352.                     except (ValueError, OverflowError):
  353.                         continue
  354.                     
  355.  
  356.                 else:
  357.                     thisType = len(row[col])
  358.                 if thisType == long:
  359.                     thisType = int
  360.                 
  361.                 if thisType != columnTypes[col]:
  362.                     if columnTypes[col] is None:
  363.                         columnTypes[col] = thisType
  364.                     else:
  365.                         del columnTypes[col]
  366.                 columnTypes[col] is None
  367.             
  368.         
  369.         hasHeader = 0
  370.         for col, colType in columnTypes.items():
  371.             if type(colType) == type(0):
  372.                 if len(header[col]) != colType:
  373.                     hasHeader += 1
  374.                 else:
  375.                     hasHeader -= 1
  376.             len(header[col]) != colType
  377.             
  378.             try:
  379.                 colType(header[col])
  380.             except (ValueError, TypeError):
  381.                 hasHeader += 1
  382.                 continue
  383.  
  384.             hasHeader -= 1
  385.         
  386.         return hasHeader > 0
  387.  
  388.  
  389.