home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2011 October / maximum-cd-2011-10.iso / DiscContents / digsby_setup.exe / lib / common / logger.pyo (.txt) < prev    next >
Encoding:
Python Compiled Bytecode  |  2011-06-22  |  16.5 KB  |  468 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyo (Python 2.6)
  3.  
  4. from __future__ import with_statement
  5. import sys
  6. import re
  7. import os.path as os
  8. from operator import itemgetter
  9. import config
  10. from common import profile
  11. from datetime import datetime
  12. from util import soupify, Storage as S, tail, boolify
  13. from util.primitives.misc import fromutc
  14. from path import path
  15. from digsby import iswidget
  16. from traceback import print_exc
  17. from logging import getLogger
  18. log = getLogger('logger')
  19. log_info = log.info
  20. from common.message import Message
  21. from protocolmeta import SERVICE_MAP
  22. import lxml.html as lxml
  23. from util.htmlutils import render_contents
  24. if config.platform == 'win':
  25.     import blist
  26.     fastFind = blist.findFiles
  27. else:
  28.     fastFind = None
  29.  
  30. def get_default_logging_dir():
  31.     lp = localprefs
  32.     import prefs
  33.     localprefs = lp()
  34.     return path(localprefs['chatlogdir']) / DEFAULT_LOG_DIR_NAME / profile.username
  35.  
  36. DEFAULT_LOG_DIR_NAME = u'Digsby Logs'
  37. LOGSIZE_PARSE_LIMIT = 1024 * 15
  38.  
  39. def buddy_path(account, buddy):
  40.     return path(account.name).joinpath(account.username, buddy.name + '_' + buddy.service)
  41.  
  42. GROUP_CHAT_DIRNAME = 'Group Chats'
  43.  
  44. def chat_path(account, convo):
  45.     return path(account.name).joinpath(account.username, GROUP_CHAT_DIRNAME)
  46.  
  47.  
  48. def message_timestamp_id(dt):
  49.     return datetime(dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)
  50.  
  51. message_timestamp_fmt = '%Y-%m-%d %H:%M:%S'
  52. message_timestamp_fmt_OLD = '%Y-%m-%d %H:%M'
  53. filename_format_re = re.compile('\\d{4}-\\d{2}-\\d{2}\\..*')
  54. message_shorttime_fmt = '%H:%M:%S %p'
# Page header written at the top of each newly created HTML log file.
# ``%(title)s`` is the only substitution; literal percent signs are doubled.
# The embedded script runs on page load and, for each top-level element whose
# class contains "message", prepends a <span class="time"> holding the element's
# ``timestamp`` attribute (interpreted as UTC) rendered as a local time string.
html_header = u'<?xml version="1.0" encoding="UTF-8"?>\n    <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"\n    "http://www.w3.org/TR/html4/strict.dtd">\n<HTML>\n   <HEAD>\n      <meta http-equiv="Content-Type" content="text/html;charset=UTF-8" />\n      <TITLE>%(title)s</TITLE>\n   <style>\n     .buddy { font-weight: bold; }\n     .buddy:after { content: ":" }\n\n     .time {\n        color: #a0a0a0;\n        font-family: monaco, courier new, monospace;\n        font-size: 75%%;\n     }\n     .time:hover { color: black; }\n\n     .outgoing { background-color: #efefef; }\n     .incoming { background-color: #ffffff; }\n   </style>\n   <script type="text/javascript">\n//<![CDATA[\n    function convert_time(datetime){\n        var dt = datetime.split(" ");\n        var date = dt[0].split("-");\n        var time = dt[1].split(":");\n        var t = new Date;\n        t.setUTCFullYear(date[0],date[1]-1,date[2]);\n        t.setUTCHours(time[0],time[1],time[2]);\n        return t.toLocaleTimeString();\n    }\n\n    function utc_to_local(){\n        var node;\n        for (var i=0; i<document.body.childNodes.length; i++){\n            node = document.body.childNodes[i];\n            if(node.nodeType == 1 && node.className.match("message")){\n                var showtime = convert_time(node.getAttribute(\'timestamp\'));\n                var newspan = \'<span class="time">(\' + showtime + \') </span>\';\n                var msgnode = node;\n                msgnode.innerHTML = newspan + msgnode.innerHTML;\n            }\n        }\n    }\n//]]>\n   </script>\n   </HEAD>\n   <BODY onload="utc_to_local()">\n'
# Template for a single logged message; substitutes ``type``, ``auto``,
# ``timestamp``, ``buddy`` and ``message``.
html_log_entry = '<div class="%(type)s message" auto="%(auto)s" timestamp="%(timestamp)s"><span class="buddy">%(buddy)s</span> <span class="msgcontent">%(message)s</span></div>\n'
  57.  
  58. class Logger(object):
  59.     
  60.     def __init__(self, output_dir = None, log_ims = True, log_chats = True, log_widgets = False):
  61.         if output_dir is None:
  62.             output_dir = get_default_logging_dir()
  63.         
  64.         self.OutputDir = output_dir
  65.         self.OutputType = 'html'
  66.         self.LogChats = log_chats
  67.         self.LogIMs = log_ims
  68.         self.LogWidgets = log_widgets
  69.  
  70.     
  71.     def calculate_log_sizes(self):
  72.         log_sizes = blist.getLogSizes(self.OutputDir)
  73.         return sorted((lambda .0: for name, size in .0:
  74. (name, size))(log_sizes.iteritems()), key = itemgetter(1), reverse = True)
  75.  
  76.     
  77.     def on_message(self, messageobj = None, convo = None, **opts):
  78.         if not self.should_log_message(messageobj):
  79.             return None
  80.         messageobj = modify_message(messageobj)
  81.         output = self.generate_output(messageobj)
  82.         written_size = self.write_output(output, messageobj)
  83.         
  84.         try:
  85.             buddy = messageobj.conversation.buddy
  86.         except AttributeError:
  87.             self.should_log_message(messageobj)
  88.             self.should_log_message(messageobj)
  89.         except:
  90.             self.should_log_message(messageobj)
  91.  
  92.         
  93.         try:
  94.             buddy.increase_log_size(written_size)
  95.         except AttributeError:
  96.             self.should_log_message(messageobj)
  97.             self.should_log_message(messageobj)
  98.             import traceback
  99.             traceback.print_exc()
  100.         except:
  101.             self.should_log_message(messageobj)
  102.  
  103.  
  104.     
  105.     def should_log_message(self, messageobj):
  106.         if messageobj is None:
  107.             return False
  108.         convo = messageobj.conversation
  109.         if convo.ischat and not (self.LogChats):
  110.             return False
  111.         if not (convo.ischat) and not (self.LogIMs):
  112.             return False
  113.         if not (self.LogWidgets) and iswidget(convo.buddy):
  114.             return False
  115.         if messageobj.buddy is None:
  116.             return False
  117.         if not messageobj.buddy.protocol.should_log(messageobj):
  118.             return False
  119.         return True
  120.  
  121.     
  122.     def history_for(self, account, buddy):
  123.         log.debug('history_for(%r, %r)', account, buddy)
  124.         files = self.logfiles_for(account, buddy)
  125.         log.debug('%d %s log files found', len(files), self.OutputType)
  126.         if not files:
  127.             return iter([])
  128.         if fastFind is None:
  129.             files.sort(reverse = True)
  130.         
  131.         return history_from_files(files, 'html')
  132.  
  133.     
  134.     def history_for_safe(self, account, buddy):
  135.         
  136.         try:
  137.             hist = self.history_for(account, buddy)
  138.         except Exception:
  139.             print_exc()
  140.             hist = iter([])
  141.  
  142.         return hist
  143.  
  144.     
  145.     def logsize(self, account, buddy):
  146.         return sum((lambda .0: for f in .0:
  147. f.size)(self.logfiles_for(account, buddy)))
  148.  
  149.     
  150.     def logsize_for_nameservice(self, name, service):
  151.         glob_str = ''.join(('*/', name, '_', service, '/*.html'))
  152.         outpath = self.OutputDir
  153.         types = SERVICE_MAP.get(service, [
  154.             service])
  155.         total = 0
  156.         for accttype in types:
  157.             logpath = outpath / accttype
  158.             total += sum((lambda .0: for f in .0:
  159. f.size)(logpath.glob(glob_str)))
  160.         
  161.         return total
  162.  
  163.     
  164.     def logfiles_for(self, account, buddy):
  165.         global fastFind
  166.         logdir = self.pathfor(account, buddy)
  167.         if not logdir.isdir():
  168.             return []
  169.         if fastFind is not None:
  170.             pathjoin = os.path.join
  171.             
  172.             try:
  173.                 wildcard = pathjoin(logdir, '*-*-*.html')
  174.                 return [ pathjoin(logdir, p) for p in fastFind(wildcard) ]
  175.             except Exception:
  176.                 logdir.isdir()
  177.                 logdir.isdir()
  178.                 print_exc()
  179.                 fastFind = None
  180.             except:
  181.                 logdir.isdir()<EXCEPTION MATCH>Exception
  182.             
  183.  
  184.         logdir.isdir()
  185.         return list((lambda .0: for f in .0:
  186. if filename_format_re.match(f.name):
  187. fcontinue)(logdir.files('*.' + self.OutputType)))
  188.  
  189.     
  190.     def pathfor(self, account, buddy):
  191.         return self.OutputDir.joinpath(buddy_path(account, buddy))
  192.  
  193.     
  194.     def set_outputdir(self, val):
  195.         self.output_dir = path(val)
  196.  
  197.     
  198.     def get_outputdir(self):
  199.         return self.output_dir
  200.  
  201.     OutputDir = property(get_outputdir, set_outputdir, doc = 'where to write logs')
  202.     
  203.     def walk_group_chats(self):
  204.         for service in path(self.OutputDir).dirs():
  205.             for account in service.dirs():
  206.                 group_chat_dir = account / GROUP_CHAT_DIRNAME
  207.                 if group_chat_dir.isdir():
  208.                     for chat_file in group_chat_dir.files():
  209.                         filename = chat_file.namebase
  210.                         
  211.                         try:
  212.                             if filename.count('-') == 2:
  213.                                 time = datetime.strptime(filename, chat_time_format)
  214.                                 roomname = None
  215.                             else:
  216.                                 (time_part, roomname) = filename.split(' - ', 1)
  217.                                 time = datetime.strptime(time_part, chat_time_format)
  218.                                 if isinstance(roomname, str):
  219.                                     roomname = roomname.decode('filesys')
  220.                         except ValueError:
  221.                             continue
  222.                             except Exception:
  223.                                 print_exc()
  224.                                 continue
  225.                             else:
  226.                                 yield dict(time = time, service = service.name, file = chat_file, roomname = roomname)
  227.                         continue
  228.                         continue
  229.                         continue
  230.                         return None
  231.  
  232.  
  233.     
  234.     def get_path_for_chat(self, chat):
  235.         pathdir = path(self.OutputDir) / chat_path(chat.protocol, chat)
  236.         if chat.chat_room_name:
  237.             for f in pathdir.files('*.html'):
  238.                 name = f.namebase
  239.                 if 'T' in name:
  240.                     day_part = name.split('T')[0]
  241.                     
  242.                     try:
  243.                         dt = datetime.strptime(day_part, chat_time_category)
  244.                     except ValueError:
  245.                         pass
  246.  
  247.                     if fromutc(chat.start_time_utc).date() == dt.date():
  248.                         
  249.                         try:
  250.                             (time_part, roomname) = name.split(' - ', 1)
  251.                         except ValueError:
  252.                             pass
  253.  
  254.                         if roomname == chat.chat_room_name:
  255.                             return f
  256.                     
  257.                 fromutc(chat.start_time_utc).date() == dt.date()
  258.             
  259.         
  260.         return pathdir / (convo_time_filename(chat) + '.' + self.OutputType)
  261.  
  262.     
  263.     def write_output(self, output, messageobj):
  264.         convo = messageobj.conversation
  265.         proto = convo.protocol
  266.         if convo.ischat:
  267.             p = self.get_path_for_chat(convo)
  268.         else:
  269.             datefilename = fromutc(messageobj.timestamp).date().isoformat()
  270.             pathelems = (buddy_path(proto, convo.buddy), datefilename)
  271.             p = path(path(self.OutputDir).joinpath(*pathelems) + '.' + self.OutputType)
  272.         if not p.parent.isdir():
  273.             
  274.             try:
  275.                 p.parent.makedirs()
  276.             except WindowsError:
  277.                 e = None
  278.                 if e.winerror == 183:
  279.                     pass
  280.                 else:
  281.                     raise 
  282.                 e.winerror == 183
  283.             
  284.  
  285.         None<EXCEPTION MATCH>WindowsError
  286.         written_size = 0
  287.         if not p.isfile():
  288.             header = globals()['generate_header_' + self.OutputType](messageobj, self.output_encoding)
  289.             written_size += len(header)
  290.             p.write_bytes(header)
  291.         
  292.         written_size += len(output)
  293.         p.write_bytes(output, append = p.isfile())
  294.         return written_size
  295.  
  296.     
  297.     def generate_output(self, messageobj):
  298.         return globals()['generate_output_' + self.OutputType](messageobj, self.output_encoding)
  299.  
  300.     output_encoding = 'utf-8'
  301.  
  302.  
  303. def generate_header_html(messageobj, encoding):
  304.     c = messageobj.conversation
  305.     datefmt = messageobj.timestamp.date().isoformat()
  306.     if c.ischat:
  307.         title = 'Chat in %s on %s' % (c.name, datefmt)
  308.     else:
  309.         title = 'IM Logs with %s on %s' % (c.buddy.name, datefmt)
  310.     return (html_header % dict(title = title.encode('xml'))).encode(encoding, 'replace')
  311.  
  312.  
  313. def generate_output_html(m, encoding = 'utf-8'):
  314.     return (None % html_log_entry(dict = 'buddy' if m.buddy is not None else '', timestamp = m.timestamp.strftime(message_timestamp_fmt), message = m.message, type = m.type, auto = getattr(m, 'auto', False))).encode(encoding, 'replace')
  315.  
  316. class_buddy = {
  317.     'class': 'buddy' }
  318. class_message = {
  319.     'class': 'message' }
  320. class_msgcontent = {
  321.     'class': 'msgcontent' }
  322.  
  323. def parse_html_lxml(html):
  324.     messages = []
  325.     doc = lxml.html.document_fromstring(html, parser = lxmlparser())
  326.     for div in doc.xpath('//html/body/div'):
  327.         
  328.         try:
  329.             message_type = div.attrib.get('class', '')
  330.             if 'message' not in message_type:
  331.                 continue
  332.             
  333.             message_type = message_type.replace('message', '').strip()
  334.             if message_type not in ('incoming', 'outgoing'):
  335.                 continue
  336.             
  337.             buddyname = div.find_class('buddy')[0].text
  338.             timestamp = div.attrib.get('timestamp')
  339.             if timestamp is not None:
  340.                 timestamp = parse_timestamp(timestamp)
  341.             
  342.             message = render_contents(div.find_class('msgcontent')[0])
  343.             auto = boolify(div.attrib.get('auto', 'false'))
  344.         except Exception:
  345.             print_exc()
  346.             continue
  347.  
  348.         messages.append(Message(buddy = S(name = buddyname), timestamp = timestamp, message = message, type = message_type, auto = auto, has_autotext = auto))
  349.     
  350.     return messages
  351.  
  352. _lxmlparser = None
  353.  
  354. def lxmlparser():
  355.     global _lxmlparser
  356.     if _lxmlparser is None:
  357.         _lxmlparser = lxml.html.HTMLParser(encoding = 'utf-8')
  358.     
  359.     return _lxmlparser
  360.  
  361.  
  362. def parse_html_slow(html):
  363.     html = html.decode('utf-8', 'ignore')
  364.     soup = soupify(html, markupMassage = ((br_re, (lambda m: '<br />')),))
  365.     messages = []
  366.     strptime = datetime.strptime
  367.     for div in soup.findAll(message_divs):
  368.         
  369.         try:
  370.             buddyname = div.findAll('span', class_buddy)[0].renderContents(None)
  371.             timestamp = parse_timestamp(div['timestamp'])
  372.             message = div.findAll('span', class_msgcontent)[0].renderContents(None)
  373.             type = div['class'].replace('message', '').strip()
  374.             auto = boolify(div.get('auto', 'false'))
  375.         except Exception:
  376.             print_exc()
  377.             continue
  378.  
  379.         messages.append(Message(buddy = S(name = buddyname), timestamp = timestamp, message = message, type = type, auto = auto))
  380.     
  381.     log_info('parse_html_slow with %d bytes returning %d messages', len(html), len(messages))
  382.     return messages
  383.  
  384.  
  385. def message_divs(tag):
  386.     if tag.name == 'div':
  387.         pass
  388.     return 'message' in dict(tag.attrs).get('class', '')
  389.  
  390. show_logparse_tracebacks = True
  391.  
  392. def parse_html(html):
  393.     global show_logparse_tracebacks
  394.     if sys.platform == 'darwin':
  395.         messages = parse_html_slow(html)
  396.     else:
  397.         
  398.         try:
  399.             messages = parse_html_lxml(html)
  400.         except Exception:
  401.             if __debug__ or show_logparse_tracebacks:
  402.                 print_exc()
  403.                 show_logparse_tracebacks = False
  404.             
  405.             messages = parse_html_slow(html)
  406.             log_info('parsed slow: got %d messages', len(messages))
  407.  
  408.     return messages
  409.  
  410.  
  411. def parse_timestamp(timestamp):
  412.     
  413.     try:
  414.         return datetime.strptime(timestamp, message_timestamp_fmt)
  415.     except Exception:
  416.         return datetime.strptime(timestamp, message_timestamp_fmt_OLD)
  417.  
  418.  
  419.  
  420. def history_from_files(files, logtype = 'html'):
  421.     parse = globals()['parse_' + logtype]
  422.     for logfile in files:
  423.         
  424.         try:
  425.             bytes = tail(logfile, LOGSIZE_PARSE_LIMIT)
  426.         except Exception:
  427.             print_exc()
  428.             continue
  429.  
  430.         for msg in reversed(parse(bytes)):
  431.             yield msg
  432.         
  433.         if len(bytes) < logfile.size:
  434.             break
  435.             continue
  436.     
  437.  
  438. chat_time_format = '%Y-%m-%dT%H.%M.%S'
  439. chat_time_category = '%Y-%m-%d'
  440.  
  441. def convo_time_filename(convo):
  442.     time_part = fromutc(convo.start_time_utc).replace(microsecond = 0).strftime(chat_time_format)
  443.     room_name = convo.chat_room_name
  444.     if room_name:
  445.         return '%s - %s' % (time_part, room_name)
  446.     return time_part
  447.  
  448. import config
  449. USE_LXML = config.platform == 'win'
  450. if USE_LXML:
  451.     from util.htmlutils import to_xhtml
  452. else:
  453.     
  454.     to_xhtml = lambda s: s
  455.  
  456. def modify_message(msgobj):
  457.     msg = getattr(msgobj, 'message', None)
  458.     if msg is not None:
  459.         msgobj.message = to_xhtml(msg)
  460.     
  461.     return msgobj
  462.  
  463. import re
  464. real_br = '<br />'
  465. br_re = re.compile('<br\\s*/?>', re.IGNORECASE)
  466.  
  467. brfix = lambda s: br_re.sub(real_br, s)
  468.