home *** CD-ROM | disk | FTP | other *** search
/ Mac Easy 2010 May / Mac Life Ubuntu.iso / casper / filesystem.squashfs / usr / share / pyshared / launchpadbugs / html_buglist.py < prev    next >
Encoding:
Python Source  |  2008-08-05  |  12.9 KB  |  279 lines

  1. """
  2. TODO:
  3.     * maybe move initialisation of libxml2 parser to utils
  4. """
  5.  
  6. import libxml2
  7. import urlparse
  8. import re
  9. from bugbase import LPBugInfo, Bug as LPBug
  10. from buglistbase import LPBugList, LPBugPage
  11. from lphelper import user, unicode_for_libxml2, sort
  12. from lpconstants import BASEURL
  13. from exceptions import parse_error
  14. from lptime import LPTime
  15. from utils import valid_lp_url, bugnumber_to_url
  16.  
  17. #deactivate error messages from the validation [libxml2.htmlParseDoc]
  18. def noerr(ctx, str):
  19.     pass
# Module-level side effect: install noerr as libxml2's error handler at import
# time. The second argument (None) is presumably the context value handed back
# to the handler as `ctx` -- confirm against the libxml2 Python bindings.
libxml2.registerErrorHandler(noerr, None)
  21.  
  22.  
  23. class BugInfo(LPBugInfo):
  24.     # TODO: use same attribute names like Bug.Bug
  25.     def __init__(self, nr, url, status, importance, summary, package=None, all_tasks=False):
  26.         url = valid_lp_url(url, BASEURL.BUG)
  27.         LPBugInfo.__init__(self, nr, url, status, importance, summary, package, all_tasks)
  28.         
  29. class BugInfoWatches(LPBugInfo):
  30.     def __init__(self, lp_url, lp_bugnr, lp_title, watch_url, watch_bugnr, watch_status, private, all_tasks):
  31.         url = valid_lp_url(lp_url, BASEURL.BUG)
  32.         LPBugInfo.__init__(self, lp_bugnr, url, None, None, lp_title, None, all_tasks)
  33.         
  34.         # add additional attributes
  35.         self.watch_url = watch_url or "unknown"
  36.         self.watch_bugnr = watch_bugnr
  37.         self.watch_status = watch_status or "unknown"
  38.         self.private = private
  39.         
  40. re_url_to_project = re.compile(r'/(\w+)/\+bug/\d+')
  41. class ExpBugInfo(LPBugInfo):
  42.     def __init__(self, bugnumber, url, importance, summary, private, package, date_last_update, all_tasks=False):
  43.         url = valid_lp_url(url, BASEURL.BUG)
  44.         if package is None:
  45.             u = urlparse.urlsplit(url)
  46.             m = re_url_to_project.match(u[2])
  47.             if m:
  48.                 package = m.group(1)
  49.         LPBugInfo.__init__(self, bugnumber, url, None, importance, summary, package, all_tasks)
  50.         
  51.         # add additional attributes
  52.         self.private = private
  53.         self.date_last_update = date_last_update
  54.         
  55.  
  56. class BugPage(LPBugPage):
  57.     """
  58.     grab content of a single bug-table    
  59.     """
  60.     @staticmethod
  61.     def find_parse_function(connection, url, all_tasks):
  62.         url = valid_lp_url(url, BASEURL.BUGPAGE)
  63.         lp_content = connection.get(url)
  64.         xmldoc = libxml2.htmlParseDoc(unicode_for_libxml2(lp_content.text), "UTF-8")
  65.         u = urlparse.urlsplit(url)
  66.         if "+milestone" in u[2]:
  67.             result = BugPage.parse_html_milestone_bugpage(xmldoc, all_tasks, url)
  68.         elif "+expirable-bugs" in u[2]:
  69.             result = BugPage.parse_html_expirable_bugpage(xmldoc, all_tasks, url)
  70.         elif "bugs/bugtrackers" in u[2]:
  71.             result = BugPage.parse_html_bugtracker_bugpage(xmldoc, all_tasks, url)
  72.         else:
  73.             result = BugPage.parse_html_bugpage(xmldoc, all_tasks, url)
  74.         return result
  75.         
  76.     @staticmethod
  77.     def parse_html_bugpage(xmldoc, all_tasks, debug_url):
  78.         def _parse():
  79.             if xmldoc.xpathEval('//div/p[contains(.,"There are currently no open bugs.")]') or xmldoc.xpathEval('//div/p[contains(.,"No results for search")]'):
  80.                 raise StopIteration
  81.                 
  82.             # count number of columns
  83.             # Bug-Pages have seven columns: icon|nr|summary(url)|icon(milestone|branch|blueprint)|package|importance|status
  84.             # personal Bug-Pages have six columns: icon|nr|summary(url)|package|importance|status
  85.             # TODO: look for more simple XPath-statements
  86.             col = int(xmldoc.xpathEval('count(//table[@id="buglisting"]//thead//tr//th[not(@*)])'))
  87.             for span in xmldoc.xpathEval('//table[@id="buglisting"]//thead//tr//@colspan'):
  88.                 col += int(span.content)
  89.  
  90.             assert col == 6 or col == 7, "Parsing of this page (%s) is not \
  91.     supported by python-launchpad-bugs" %debug_url
  92.             bug_table_rows = xmldoc.xpathEval('//table[@id="buglisting"]//tbody//tr')
  93.             for row in bug_table_rows:
  94.                 out = []
  95.                 for i in xrange(2,col+1):
  96.                     if i == 3:
  97.                         expr = 'td[' + str(i) + ']//a'
  98.                     else:
  99.                         expr = 'td[' + str(i) + ']/text()'
  100.                     res = row.xpathEval(expr)
  101.                     parse_error(res, "BugPage.parse_html_bugpage._parse.row[%s]" %i, xml=row, url=debug_url)
  102.                     if i == 3:
  103.                         out.append(res[0].prop("href"))
  104.                     out.append(res[0].content)
  105.                 #drop icon td
  106.                 out.pop(3)
  107.                 # package is optional, move package to the end of the list
  108.                 if len(out) == 6:
  109.                     out.append(out.pop(3))
  110.                 else:
  111.                     out.append(None)
  112.                 yield BugInfo(out[0], out[1], out[4],out[3], out[2], out[5], all_tasks)
  113.  
  114.         next = xmldoc.xpathEval('//div[@class="lesser"]//a[@rel="next"]//@href')
  115.         m = xmldoc.xpathEval('//td[@class="batch-navigation-index"]')
  116.         if m:
  117.             m = m.pop()
  118.             n = re.search(r'(\d+)\s+results?', m.content)
  119.             parse_error(n, "BugPage.parse_html_bugpage.length", url=debug_url)
  120.             length = n.group(1)
  121.             n = m.xpathEval("strong")
  122.             batchsize = int(n[1].content) - int(n[0].content) + 1
  123.         else:
  124.             length = batchsize = 0
  125.         if next:
  126.             return _parse(), next[0].content, batchsize, int(length)
  127.         return _parse(), False, batchsize, int(length)
  128.         
  129.     @staticmethod
  130.     def parse_html_milestone_bugpage(xmldoc, all_tasks, debug_url):
  131.         def _parse():
  132.             bug_table_rows = xmldoc.xpathEval('//table[@id="milestone_bugtasks"]//tbody//tr')
  133.             for row in bug_table_rows:
  134.                 x = row.xpathEval('td[1]//span/img')
  135.                 parse_error(x, "BugPage.parse_html_milestone_bugpage.importance", xml=row, url=debug_url)
  136.                 importance = x[0].prop("alt").strip("()").title()
  137.                 x = row.xpathEval('td[2]')
  138.                 parse_error(x, "BugPage.parse_html_milestone_bugpage.nr", xml=row, url=debug_url)
  139.                 nr = x[0].content
  140.                 x = row.xpathEval('td[3]/a')
  141.                 parse_error(x, "BugPage.parse_html_milestone_bugpage.url", xml=row, url=debug_url)
  142.                 url = x[0].prop("href")
  143.                 summary = x[0].content
  144.                 x = row.xpathEval('td[5]//a')
  145.                 if x:
  146.                     usr = user.parse_html_user(x[0])
  147.                 else:
  148.                     usr = user(None)
  149.                 x = row.xpathEval('td[6]/span[2]')
  150.                 parse_error(x, "BugPage.parse_html_milestone_bugpage.status", xml=row, url=debug_url)
  151.                 status = x[0].content
  152.                 x = BugInfo(nr, url, status, importance, summary, None, all_tasks)
  153.                 x.assignee = usr
  154.                 yield x
  155.         m = xmldoc.xpathEval('//h2[@id="bug-count"]')
  156.         length = batchsize = int(m[0].content.split(" ")[0])
  157.         return _parse(), False, batchsize, length
  158.         
  159.     @staticmethod
  160.     def parse_html_bugtracker_bugpage(xmldoc, all_tasks, debug_url):
  161.         def _parse():
  162.             rows = xmldoc.xpathEval('//table[@class="sortable listing" and @id="latestwatches"]/tbody//tr')
  163.             for row in rows:
  164.                 lp_url, lp_bugnr, lp_title, watch_url, watch_bugnr, watch_status, private = [None] * 7
  165.                 data = row.xpathEval("td")
  166.                 parse_error(len(data) == 3, "BugPage.parse_html_bugtracker_bugpage.len_td=%s" %len(data), xml=row, url=debug_url)
  167.                 x = data[0].xpathEval("a")
  168.                 if x:
  169.                     lp_url = x[0].prop("href")
  170.                     lp_bugnr = int(lp_url.split("/").pop())
  171.                     lp_title = x[0].content.split(":", 1)[-1].strip("\n ")
  172.                     
  173.                     x = data[1].xpathEval("a")
  174.                     parse_error(x, "BugPage.parse_html_bugtracker_bugpage.watch_url", xml=row, url=debug_url)
  175.                     watch_url = x[0].prop("href")
  176.                     watch_bugnr = x[0].content
  177.                     
  178.                     watch_status = data[2].content
  179.                 else:
  180.                     x = data[0].content
  181.                     parse_error("(Private)" in x, "BugPage.parse_html_bugtracker_bugpage.private", xml=row, url=debug_url)
  182.                     private = True
  183.                     x = x.split("#").pop()
  184.                     lp_bugnr = int(x.split(":").pop(0))
  185.                     lp_url = bugnumber_to_url(lp_bugnr)
  186.                 b = BugInfoWatches( lp_url, lp_bugnr, lp_title, watch_url,
  187.                                     watch_bugnr, watch_status, bool(private), all_tasks)
  188.                 yield b
  189.  
  190.         next = xmldoc.xpathEval('//td[@class="batch-navigation-links"]//a[@rel="next"]//@href')
  191.         m = xmldoc.xpathEval('//td[@class="batch-navigation-index"]')
  192.         if m:
  193.             m = m.pop()
  194.             n = re.search(r'(\d+)\s+results?', m.content)
  195.             parse_error(n, "BugPage.parse_html_bugpage.length", url=debug_url)
  196.             length = n.group(1)
  197.             n = m.xpathEval("strong")
  198.             batchsize = int(n[1].content) - int(n[0].content) + 1
  199.         else:
  200.             length = batchsize = 0
  201.         if next:
  202.             return _parse(), next[0].content, batchsize, int(length)
  203.         return _parse(), False, batchsize, int(length)
  204.         
  205.     @staticmethod
  206.     def parse_html_expirable_bugpage(xmldoc, all_tasks, debug_url):
  207.         def _parse():
  208.             rows = xmldoc.xpathEval('//table[@class="listing" and @id="buglisting"]//tbody//tr')
  209.             for row in rows:
  210.                 col_count = len(row.xpathEval("td"))
  211.                 parse_error( 4 < col_count < 7, "BugPage.parse_html_expirable_bugpage.col_count", xml=row, url=debug_url)
  212.                 m = row.xpathEval("td[1]/img")
  213.                 parse_error( m, "BugPage.parse_html_expirable_bugpage.importance", xml=row, url=debug_url)
  214.                 importance = m[0].prop("title").split()[0]
  215.                 m = row.xpathEval("td[2]")
  216.                 parse_error( m, "BugPage.parse_html_expirable_bugpage.bugnumber", xml=row, url=debug_url)
  217.                 bugnumber = int(m[0].content)
  218.                 m = row.xpathEval("td[3]/a")
  219.                 parse_error( m, "BugPage.parse_html_expirable_bugpage.url", xml=row, url=debug_url)
  220.                 url = m[0].prop("href")
  221.                 summary = m[0].content
  222.                 m = row.xpathEval("td[4]/img")
  223.                 private = False
  224.                 if m:
  225.                     private = m[0].prop("alt").lower() == "private"
  226.                 if col_count == 6:
  227.                     m = row.xpathEval("td[5]")
  228.                     parse_error( m, "BugPage.parse_html_expirable_bugpage.package", xml=row, url=debug_url)
  229.                     package = m[0].content
  230.                     if package == '\xe2\x80\x94':
  231.                         package = None
  232.                     m = row.xpathEval("td[6]")
  233.                     parse_error( m, "BugPage.parse_html_expirable_bugpage.date_last_update.1", xml=row, url=debug_url)
  234.                     date_last_update = LPTime(m[0].content)
  235.                 elif col_count == 5:
  236.                     package = None #this should be the package related to given url
  237.                     m = row.xpathEval("td[5]")
  238.                     parse_error( m, "BugPage.parse_html_expirable_bugpage.date_last_update.2", xml=row, url=debug_url)
  239.                     date_last_update = LPTime(m[0].content)
  240.                 yield ExpBugInfo(bugnumber, url, importance, summary, private, package, date_last_update, all_tasks)
  241.                     
  242.         next = xmldoc.xpathEval('//td[@class="batch-navigation-links"]//a[@rel="next"]//@href')
  243.         m = xmldoc.xpathEval('//td[@class="batch-navigation-index"]')
  244.         if m:
  245.             m = m.pop()
  246.             n = re.search(r'(\d+)\s+results?', m.content)
  247.             parse_error(n, "BugPage.parse_html_bugpage.length", url=debug_url)
  248.             length = n.group(1)
  249.             n = m.xpathEval("strong")
  250.             batchsize = int(n[1].content) - int(n[0].content) + 1
  251.         else:
  252.             length = batchsize = 0
  253.         if next:
  254.             return _parse(), next[0].content, batchsize, int(length)
  255.         return _parse(), False, batchsize, int(length)
  256.         
  257.  
  258. class BugList(LPBugList):
  259.     """
  260.     returns a SET of LPBugInfo objects
  261.     searches baseurl and its following pages
  262.     """
  263.     def __init__(self, baseurl, connection=None, all_tasks=False, progress_hook=None):
  264.         if hasattr(baseurl, "baseurl"):
  265.             baseurl.baseurl = valid_lp_url(baseurl.baseurl, BASEURL.BUGLIST)
  266.         else:
  267.             baseurl = valid_lp_url(baseurl, BASEURL.BUGLIST)
  268.         LPBugList.__init__(self, baseurl, connection, all_tasks,
  269.                     BugPage, progress_hook)
  270.         
  271.     def add(self, item):
  272.         assert isinstance(item, (LPBugInfo, LPBug))
  273.         LPBugList.add(self, item)
  274.     
  275.     def sort(self, optsort):
  276.         cmp_func = lambda x, y: sort(x, y, optsort.strip("-"))
  277.         isreverse = optsort.startswith("-")
  278.         return sorted(self, cmp=cmp_func, reverse=isreverse)
  279.