home *** CD-ROM | disk | FTP | other *** search
/ PC Welt 2006 November (DVD) / PCWELT_11_2006.ISO / casper / filesystem.squashfs / usr / share / doc / popularity-contest / examples / popanal.py < prev    next >
Encoding:
Python Source  |  2005-07-02  |  6.8 KB  |  273 lines

  1. #!/usr/bin/python 
  2. #
  3. # Read Debian popularity-contest submission data on stdin and produce
  4. # some statistics about it.
  5. #
  6. import sys, string, time, glob
  7.  
  8. def ewrite(s):
  9.     sys.stderr.write("%s\n" % s)
  10.  
  11.  
  12. class Vote:
  13.     yes = 0
  14.     old_unused = 0
  15.     too_recent = 0
  16.     empty_package = 0
  17.  
  18.     def vote_for(vote, package, entry):
  19.     now = time.time()
  20.     if entry.atime == 0:  # no atime: empty package
  21.         vote.empty_package = vote.empty_package + 1
  22.     elif now - entry.atime > 30 * 24*3600:  # 30 days since last use: old
  23.         vote.old_unused = vote.old_unused + 1
  24.     elif now - entry.ctime < 30 * 24* 3600 \
  25.       and entry.atime - entry.ctime < 24*3600:  # upgraded too recently
  26.         vote.too_recent = vote.too_recent + 1
  27.     else:            # otherwise, vote for this package
  28.         vote.yes = vote.yes + 1
  29.  
  30. UNKNOWN = '**UNKNOWN**'
  31.  
  32. votelist = {}
  33. sectlist = { UNKNOWN : [] }
  34. deplist = {}
  35. provlist = {}
  36. complained = {}
  37. release_list = {}
  38. arch_list = {}
  39. subcount = 0
  40.  
  41.  
  42. def parse_depends(depline):
  43.     l = []
  44.     split = string.split(depline, ',')
  45.     for d in split:
  46.     x = string.split(d)
  47.     if (x):
  48.         l.append(x[0])
  49.     return l
  50.  
  51.  
  52. def read_depends(filename):
  53.     file = open(filename, 'r')
  54.     package = None
  55.  
  56.     while 1:
  57.     line = file.readline()
  58.     if line:
  59.         if line[0]==' ' or line[0]=='\t': continue  # continuation
  60.         split = string.split(line, ':')
  61.  
  62.     if not line or split[0]=='Package':
  63.         if package and (len(dep) > 0 or len(prov) > 0):
  64.         deplist[package] = dep
  65.         for d in prov:
  66.             if not provlist.has_key(d):
  67.             provlist[d] = []
  68.             provlist[d].append(package)
  69.         if package:
  70.         if not sectlist.has_key(section):
  71.             sectlist[section] = []
  72.         if not votelist.has_key(package):
  73.             sectlist[section].append(package)
  74.         votelist[package] = Vote()
  75.         ewrite(package)
  76.         package = None
  77.         if line:
  78.         package = string.strip(split[1])
  79.         section = UNKNOWN
  80.         dep = []
  81.         prov = []
  82.     elif split[0]=='Section':
  83.         section = string.strip(split[1])
  84.     elif split[0]=='Depends' or split[0]=='Requires':
  85.         dep = dep + parse_depends(split[1])
  86.     elif split[0]=='Provides':
  87.         prov = parse_depends(split[1])
  88.         
  89.     if not line: break
  90.     
  91.  
  92. class Entry:
  93.     atime = 0;
  94.     ctime = 0;
  95.     mru_file = '';
  96.  
  97.     def __init__(self, atime, ctime, mru_file):
  98.     try:
  99.         self.atime = long(atime)
  100.         self.ctime = long(ctime)
  101.     except:
  102.         self.atime = self.ctime = 0
  103.     self.mru_file = mru_file
  104.  
  105.  
  106. class Submission:
  107.     # format: {package: [atime, ctime, mru_file]}
  108.     entries = {}
  109.  
  110.     start_date = 0
  111.  
  112.     arch = "unknown"
  113.     release= "unknown"
  114.  
  115.     # initialize a new entry with known data
  116.     def __init__(self, version, owner_id, date):
  117.     self.entries = {}
  118.     self.start_date = long(date)
  119.     ewrite('%s:\n\tSTART: %s' % (owner_id, time.ctime(long(date))))
  120.  
  121.     # process a line of input from the survey
  122.     def addinfo(self, split):
  123.     if len(split) < 4:
  124.         ewrite('Invalid input line: ' + `split`)
  125.         return
  126.     self.entries[split[2]] = Entry(split[0], split[1], split[3])
  127.  
  128.     # update the atime of dependency to that of dependant, if newer
  129.     def update_atime(self, dependency, dependant):
  130.     if not self.entries.has_key(dependency): return
  131.     e = self.entries[dependency]
  132.     f = self.entries[dependant]
  133.     if e.atime < f.atime:
  134.         e.atime = f.atime
  135.         e.ctime = f.ctime
  136.  
  137.     # we found the last line of the survey: finish it
  138.     def done(self, date):
  139.     ewrite('\t STOP: after %d seconds, %d packages'
  140.            % (date - self.start_date, len(self.entries)))
  141.     for package in self.entries.keys():
  142.         e = self.entries[package]
  143.         if deplist.has_key(package):
  144.         for d in deplist[package]:
  145.             self.update_atime(d, package)
  146.             if provlist.has_key(d):
  147.             for dd in provlist[d]:
  148.                 self.update_atime(dd, package)
  149.     for package in self.entries.keys():
  150.         if not votelist.has_key(package):
  151.         if not complained.has_key(package):
  152.             ewrite(('Warning: package %s neither in '
  153.                 + 'stable nor unstable')  % package)
  154.             complained[package] = 1
  155.         votelist[package] = Vote()
  156.         sectlist[UNKNOWN].append(package)
  157.         votelist[package].vote_for(package, self.entries[package])
  158.  
  159.         if not release_list.has_key(self.release):
  160.             release_list[self.release] = 1
  161.         else:
  162.             release_list[self.release] = release_list[self.release] + 1
  163.  
  164.         if not arch_list.has_key(self.arch):
  165.             arch_list[self.arch] = 1
  166.         else:
  167.             arch_list[self.arch] = arch_list[self.arch] + 1
  168.  
  169. def headersplit(pairs):
  170.     header = {}
  171.     for d in pairs:
  172.     list = string.split(d, ':')
  173.     try:
  174.         key, value = list
  175.         header[key] = value
  176.     except:
  177.         pass
  178.     return header
  179.  
  180.  
  181. def read_submissions(stream):
  182.     global subcount
  183.     e = None
  184.     while 1:
  185.     line = stream.readline()
  186.     if not line: break
  187.  
  188.     split = string.split(line)
  189.     if not split: continue
  190.  
  191.     if split[0]=='POPULARITY-CONTEST-0':
  192.         header = headersplit(split[1:])
  193.  
  194.         if not header.has_key('ID') or not header.has_key('TIME'):
  195.         ewrite('Invalid header: ' + split)
  196.         continue
  197.  
  198.         subcount = subcount + 1
  199.         ewrite('#%s' % subcount)
  200.         e = Submission(0, header['ID'], header['TIME'])
  201.  
  202.             if header.has_key('POPCONVER'):
  203.         if header['POPCONVER']=='':
  204.                 e.release = 'unknown'
  205.                 elif header['POPCONVER']=='1.27.bill.1':
  206.                     e.release = '1.27'
  207.         else:
  208.                 e.release = header['POPCONVER']
  209.     
  210.             if header.has_key('ARCH'):
  211.             if header['ARCH']=='x86_64':
  212.                     e.arch = 'amd64'
  213.             elif header['ARCH']=='i386-gnu':
  214.                     e.arch = 'hurd-i386'
  215.         elif header['ARCH']=='':
  216.                     e.arch = 'unknown'
  217.         else:
  218.                     e.arch = header['ARCH']
  219.  
  220.     elif split[0]=='END-POPULARITY-CONTEST-0' and e != None:
  221.         header = headersplit(split[1:])
  222.         if header.has_key('TIME'):
  223.         try:
  224.           date = long(header['TIME'])
  225.         except: 
  226.           ewrite('Invalid date: ' + header['TIME'])
  227.           continue
  228.         e.done(date)
  229.         e = None
  230.  
  231.     elif e != None:
  232.         e.addinfo(split)
  233.     # end of while loop
  234.     ewrite('Processed %d submissions.' % subcount)
  235.  
  236.  
  237. # main program
  238.  
  239. for d in glob.glob('/org/ftp.debian.org/ftp/dists/stable/*/binary-i386/Packages'):
  240.     read_depends(d)
  241. for d in glob.glob('/org/ftp.debian.org/ftp/dists/unstable/*/binary-i386/Packages'):
  242.     read_depends(d)
  243. read_submissions(sys.stdin)
  244.  
  245. def nicename(s):
  246.     new_s = ''
  247.     for c in s:
  248.         if c == '/':
  249.             new_s = new_s + ',';
  250.     elif c in string.letters or c in string.digits or c=='-':
  251.         new_s = new_s + c
  252.     else:
  253.         new_s = new_s + '.'
  254.     return new_s
  255.  
  256. # dump the results
  257. out = open('results', 'w')
  258. out.write("Submissions: %8d\n" % subcount)  
  259. for release in release_list.keys():
  260.     out.write("Release: %-30s %5d\n"
  261.                   % (release, release_list[release]))
  262.  
  263. for arch in arch_list.keys():
  264.     out.write("Architecture: %-30s %5d\n"
  265.                   % (arch, arch_list[arch]))
  266. for section in sectlist.keys():
  267.     for package in sectlist[section]:
  268.     fv = votelist[package]
  269.     out.write("Package: %-30s %5d %5d %5d %5d\n"
  270.           % (package, fv.yes, fv.old_unused,
  271.              fv.too_recent, fv.empty_package))
  272.  
  273.