home *** CD-ROM | disk | FTP | other *** search
- #! /usr/bin/env python
-
- """
- combinerefs path
-
- A helper for analyzing PYTHONDUMPREFS output.
-
- When the PYTHONDUMPREFS envar is set in a debug build, at Python shutdown
- time Py_Finalize() prints the list of all live objects twice: first it
- prints the repr() of each object while the interpreter is still fully intact.
- After cleaning up everything it can, it prints all remaining live objects
- again, but the second time just prints their addresses, refcounts, and type
- names (because the interpreter has been torn down, calling repr methods at
- this point can get into infinite loops or blow up).
-
- Save all this output into a file, then run this script passing the path to
- that file. The script finds both output chunks, combines them, then prints
- a line of output for each object still alive at the end:
-
- address refcnt typename repr
-
- address is the address of the object, in whatever format the platform C
- produces for a %p format code.
-
- refcnt is of the form
-
- "[" ref "]"
-
- when the object's refcount is the same in both PYTHONDUMPREFS output blocks,
- or
-
- "[" ref_before "->" ref_after "]"
-
- if the refcount changed.
-
- typename is object->ob_type->tp_name, extracted from the second PYTHONDUMPREFS
- output block.
-
- repr is repr(object), extracted from the first PYTHONDUMPREFS output block.
- CAUTION: If object is a container type, it may not actually contain all the
- objects shown in the repr: the repr was captured from the first output block,
- and some of the containees may have been released since then. For example,
- it's common for the line showing the dict of interned strings to display
- strings that no longer exist at the end of Py_Finalize; this can be recognized
- (albeit painfully) because such containees don't have a line of their own.
-
- The objects are listed in allocation order, with most-recently allocated
- printed first, and the first object allocated printed last.
-
-
- Simple examples:
-
- 00857060 [14] str '__len__'
-
- The str object '__len__' is alive at shutdown time, and both PYTHONDUMPREFS
- output blocks said there were 14 references to it. This is probably due to
- C modules that intern the string "__len__" and keep a reference to it in a
- file static.
-
- 00857038 [46->5] tuple ()
-
- 46-5 = 41 references to the empty tuple were removed by the cleanup actions
- between the times PYTHONDUMPREFS produced output.
-
- 00858028 [1025->1456] str '<dummy key>'
-
- The string '<dummy key>', which is used in dictobject.c to overwrite a real
- key that gets deleted, grew several hundred references during cleanup. It
- suggests that stuff did get removed from dicts by cleanup, but that the dicts
- themselves are staying alive for some reason. """
-
- import re
- import sys
-
def read(fileiter, pat, whilematch):
    """Yield successive lines from fileiter until a stopping line is seen.

    pat is a compiled regexp whose match() is applied to each line.  With
    whilematch true, lines are yielded for as long as pat matches them;
    with whilematch false, lines are yielded for as long as pat does not
    match them.

    The stopping line itself (the first non-matching line when whilematch
    is true, the first matching line when it is false) is consumed from
    fileiter but never yielded, so the caller's next read of fileiter
    resumes at the line after it.
    """
    for text in fileiter:
        matched = bool(pat.match(text))
        if matched != whilematch:
            # Stopping line reached: it has been consumed and is dropped.
            return
        yield text
-
def combine(fname):
    """Combine the two PYTHONDUMPREFS output blocks in fname and print them.

    fname is the path of a file holding saved PYTHONDUMPREFS output.
    Everything up to and including the "Remaining objects:" line is
    skipped.  The lines after it, up to the "Remaining object addresses:"
    line, are parsed as "address [refcount] repr"; lines in that range that
    do not parse are reported with a "??? skipped:" message.  The lines
    after the second marker are parsed as "address [refcount] typename"
    for as long as they keep that shape.

    For each object in the second block one line is printed:

        address [refcnt] typename repr

    where refcnt is "ref" if both blocks agree and "before->after" if not.
    Objects that appear only in the second block are reported with a "???"
    message instead.  A final summary line gives the number of objects
    parsed from each block.

    Returns None; all results go to standard output.
    """
    first_marker = re.compile(r'^Remaining objects:$')
    second_marker = re.compile(r'^Remaining object addresses:$')
    crack = re.compile(r'([a-zA-Z\d]+) \[(\d+)\] (.*)')

    addr2rc = {}      # address -> refcount string from the first block
    addr2guts = {}    # address -> repr text from the first block
    before = 0
    after = 0

    # open() rather than the Python 2-only file(); the with block closes
    # the file even if parsing raises part way through.
    with open(fname) as f:
        fi = iter(f)

        # Discard everything up to and including the first marker line.
        for line in read(fi, first_marker, False):
            pass

        # First block: remember each object's refcount and repr.
        for line in read(fi, second_marker, False):
            m = crack.match(line)
            if m:
                addr, addr2rc[addr], addr2guts[addr] = m.groups()
                before += 1
            else:
                # Strip the line's own newline so the message is not
                # followed by a blank line.
                print('??? skipped: %s' % line.rstrip())

        # Second block: read() stops at the first line crack rejects.
        for line in read(fi, crack, True):
            after += 1
            m = crack.match(line)
            assert m  # read() only yields lines that crack matches
            addr, rc, guts = m.groups()  # guts is type name here
            if addr not in addr2rc:
                print('??? new object created while tearing down: %s'
                      % line.rstrip())
                continue
            if rc == addr2rc[addr]:
                refcnt = '[%s]' % rc
            else:
                refcnt = '[%s->%s]' % (addr2rc[addr], rc)
            # A single pre-formatted string prints identically whether
            # print is a statement (Python 2) or a function (Python 3).
            print('%s %s %s %s' % (addr, refcnt, guts, addr2guts[addr]))

    print("%d objects before, %d after" % (before, after))
-
if __name__ == '__main__':
    # Without a path argument, exit with a usage message (written to
    # stderr by sys.exit) instead of an IndexError traceback.  Any extra
    # arguments are ignored.
    if len(sys.argv) < 2:
        sys.exit('usage: %s path' % sys.argv[0])
    combine(sys.argv[1])
-