home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
OS/2 Shareware BBS: 10 Tools
/
10-Tools.zip
/
pyth_os2.zip
/
python-1.0.2
/
Demo
/
lutz
/
sets.py
< prev
next >
Wrap
Text File
|
1994-02-04
|
44KB
|
1,460 lines
#
# RelDB
#
# set processing in python
#
# defines 3 items:
# class Set -- basic, generic set processing
# class RSet -- a Set, with relational algebra
# test_sets -- a testing function
#
# a set object (Set):
# (1) is an unordered collection
# (2) allows heterogeneous collections
# (3) can grow and shrink arbitrarily
#
# a relational set object (RSet):
# (1) is a Set object
# (2) supports relational algebra operations
#
# users can initialize and concatenate
# sets with normal lists, character strings,
# tuples (in python), other sets, and other
# user-defined sequence objects; users
# can also use any sequence type (built-in
# or user-defined) on the left-hand side
# of most binary set operations;
#
# sets are unordered collections of arbitrary
# data items: items can be a mixture of any
# data type (but some operations assume a
# particular node format); supports most
# common set operations, as well as some
# relational algebra operators (via the Rset
# subclass);
#
# sets are implemented using Python lists (but
# we could use other forms), and encapsulated in
# a class; there is a conversion to strings;
#
# there are many ways to represent relational
# database tuples and tables in Python:
# --character strings (and split fields apart)
# --simple lists ([[name,value], [name,value],...])
# --Pythin tuples ( (value,value,..) or ((name,value), (name,value),..)
# --lists with seperate name lists (['f1','f2'..], [a,b,..], [c,d,..])
# --empty classes (class T: x = T(); x.f1=a, x.f2=b; ...)
# --dictionaries ( {'name':value, 'name':value,....}, {...}, .. )
#
# we use dictionaries here for clarity, but they're
# probably not the most efficient alternative (hashing
# versus indexing); each tuple is a dictionary, and
# a table is just a list of dictionaries; we also don't
# load tables from an external file-- they are assumed to
# be already present in the program (dictionary constants
# in python); dictionaries are slow to copy and compare
# in python, so they're probably a poor representation
# for tuples/records;
#
# notes:
# 1) Uses python's OOP class facility for sets. We only
# use inheritance to define the RSet() relational set
# subclass of Set(). Since there's little over-riding of
# methids in Rset, OOP isn't strictly necessary (we could
# a module of library functions that operate on simple
# lists instead).
#
# We could, in priciple, also sub-classes to define
# different internal formats (list sets, string sets,
# etc) this would require making the concatenation
# operation virtual, since '+' needs the same type on
# each side (different singleton syntax). Since we
# use lists internally, we can use the faster '.append'
# list method to concatenate onto a set (versus '+').
#
# 2) Python's 'in' and subscripting are generic, but '+'
# requires the same types on left and right.
# Python's '==' (and therefore 'in') handles lists,
# tuples, dictionaries and strings right (its not
# just address comparison ('is' does that): strings
# compare lexically, lists/tuples are traversed
# recursively, and dictionaries are compared
# recursively after their fields are sorted.
#
# 3) We do minimal error checking here: you'll get
# a normal subscripting or dictionary exception if
# you do something weird with a set;
#
# 4) example usage:
# python
# >>> import sets
# >>> sets.test_sets()
#
# >>> from sets import *
# >>> test_sets()
# >>> import pbd
# >>> pdb.run('test_sets()')
#
# >>> x = Set().init('abd') (interactive)
# >>> y = Set().init('db')
# >>> x.intersect(y)
#
# >>> from sets import *
# >>> test_sets()
# >>> from sets import *
# >>> table1.join(table2).list([]) (tables exported)
##############################################################
##############################################################
class Set:
#######################################################
# init() takes an optional argument, and allows the
# init value to be a list, character string, tuple,
# another existing set, or any sequence class object;
#######################################################
def init(self, *value):
self.data = []
if value:
self.concat(value[0])
return self
def display(self):
print
print '<',
for x in self.data: print x,
print '>'
def list(self):
print
for x in self.data: print x
def to_string(self):
result = ''
for x in self.data: result = result + x
return result
def to_list(self):
return self.data[:] # copy list
def to_RSet(self):
return RSet().init(self.data) # see notes at RSet class
#################################################
# note that self.data can be accessed directly;
# we need copy() since assignment just binds a
# variable to an existing object: changing one
# variable may effect another variable's value;
#################################################
def get(self):
return self.data
def set(self, value):
self.data = []
self.concat(value) # allow any seqence object
def copy(self):
return Set().init(self) # or copy via self.data[:]
############################################################
# insertion, deletion, duplication (in place);
# concat() allows adding lists, strings, tuples, other
# sets, and any other user-defined sequence class object,
# to a given set object;
#
# concat() uses 'in' to generically iterate over any
# sequence type: lists, strings, tuples, and any user-
# defined sequence class; any user-defined class
# which overloads '__len__' and '__getitem__' methods
# will work with 'in' generation; our Set class does
# this too, so 'value' can be another set, and we don't
# really need a special type test of the form:
# if type(value) == type(self):
# value = value.data
#
# notes:
# 1) 'x.concat(y.get())' = 'x.concat(y)' = 'x = x + y'
# 2) 'list.append(x)' is faster than 'list = list + [x]'
# 3) 'type(self)==type(value)': all class instances
# have the same type, so this only tells us it's an
# instance-- we assume its a Set() instance as well;
# we need type() since classes can;t over-ride 'in';
#
# 4) deletion can be coded many ways in python:
# try:
# self.data.remove(value)
# except:
# pass
#
# del self.data[self.data.index(value)]
#
# for i in range(len(self.data)):
# if self.data[i] == value
# sef.data = self.data[:i] + self.data[i+1:]
###########################################################
def concat(self, value):
for x in value: # generic iterator
self.data.append(x)
def delete(self, value):
if value in self.data:
self.data.remove(value)
def delete_all(self, value):
while value in self.data:
self.delete(value) # Set delete, not list
def delete_set(self, sequence):
for x in sequence:
self.delete(x) # in-place 'difference'
def repeat(self, copies):
self.data = self.data * copies
########################################################
# searching (index), function apply and mapping, etc.;
# Pyton has an apply(func,args), but it doesn't
# apply the funtion to a list;
########################################################
def search(self, value):
for i in range(len(self.data)): # if value in self.data:
if self.data[i] == value: # return self.data.index(value)
return i
return -1
def apply(self, func):
for x in self.data: func(x)
def map(self, func):
result = []
for x in self.data: result.append(func(x))
return Set().init(result)
def accum(self, func, start):
result = start
for x in self.data: result = func(result, x)
return result
def accum_op(self, optr, start):
result = start
for x in self.data: result = eval(`result` + optr + `x`)
return result
def sum(self): return self.accum_op('+', 0)
def prod(self): return self.accum_op('*', 1)
def abs(self): return self.map(abs)
def neg(self): return self.map(negate)
def square(self): return self.map(square)
##########################################################
# a set member generator;
#
# these routines are mostly superfluous, since we
# can generate set nodes using 'in' in for loops:
# for x in set:
# process(x)
#
# we can do this because we've overloaded len() and
# x[i] to work with set instances (see below); this
# allows 'in' to generate items in a sequence class;
#
# makes a copy of the set's list (using [:] slices),
# so that generation ignores set changes between next()
# calls (an index/counter can become wrong);
##########################################################
def first(self):
self.tail = self.data[:]
return self.next()
def next(self):
if self.tail == []:
return []
else:
head, self.tail = self.tail[0], self.tail[1:]
return head
############################################
# basic set operations;
# uses very simplistic algorithms;
# also available with operators &, |, ^ ;
#
# note: the right-hand operand to these
# methods ('other') can be another Set,
# or any built-in or user-defined sequence
# type (lists, character strings, tuples.
# other class objects): 'in' generates Set
# items too;
#
# since 'in' also iterates over the left
# operand ('self'), we don't really need
# to use 'self.data' in the 'if' and 'for'
# statements in the code below (we could
# just use 'self'); but the left must
# be a Set object to call the methods,
# and using '.data' avoids 1 method call;
############################################
def intersect(self, other):
result = []
for x in self.data:
if x in other: # not 'other.data'
result.append(x)
return Set().init(result)
def union(self, other):
result = self.data[:] # copy list
for x in other:
if not x in self.data:
result.append(x)
return Set().init(result)
def difference(self, other):
result = []
for x in self.data:
if not x in other:
result.append(x)
return Set().init(result)
################################################################
# set membership, occurrence counts, equivalence, subsumption;
#
# member() and occurs() are not strictly needed:
#
# member ->
# 'in' works with Set objects, since Set overloads the
# '__len__' and '__getitem__' methods ('in' works on all
# sequences generically); so we can directly code:
# if value in set:
# process..
# occurs ->
# we can simply use the built-in count() list method:
# self.data.count(value)
#
# notes:
# 1) 2 sets are 'equivalent' if they have exactly the same
# elements, but in possibly different orders;
#
# 2) a set 'subsumes' another, if all it contains (at
# least) all elements in the other set, in any order,
# plus arbitrarily many other items (it must be at least
# as long as the subsumed set); this is the same as
# saying the operand is a _subset_ of the object;
###############################################################
def member(self, value):
return (value in self.data) # boolean 1|0
def occurs(self, value):
i = 0
for x in self.data:
if x == value: i = i+1 # .count
return i
def equiv(self, other):
copy = other.data[:] # dont change other
for x in self.data:
if not x in copy: # other must be a set
return 0
copy.remove(x)
return (copy == [])
def subsume(self, other):
copy = self.data[:]
for x in other.data:
if not x in copy:
return 0
copy.remove(x)
return 1
############################################################
# bagof() variants:
#
# create a new set of items composed of items in the
# set that satisfy/succeed a given testing function;
#
# bagof() is a generic dispatcher; it selects one
# of 4 bagof methods based on the datatype of the
# argument and/or the numberof arguments passed
#
# bagof1() takes a function which must take exactly one
# argument (set item), and return a 0/1 boolean result;
#
# bagof2() takes an arbitrary expression (a string),
# which should evaluate to 0/1 boolean result; in
# the expression string, the variable name 'x' will
# reference the generated set item (eval() evaluates
# an expression in the context of the eval() call);
#
# bagof3() is bagof2(), except that it creates a
# function enclosing the expression, and passes it
# to bagof() (rather than calling eval() repeatedly)
# (exec() is like eval(), but allows full statements)
#
# bagof4() takes a generation variable, a function,
# and a list of arguments to send the function in the
# form of a tuple; it binds the generation variable
# to successive set items, and calls the function
# with an argument list composed of members of the
# tuple; the set item can be referenced by using
# the generation variable in the argument list tuple;
#
# bagof5() is like bagof4(), except that is always
# places the generated item in argument 1, in the
# call to the testing funtion; this simplifies the
# interface;
#
# notes:
# 1) 'apply' in bagof4() refers to the built-in apply,
# not the Set method (need self.apply to get to method)
#
# 2) The string expression variants must be used with
# care: you must concatenate the string representation
# of values into the expr; this requires backquotes
# for lists and tuple; since Set overloads __repr__,
# sets can be printed with backquotes, without having
# to call the '.get()' method to get the set's value list
#
# 3) bagof3() must do eval('_f') to get the function it
# just created; using 'global _f' does not seem to avoid
# this: '_f' is likely always stored in the local scope
# when 'def' runs, and the '_f'' refence maps to the global
# scope statically (since its not assigned) ???
#
# 4) bagof4() is a bit tricky...
# Python passes parameters by value (copy)-- this
# really means passing by object reference: the formal
# parameter initially 'shares' the object the actual
# parameter is bound to, but the 2 diverge as soon as
# the formal parameter is reassigned in the function.
#
# But, when the formal/actual share a 'mutable' object,
# changes made to it through the formal will be
# reflected in the actual. In other words, pass by
# reference can be simulated by using a list object
# enclosing the item, and changing the value of
# the list node.
#
# Also, python has no delayed evaluation, except
# for strings: arguments are fully evaluated in a
# call before the call is made, so use a variable
# in a call that we hope will become successively
# bound insode the function [bagof4(x, member, (x,y))]
#
# For both these reasons, bagof4() requires that
# the generation variable be passed in as a 1-item
# list, and that the test function expect the 1-item
# list to be passed in; it rebinds list[0], and
# so changes the value of the generation variable;
############################################################
def bagof(self, *args):
if len(args) == 1:
if type(args[0]) == type(''):
return self.bagof3(args[0]) # string expr
else:
return self.bagof1(args[0]) # 1-arg function
elif len(args) == 2:
return self.bagof5(args[0], args[1]) # func+extra-args
elif len(args) == 3:
return self.bagof4(args[0], args[1]. args[2]) # gen+func+args
else:
raise set_error
def bagof1(self, func):
res = []
for x in self.data:
if func(x): res.append(x)
return Set().init(res)
def bagof2(self, expr):
res = []
for x in self.data:
if eval(expr): res.append(x)
return Set().init(res)
def bagof3(self, expr):
exec('def _f(x): return ' + expr + '\n')
return self.bagof1(eval('_f'))
def bagof4(self, gen, func, args):
res = []
for gen[0] in self.data:
if apply(func, args): res.append(gen[0])
return Set().init(res)
def bagof5(self, func, args):
res = []
for x in self.data:
if apply(func, (x,) + args): res.append(x)
return Set().init(res)
#
# usage examples
#
def intersect1(self, other):
global global_list
global_list = other
return self.bagof(in_global_list)
# def in_global_list(x): return x in global_list
def intersect2(self, other):
return self.bagof2('(x in ' + `other` + ')')
def intersect3(self, other):
return self.bagof3('(x in ' + `other` + ')')
def intersect4(self, other):
gen = [[]]
return self.bagof4(gen, member2, (gen, other))
# def member2(x,y): return (x[0] in y)
def intersect5(self, other):
return self.bagof5(member, (other,))
# def member(x,y): return (x in y)
###################################################################
# sorting;
#
# 3 sorts:
# sort() does a simple list sort, and uses the builtin method;
# sort_keyed() sorts a list of tuples on a given field's value;
# sort_keyed2() is like sort_keyed(), but it builds a comparison
# function so it can use the built-in sort() method
#
# notes:
# 1) sort_keyed() uses a simple insertion sort (others are better)
# 2) sort_keyed2() must construct a function, since it must
# select field values before calling cmp() (and .sort only
# alows for the 2 operands as parameters)
# 3) Python has a built-in '.reverse()' method for lists; we
# write our own for illustration only
# 4) sort() must make an explicit copy of self's list: we can't
# just code 'copy = self.data', or else we'd wind up sorting
# 'self's list too (.sort is in-place, and '=' just binds)
# 5) sort_keyed2() must backquote the field name (so it gets
# quoted), and calleval() to fetch the created function (see
# the bagof() stuff for more setails)
###################################################################
def sort(self):
copy = self.copy()
copy.data.sort() # list sort, not set sort
return copy
def sort_keyed(self, field): # simple insertion sort
result = []
for x in self.data:
i = 0
for y in result:
if x[field] <= y[field]: break
i = i+1
result[i:i] = [x] # or, result.insert(i,x)
return Set().init(result)
def sort_keyed2(self, field):
func = 'def _cmp(x,y): return cmp(x['+`field`+'], y['+`field`+'])\n'
exec(func)
copy = self.copy()
copy.data.sort(eval('_cmp'))
return copy # cmp() is builtin: -1,0,+1
#########################################################
# permutations(): generate all permutations of the set
# reversed(): reverse a list manually (not in place)
# subsets(): geneate all subsets of length n
# combos(): generate all combinations of length n
#
# notes:
# 1) these methods use recursive functions outside
# the class; the outside functions avoid creating
# new Set objects at each level of the recursion;
#
# 2) subsets() treats '[a,b]' as equivalent to
# '[b,a]' (and only generates 1 instance); combos()
# treats the 2 cases as different (and gens both--
# order matters); combos() is really just a length
# (size) limited permutations();
#########################################################
def permutations(self):
return Set().init(permute(self.data))
def reversed(self):
return Set().init(reverse(self.data))
def subsets(self, n):
return Set().init(subset(self.data, n))
def combos(self, n):
if n > len(self.data):
return Set().init()
else:
return Set().init(combo(self.data, n))
##################################################################
# overload some operators to work with Set instances;
# Python uses special method names to overload operators,
# instead of special syntax (as in C++);
#
# '+' and '*' works for sets as for normal lists, but '+'
# allows appending any sequence type to the set ('+' is not
# generic for built-in types);
#
# notes:
# 0) it look like difference() and delete_set() are really
# the same (but delete_set( )is done in-place), so '-' and
# '^' are functionally equivalent (oops..);
#
# 1) '+' (and '-' and '*) isn't quite right: 'x = y + value'
# will also change y; to be correct, it should make a copy:
# def __add_(self, value):
# copy = self.copy()
# copy.concat(value)
# return copy
# --> this has now been corrected <--
#
# 2) it's critical that we overload __len__ and __getitem__
# (which correspond to len(), and x[i] indexing): this allows
#
# (1) 'in' to generate items in the set in 'for' loops, and
# (2) 'in' to test membership in conditional expressions
#
# in 'for' loops, 'in' lets us generically iterate over sets
# just like any other sequence type (lists, strings, tuples,
# other user-defined sequence classes); this allows us to
# use just about any sequence type to init or concat onto a
# set, and allows the right-hand-side of a binary set operation
# to be any sequence type; for example, we can intersect
# a set and a character string:
#
# x = Set().init(['abcdefg'])
# x.intersect('aceg')
#
# most binary operations allow non-Set sequence types on the
# right side, but the left side must be a Set object (or else
# we can't invoke a Set method);
#
# since 'in' also iterates over and test membership in sets,
# we don't really need to use 'self.data' in the 'if' and 'for'
# statements in most of the code above: we could just use 'self';
# but, since we know 'self' is a Set (or else we can't call the
# methods, we can avoid 1 or more method calls by explicitly using
# the '.data' field name (we avoid calling the __len__ and
# __getitem__ methods, and instead use the built-in list
# data type's mechanisms (which are presumably faster));
##################################################################
def __cmp__(self, other): return self.equiv(other) # ==, <, >
def __repr__(self): return `self.data` # print set
def __len__(self): return len(self.data) # len()
def __getitem__(self, key): return self.data[key] # set[i]
def __setitem__(self, key, x): self.data[key] = x # set[i]=x
def __delitem__(self, key): del self.data[key] # del set[i]
def __getslice__(self, i, j): return self.data[i:j] # set[i:j]
def __add__(self, value):
res = self.copy(); res.concat(value); return res # set + x
def __sub__(self, value):
res = self.copy(); res.delete_set(value); return res # set - x
def __mul__(self, value):
res = self.copy(); res.repeat(value); return res # set * n
def __or__(self, other): return self.union(other) # set | set
def __and__(self, other): return self.intersect(other) # set & set
def __xor__(self, other): return self.difference(other) # set ^ set
def __neg__(self): return self.reversed() # -set
def __pos__(self): return self.sort() # +set
##################################################
# some utility functions called by the methods
# see descriptions at calling methods;
#
# notes:
# 1) 'member' here is global in the module; it
# does not clash with the 'member' Set method,
# since that function can only be reached via
# qualification (instance.member(), Set.member())
##################################################
set_error = 'Set class error'
def negate(n): return -n
def square(n): return n * n
def member(x,y): return (x in y)
def member2(x,y): return (x[0] in y) # for intersect4()
def in_global_list(x): return x in global_list # for intersect1()
def permute(list):
if list == []:
return [[]]
else:
result = []
for i in range(len(list)):
pick = list[i]
rest = list[:i] + list[i+1:]
for x in permute(rest):
result.append([pick] + x)
return result
def reverse(list):
result = []
for x in list:
result = [x] + result
return result # or: copy = list; copy.reverse()
# [help], 4 -> [help]
# [help], 3 -> [hel], [hep], [hlp], [elp]
# [help], 2 -> [he], [hl], [hp], [el], [ep] [lp]
# [help], 1 -> [h], [e], [l], [p]
def subset(list, size):
if size == 0 or list == []:
return [[]]
else:
result = []
for i in range(0, (len(list) - size) + 1):
pick = list[i]
rest = list[i+1:] # drop [:i]
for x in subset(rest, size - 1):
result.append([pick] + x)
return result
# [help], 4 -> [help], [hepl], [hlep], [hlpe], [hpel], ...
# [help], 3 -> [hel], [hep], [hle], [hlp], [hpe], [hpl], [ehl], ...
# [help], 2 -> [he], [hl], [hp], [eh], [el], [ep], [lh], [le], ...
# [help], 1 -> [h], [e], [l], [p]
def combo(list, size):
if size == 0 or list == []:
return [[]]
else:
result = []
for i in range(len(list)):
pick = list[i]
rest = list[:i] + list[i+1:]
for x in combo(rest, size-1):
result.append([pick] + x)
return result
##########################################################
##########################################################
# RSet() subclass: basic relational algebra operations;
# assumes items in the set are 'tuples', which
# are implemented as Python dictionaries (name:value)
#
# implemented as a subclass instance of the general
# class Set(), since these operations require a
# specific set item format; note that RSet
# inherits all Set methods, including its operator
# overloadings;
#
# adds the following methods:
# select: returns a new set = old where field = value
# find: like select, but allow general comparison tests
# match: a generalized select (keys in another set)
# join: implements normal 'natural' join on a field
# product: a simple cartesian product operation
# unique: removes duplicate tuples from a copied set
# project: normal 'projection' operation
# copy_tuple: copies a tuple from a table
# input_tuple: enters tuple fields, and adds to the table
# input_list: like input, but field names in a list
# add_tuple: pass in a list/tuple/string of values
#
# over-rides these methods:
# init
# list
# to_string
#
# notes:
# 1) list() allows an optional argument giving the
# names of the fields to be displayed first (any other
# fields of a tuple are given in any order after
# the fields in the list() argument). Passing in an
# empty list forces the [name]=value format.
#
# 2) find() could be more general: use bagof()
# directly for arbitrary selection expressions.
# 'or' can sometimes be simulated by using optr
# 'in' with a list of values, but full-blown
# boolean selection exprs must use bagof();
# find() needs backquotes around the field name
# so it's quoted in the resulting expr, as well
# as around the value;
#
# 3) join() and product() must eplicitly copy each
# tuple dictionary: 'copy = dict' just makes copy
# _share_ the object that dict references, and any
# changes in copy change dict too
#
# 4) we compare tuples and test for tuple membership
# in a table using dictionary equality; 'in' works for
# lists of dictionaries in python: it uses dictionary
# equality, which sorts the fields, and compares them
# leixcographically (and compares their values); this
# is slow, but works (the 'is' operator tests whether
# 2 variables reference the _same_ object);
#
# 5) product() simply concatenates the 2 tuples,
# renaming same-named fields, by appending '_N' to
# duplicae field names (N uniquely identifies the
# field in the resulting tuple); join() atually
# deletes same-named fields (in the right operand);
#
# 6) input_tuple() and add_tuple() get the field
# list from an existing tuple in the table, so the
# table can't be empty (and all tuples should have
# the same fields in any given table (relational));
# input_tuple() and input_list() allow any type
# value to be entered for a field: it calls eval()
# with the input string, to force the python parser
# to determine its type by syntax-- users should type
# values using normal pyth syntax ('abd', 123, [..])
#
# 7) These methods return a RSet object, not a Set
# object, so their results can have further rel ops
# applied to them. In general, Set and RSet objects
# can be mixed: the left operaand must be an RSet to
# invoke the methods, but the left ('other') can be
# a RSet, a normal Set, or any built-in or user-def'd
# sequence object (list, tuple, other class). Since
# Python is dynamically typed, Sets and RSets can
# be passed to generic routines; However, we provide
# a conversion from Set's to RSet's with a simple Set
# method:
#
# def to_RSet(self): return RSet().init(self.data)
#
# This is needed if you use a simple Set() method
# which returns a Set, and want to perform RSet
# operations on the result. There is no good reason
# for a RSet -> Set conversion (RSet's _are_ Set's)
##########################################################
##########################################################
class RSet(Set):
def init(self, *value):
if value:
return Set.init(self, value[0])
else:
return Set.init(self)
def list(self, *first):
print
print 'table =>'
for x in self.data:
if not first:
print ' tuple =>', x
else:
print ' tuple =>',
for f in first[0]:
print '['+f+']=' + `x[f]`, # backquotes
for f in x.keys():
if not f in first[0]:
print '['+f+']=' + `x[f]`,
print
def to_string(self):
raise rset_error, 'cannot convert to a string'
def select(self, field, value):
result = []
for x in self.data:
if x[field] == value:
result.append(x)
return RSet().init(result) # return RSet, not Set
def find(self, field, cmp, value):
return \
self.bagof('x['+ `field` +'] ' + cmp + ' ' + `value`).to_RSet()
def match(self, other, field):
result = []
for x in self.data:
for y in other:
if y[field] == x[field]:
result.append(x)
return RSet().init(result)
def join(self, other, field):
result = []
for x in self.data:
for y in other:
if y[field] == x[field]:
compos = self.copy_tuple(x)
for k in y.keys():
if not x.has_key(k):
compos[k] = y[k]
result.append(compos)
return RSet().init(result)
def product(self, other):
result = []
for x in self.data:
for y in other:
compos = self.copy_tuple(x)
for k in y.keys():
if not x.has_key(k):
compos[k] = y[k]
else:
i = 1
while x.has_key(k + '_' + `i`):
i = i+1
compos[k + '_' + `i`] = y[k]
result.append(compos)
return RSet().init(result)
def unique(self):
result = []
for x in self.data:
if not x in result:
result.append(x)
return RSet().init(result)
def project(self, fields):
result = []
for x in self.data:
tuple = {}
for y in fields:
if x.has_key(y):
tuple[y] = x[y]
if tuple and not tuple in result:
result.append(tuple)
return RSet().init(result)
def copy_tuple(self, tup):
res = {}
for field in tup.keys():
res[field] = tup[field]
return res
def input_tuple(self):
self.input_list(self.data[0].keys()) # use tuple's fields
def input_list(self, fields):
tup = {}
for x in fields:
valstr = raw_input(x + ' => ')
tup[x] = eval(valstr) # any type: parse it
self.data.append(tup)
def add_tuple(self, values): # types already known
tup, i = {}, 0
if len(values) != len(self.data[0]):
raise rset_error, 'add_tuple length'
for x in self.data[0].keys():
tup[x], i = values[i], i+1
self.data.append(tup)
rset_error = 'relational set error'
##############################################################
##############################################################
#
# the test driver
#
# notes:
# 1) `['a', 'b']` (backquotes) == '[\'a\', \'b\']';
# since find() already backquotes the search value,
# we don't always need o do it below;
#
# 2) the 'complete', 'available', and 'object' tables
# are computed with relational operators statically;
# we could make these into something like 'schemas'
# if we store them as a string and eval() them to
# get their values on demand; RSet has no real
# 'schema' (delayed evaluation) facility as is;
#
# 3) test_sets() exports 6 tables to the global
# scope of the module on exit, so they can be used
# at the interactive propmt. The user must do
# another 'from sets import *' _after_ test_sets()
# runs, to get these symbols-- 'import *' statements
# in python get symbols in the module at the time
# the 'import' executes (it does not get symbols
# added to the module's global scope later on).
# This complexity could be avoided by coding the
# tables global (assign them outside test_sets()).
##############################################################
##############################################################
def test_sets():
x = Set().init('abcdefg')
item = x.first()
while item: # generator
print item,
item = x.next()
print
for item in x: print item, # 'in' overload
print
print ('d' in x ), ('z' in x ) # 'in' overload
x = RSet().init([{'name':'x', 'age':0, 'stats':[1,2,3]}])
x.list([])
x.add_tuple(('y', 1, [4,5,6]))
x.add_tuple(('z', 2, [7,8]))
x.list([])
#############################
# a simple employee database
#############################
a = RSet().init( \
[{'name':'mark', 'job':'engineer'}, \
{'name':'amrit', 'job':'engineer'}, \
{'name':'sunil', 'job':'manager'}, \
{'name':'marc', 'job':'prez'}, \
{'name':'martin','job':'architect'}, \
{'name':'jeff', 'job':'engineer'}, \
{'name':'eve', 'job':'administrator'} \
])
b = RSet().init( \
[{'job':'engineer', 'pay':(25000,60000)}, \
{'job':'manager', 'pay':(50000,'XXX')}, \
{'job':'architect','pay':None}, \
{'job':'prez', 'pay':'see figure 1'} \
])
c = RSet().init( \
[{'name':'mark', 'job':'engineer'}, \
{'name':'amrit', 'job':'engineer'}, \
{'name':'sunil', 'job':'manager'}, \
{'name':'john', 'job':'engineer'}, \
{'name':'steve', 'job':'manager'} \
])
a.list()
a.select('job', 'engineer').list()
a.join(b, 'job').list()
a.join(b, 'job').list(['name','pay'])
a.project(['job']).list()
a.select('job', 'engineer').project(['name']).list()
a.find('job', '>', 'engineer').list(['job'])
c.find('job', '!=', 'engineer').list(['job'])
a.bagof('x[\'name\'][0] == \'m\'').to_RSet().list(['name'])
a.bagof('x[\'job\'] > \'engineer\'').to_RSet().list([])
a.bagof('x[\'job\'] > \'engineer\' or x[\'name\'] == \'eve\'').\
to_RSet().list([])
a.project(['job']).difference(b.project(['job'])).list()
a.join(b, 'job').project(['name', 'pay']).list()
a.select('name','sunil').join(b,'job').project(['name', 'pay']).list()
b.product(c).list([])
a.product(c).list([])
a.product(c).unique().list([])
a.product(c).project(['name']).list([])
a.product(c).project(['name', 'name_1']).list([])
a.sort_keyed('name').to_RSet().list(['name'])
a.sort_keyed2('name').to_RSet().list(['name'])
c.sort_keyed('name').to_RSet().list(['name'])
a.difference(c).list()
a.intersect(c).list()
a.union(c).list()
print cmp( ((a ^ c) + (a & c)), a)
print cmp( ((a ^ c) | (a & c)), a)
print cmp( ((a ^ c) | c), a)
print ((a ^ c) + (a & c)).equiv(a)
print ((a ^ c) | c).equiv(a)
print a.difference(c).union(a.intersect(c)).equiv(a)
#########################
# a languages database
#########################
langs = RSet().init([ \
{'name':'prolog', 'type':'logic', 'rank':5, 'use':'aliens'}, \
{'name':'alpha', 'type':'proced', 'rank':3, 'use':'glue'}, \
{'name':'python', 'type':'proced', 'rank':1, 'use':'rad'}, \
{'name':'icon', 'type':'proced', 'rank':1, 'use':'rad'}, \
{'name':'scheme', 'type':'funct', 'rank':2, 'use':'aliens'}, \
{'name':'smalltalk','type':'object', 'rank':4, 'use':'oop'}, \
{'name':'dylan', 'type':'object', 'rank':7, 'use':'glue'}, \
{'name':'self', 'type':'object', 'rank':7, 'use':'oop'}, \
{'name':'rexx', 'type':'proced', 'rank':7, 'use':'shell'}, \
{'name':'tcl', 'type':'proced', 'rank':6, 'use':'shell'}, \
{'name':'d', 'type':'unknown', 'rank':7, 'use':'glue'}, \
{'name':'lisp', 'type':'funct', 'rank':3, 'use':'aliens'}, \
{'name':'perl', 'type':'proced', 'rank':7, 'use':'shell'} \
])
course = RSet().init([ \
{'type':'logic', 'class':'ai'}, \
{'type':'funct', 'class':'ai'}, \
{'type':'proced', 'class':'compilers'}, \
{'type':'logic', 'class':'compilers'}, \
{'type':'object', 'class':'simulation'} \
])
prof = RSet().init([ \
{'class': 'ai', 'prof':'travis'}, \
{'class':'compilers', 'prof':'horwitz'} \
])
dbase = langs.find('use', 'in', ['glue', 'rad', 'oop'])
complete = langs.find('name', 'not in', \
['alpha', 'dylan']) # no backquotes
objects = langs.find('name', 'in', \
['python', 'icon', 'smalltalk', 'self', 'dylan'])
available = langs.find('name', 'not in', \
['dylan', 'self', 'd', 'rexx'])
dbase.list(['name'])
langs.join(course,'type').project(['name','class']).list([])
langs.join(course,'type').join(prof,'class').project(['name','prof']).list()
langs.select('use','aliens').join(course,'type').project(['class']).list()
for x in ['<', '==', '>']:
set = langs.find('rank', x, 3)
set.project(['name', 'rank']).sort_keyed('rank').to_RSet().list([])
worst = langs.find('rank', '>=', 5)
worst.sort_keyed('rank').to_RSet().list(['name', 'rank'])
langs.select('name', 'rad').list([])
langs.select('use', 'rad').list([])
available.list([])
objects.list([])
x = langs.find('use', 'in', ['rad', 'glue'])
(x & complete & objects & available).to_RSet().project(['name']).list([])
#######################
# miscellaneous tests
#######################
print
x = Set().init('abcdefg')
print x.intersect('aczeg')
y = Set().init('aceg')
print x.intersect(y)
z1 = Set().init('gcae')
z2 = z1 + 'x'
print x.subsume(y), x.subsume(z1), x.subsume(z2)
print y.equiv(z1), y.equiv(z2)
x = Set().init(['h', 'e', 'l', 'p'])
x.permutations().list()
for i in range(0, len(x) + 2): # 0..5
x.subsets(i).display()
for i in range(0, len(x) + 2):
x.combos(i).display()
x = Set().init()
x.set([1,2,3])
x.permutations().display()
x = Set().init()
x.concat('alp')
print permute(x.get())
x.concat('po')
x.display()
x.reversed().display()
x.sort().display()
print x
print x.to_string()
print
x = Set().init([1,2,3,4,5,6])
y = Set().init([0,2,4,9,6,8,2])
print x.intersect1(y)
print x.intersect2(y)
print x.intersect3(y) # test bagof generators
print x.intersect4(y)
print x.intersect5(y)
print x.bagof('x in '+`y`) # dont need y.get()
print x.bagof(member, (y,)) # dont need y.get()
###########################
# test operator overloads
###########################
print
x = Set().init('abcd')
y = Set().init('db')
print len(x), x + 'efg', x - y, x * 3 # 'x - [..]' uses coerce()
print x[0], x[0:2]
x[1] = 'z'
del x[0]
print x
print (x == 'abcde'), (x == x)
print x.equiv(Set().init('abcde')), x.equiv(x)
x.set('abcde')
y = Set().init('azce')
z = Set().init('axedy')
print x | z
print x & y
print x ^ z
print -x + z
print x & y & z
print x | y | z
print x ^ y ^ z
print x + y + z
print x - y - z
print (x | y - z).display()
######################################
# export tables for interactive tests
######################################
global table1, table2, table2
table1, table2, table3 = a, b, c
global langs1, course1, prof1
langs1, course1, prof1 = langs, course, prof
#
# sets.test_sets()
#