# (scrape artifact from hosting page: "home *** CD-ROM | disk | FTP | other *** search")
# Source Generated with Decompyle++
# File: in.pyc (Python 2.4)

'''
A simple indexer which splits tokens in a string
'''

# Compatibility shim: the builtin `set` type first appeared in Python 2.4.
# On older interpreters the name lookup raises NameError and we fall back
# to the `sets` module (removed in Python 3, where the builtin always exists,
# so the except branch never fires there).
try:
    set
except NameError:
    from sets import Set as set
# Common English words excluded from the index.
# Kept as a dict (token -> dummy flag 1) rather than a set to preserve the
# original mapping interface; membership tests (`tok in STOP_WORDS`) behave
# identically either way.
STOP_WORDS = dict.fromkeys((
    'and', 'that', 'into', 'but', 'are', 'they',
    'not', 'such', 'with', 'for', 'these', 'there',
    'this', 'will', 'their', 'then', 'the', 'was',
), 1)
-
class Indexer:
    """Prefix index mapping whitespace-split tokens to lists of objects.

    ``add`` tokenizes a lowercased key string and records *obj* under each
    usable token; ``look_up``/``look_up_token`` retrieve every object whose
    index key starts with the queried token.
    """

    def __init__(self):
        # token -> list of objects indexed under that token
        self.d = {}

    def add(self, key, obj):
        """Index *obj* under every usable token of *key* (case-insensitive)."""
        key = key.lower()
        for tok in key.split():
            # Skip tokens that are too short, implausibly long, or stop words.
            # FIX: the decompiled source read `<= 2 and >= 25`, a condition no
            # length can satisfy; `or` is the evident original intent.
            if len(tok) <= 2 or len(tok) >= 25 or tok in STOP_WORDS:
                continue
            if tok in self.d:
                # Avoid duplicate postings for the same object.
                if obj not in self.d[tok]:
                    self.d[tok].append(obj)
            else:
                # FIX: the decompiler dropped this `else`, leaving a dead
                # expression statement and an unconditional reassignment that
                # wiped every token's existing posting list down to [obj].
                self.d[tok] = [obj]

    def look_up(self, text):
        """Return a list of objects matching any token of *text*.

        Reconstructed: the decompiled body was residue (`tokens = _[1]`,
        never populating `result`). Assumes union ("match any token")
        semantics over the lowercased, whitespace-split query —
        NOTE(review): confirm union vs. intersection against callers.
        """
        tokens = text.lower().split()
        result = set()
        for tok in tokens:
            result.update(self.look_up_token(tok))
        return list(result)

    def look_up_token(self, token):
        """Return the set of objects indexed under any key starting with *token*.

        An empty token matches nothing (rather than every key).
        """
        result = set()
        if token == '':
            return result
        # Linear prefix scan over all index keys.
        for key in self.d.keys():
            if key.startswith(token):
                result.update(set(self.d[key]))
        return result