home *** CD-ROM | disk | FTP | other *** search
- # Source Generated with Decompyle++
- # File: in.pyc (Python 2.6)
-
- from __future__ import with_statement
- __license__ = 'GPL v3'
- __copyright__ = '2010, Gerendi Sandor Attila'
- __docformat__ = 'restructuredtext en'
-
class tokenDelimitatorStart:
    """Token standing for a literal ``{`` delimiter."""

    def __init__(self):
        # The token is stateless; nothing to initialise.
        return None

    def toRTF(self):
        """Return the RTF text of this token, always ``{``."""
        text = '{'
        return text

    def __repr__(self):
        """Return the same text as :meth:`toRTF`."""
        text = '{'
        return text
-
-
-
class tokenDelimitatorEnd:
    """Token standing for a literal ``}`` delimiter."""

    def __init__(self):
        # The token is stateless; nothing to initialise.
        return None

    def toRTF(self):
        """Return the RTF text of this token, always ``}``."""
        text = '}'
        return text

    def __repr__(self):
        """Return the same text as :meth:`toRTF`."""
        text = '}'
        return text
-
-
-
class tokenControlWord:
    """Control word token that has no numeric argument.

    ``name`` and ``separator`` are stored exactly as given and are
    concatenated, in that order, when the token is written back out.
    """

    def __init__(self, name, separator = ''):
        self.name, self.separator = name, separator

    def toRTF(self):
        """Return ``name`` followed by ``separator``."""
        word, delimiter = self.name, self.separator
        return word + delimiter

    def __repr__(self):
        """Return the same text as :meth:`toRTF`."""
        word, delimiter = self.name, self.separator
        return word + delimiter
-
-
-
class tokenControlWordWithNumericArgument:
    """Control word token that carries a numeric argument.

    ``name``, ``argument`` and ``separator`` are stored as given and are
    written back out in that order.
    """

    def __init__(self, name, argument, separator = ''):
        self.name = name
        self.argument = argument
        self.separator = separator

    def toRTF(self):
        """Return ``name`` + decimal ``argument`` + ``separator``."""
        # str() rather than repr(): on Python 2, repr() of a long ends in
        # 'L', which would leak into the generated RTF for large arguments.
        return self.name + str(self.argument) + self.separator

    def __repr__(self):
        """Return the same text as :meth:`toRTF`."""
        return self.name + str(self.argument) + self.separator
-
-
-
class tokenControlSymbol:
    """Control symbol token; ``name`` holds the complete symbol text."""

    def __init__(self, name):
        self.name = name

    def toRTF(self):
        """Return the stored symbol text unchanged."""
        symbol = self.name
        return symbol

    def __repr__(self):
        """Return the same text as :meth:`toRTF`."""
        symbol = self.name
        return symbol
-
-
-
class tokenData:
    """Token holding a run of plain data between other tokens."""

    def __init__(self, data):
        self.data = data

    def toRTF(self):
        """Return the stored data unchanged."""
        payload = self.data
        return payload

    def __repr__(self):
        """Return the same value as :meth:`toRTF`."""
        payload = self.data
        return payload
-
-
-
class tokenBinN:
    """Token for binary data introduced by a ``\\bin`` control word.

    On output the length written after ``\\bin`` is recomputed from
    ``len(data)``; it is not stored separately.
    """

    def __init__(self, data, separator = ''):
        self.data, self.separator = data, separator

    def toRTF(self):
        """Return ``\\bin`` + length of data + separator + data."""
        header = '\\bin' + repr(len(self.data))
        return header + self.separator + self.data

    def __repr__(self):
        """Return the same text as :meth:`toRTF`."""
        header = '\\bin' + repr(len(self.data))
        return header + self.separator + self.data
-
-
-
class token8bitChar:
    """Token for a ``\\'`` escape; ``data`` is the text that follows it."""

    def __init__(self, data):
        self.data = data

    def toRTF(self):
        """Return the ``\\'`` prefix followed by the stored data."""
        prefix = "\\'"
        return prefix + self.data

    def __repr__(self):
        """Return the same text as :meth:`toRTF`."""
        prefix = "\\'"
        return prefix + self.data
-
-
-
class tokenUnicode:
    """Token for a ``\\u`` control word plus its fallback tokens.

    Constructor arguments are stored under the same attribute names:

    data        -- numeric argument written after ``\\u``
    separator   -- stored as given; toRTF() does not use it and always
                   writes a single space after the number
    current_ucn -- upper bound on how many fallback tokens are written
    eqList      -- fallback tokens, each providing toRTF(); defaults to
                   a new empty list
    """

    def __init__(self, data, separator = '', current_ucn = 1, eqList = None):
        self.data = data
        self.separator = separator
        self.current_ucn = current_ucn
        # Create the default list per instance: a literal [] default would
        # be one object shared (and mutable) across every token.
        if eqList is None:
            eqList = []
        self.eqList = eqList

    def toRTF(self):
        """Return the RTF text for this token.

        When fewer fallback tokens are available than ``current_ucn``, a
        ``\\uc`` control word announcing the actual count is written first.
        At most that many fallback tokens follow the ``\\u`` word.
        """
        # str() rather than repr(): Python 2 repr() of a long ends in 'L'.
        result = '\\u' + str(self.data) + ' '
        ucn = self.current_ucn
        if len(self.eqList) < ucn:
            ucn = len(self.eqList)
            result = '\\uc' + str(ucn) + result

        # Emit no more than ucn fallback tokens (a non-positive ucn emits
        # none).  The previous loop tested a counter it never incremented.
        fallback = [eq.toRTF() for eq in self.eqList[:max(ucn, 0)]]
        return result + ''.join(fallback)

    def __repr__(self):
        """Return ``\\u`` followed by the numeric argument only."""
        return '\\u' + str(self.data)
-
-
-
def isAsciiLetter(value):
    """Return True when ``value`` lies in ``a``-``z`` or ``A``-``Z``.

    Intended for one-character strings; the comparison is an ordinary
    string comparison.
    """
    # Both bounds of each range must hold.  The decompiled form only
    # returned ``value <= 'Z'``, rejecting every lowercase letter and
    # accepting digits and most punctuation.
    return ('a' <= value <= 'z') or ('A' <= value <= 'Z')
-
-
def isDigit(value):
    """Return True when ``value`` lies in ``0``-``9``.

    Intended for one-character strings; the comparison is an ordinary
    string comparison.
    """
    # Both bounds must hold.  The decompiled form only returned
    # ``value <= '9'``, so a space or '-' counted as a digit.
    return '0' <= value <= '9'
-
-
def isChar(value, char):
    """Report whether ``value`` compares equal to ``char``."""
    matches = (value == char)
    return matches
-
-
def isString(buffer, string):
    """Report whether ``buffer`` compares equal to ``string``."""
    matches = (buffer == string)
    return matches
-
-
class RtfTokenParser:
    """Second pass over a token list such as ``RtfTokenizer.tokens``.

    Constructing the parser immediately rewrites ``tokens`` in two steps,
    process() followed by processUnicode(); the result is kept in
    ``self.tokens`` and can be serialised with toRTF().
    """

    def __init__(self, tokens):
        self.tokens = tokens
        self.process()
        self.processUnicode()

    def process(self):
        """Fold each ``\\'`` control symbol and its argument into one token.

        The first two characters of the data token that follows the symbol
        become a token8bitChar; any remaining characters are kept as a
        separate tokenData.

        Raises:
            Exception: the symbol is not followed by a data token holding
                at least two characters.
        """
        i = 0
        newTokens = []
        while i < len(self.tokens):
            token = self.tokens[i]
            if isinstance(token, tokenControlSymbol) and isString(token.name, "\\'"):
                i = i + 1
                # The bounds test covers a symbol that is the very last
                # token, which used to escape as a bare IndexError.
                if i >= len(self.tokens) or not isinstance(self.tokens[i], tokenData):
                    raise Exception('Error: token8bitChar without data.')
                data = self.tokens[i].data
                if len(data) < 2:
                    raise Exception('Error: token8bitChar without data.')
                newTokens.append(token8bitChar(data[0:2]))
                if len(data) > 2:
                    newTokens.append(tokenData(data[2:]))
                i = i + 1
                continue

            newTokens.append(token)
            i = i + 1
        self.tokens = list(newTokens)

    def processUnicode(self):
        """Replace every ``\\u`` control word with a tokenUnicode.

        A stack tracks the ``\\uc`` value in force: ``{`` duplicates the top
        entry, ``}`` pops it and ``\\uc`` overwrites it.  After a ``\\u`` word,
        up to that many following characters are consumed as its fallback:
        each character of a data token counts as one, as does each
        token8bitChar or tokenBinN.  Consumption stops early at ``{`` or ``}``.
        The consumed fallback is dropped: the tokenUnicode is always created
        with an empty fallback list.

        Raises:
            Exception: a token of any other kind appears while fallback
                characters are still expected.
        """
        i = 0
        newTokens = []
        ucNbStack = [1]
        while i < len(self.tokens):
            token = self.tokens[i]

            if isinstance(token, tokenDelimitatorStart):
                ucNbStack.append(ucNbStack[-1])
                newTokens.append(token)
                i = i + 1
                continue

            if isinstance(token, tokenDelimitatorEnd):
                ucNbStack.pop()
                newTokens.append(token)
                i = i + 1
                continue

            if isinstance(token, tokenControlWordWithNumericArgument):
                if isString(token.name, '\\uc'):
                    ucNbStack[-1] = token.argument
                    newTokens.append(token)
                    i = i + 1
                    continue

                if isString(token.name, '\\u'):
                    i = i + 1
                    j = 0  # fallback characters consumed so far
                    partialData = None
                    ucn = ucNbStack[-1]
                    while i < len(self.tokens) and j < ucn:
                        current = self.tokens[i]
                        if isinstance(current, (tokenDelimitatorStart, tokenDelimitatorEnd)):
                            break

                        if isinstance(current, tokenData):
                            remaining = ucn - j
                            if len(current.data) >= remaining:
                                # This data token completes the fallback;
                                # whatever is left over stays in the output.
                                if len(current.data) > remaining:
                                    partialData = tokenData(current.data[remaining:])
                                i = i + 1
                                break
                            j = j + len(current.data)
                            i = i + 1
                            # Re-test the loop condition before looking at
                            # the next token.  Without this the code fell
                            # through and indexed self.tokens[i] unchecked.
                            continue

                        if isinstance(current, (token8bitChar, tokenBinN)):
                            i = i + 1
                            j = j + 1
                            continue

                        raise Exception('Error: incorect utf replacement.')

                    # The fallback list handed over is always empty, so
                    # the tokens skipped above never reach the output.
                    newTokens.append(tokenUnicode(token.argument, token.separator, ucNbStack[-1], []))
                    if partialData is not None:
                        newTokens.append(partialData)
                    continue

            newTokens.append(token)
            i = i + 1
        self.tokens = list(newTokens)

    def toRTF(self):
        """Return the concatenated toRTF() output of every token."""
        return ''.join([token.toRTF() for token in self.tokens])
-
-
-
class RtfTokenizer:
    """Split RTF text into a flat list of token objects.

    The constructor tokenizes ``rtfData`` immediately and leaves the result
    in ``self.tokens``.  ``rtfData`` is indexed and sliced one position at a
    time and each element is compared with one-character string literals.
    """

    def __init__(self, rtfData):
        self.tokens = []
        self.rtfData = rtfData
        self.tokenize()

    def tokenize(self):
        """Scan ``self.rtfData`` and append the tokens to ``self.tokens``.

        ``{`` and ``}`` become delimiter tokens, a backslash starts either a
        control word (when a letter follows) or a two-character control
        symbol, and everything else is gathered into tokenData runs.

        Raises:
            Exception: a backslash is the last character, a control word or
                its numeric argument runs to the end of the input, the
                argument has more than 10 digits, or a binary length is
                negative or exceeds the remaining input.
        """
        rtf = self.rtfData
        size = len(rtf)
        i = 0
        lastDataStart = -1
        while i < size:
            if isChar(rtf[i], '{'):
                lastDataStart = self._flushData(lastDataStart, i)
                self.tokens.append(tokenDelimitatorStart())
                i = i + 1
                continue

            if isChar(rtf[i], '}'):
                lastDataStart = self._flushData(lastDataStart, i)
                self.tokens.append(tokenDelimitatorEnd())
                i = i + 1
                continue

            if isChar(rtf[i], '\\'):
                if i + 1 >= size:
                    raise Exception('Error: Control character found at the end of the document.')
                lastDataStart = self._flushData(lastDataStart, i)
                if isAsciiLetter(rtf[i + 1]):
                    i = self._readControlWord(i)
                else:
                    # Control symbol: the backslash plus exactly one character.
                    self.tokens.append(tokenControlSymbol(rtf[i:i + 2]))
                    i = i + 2
                continue

            if lastDataStart < 0:
                lastDataStart = i
            i = i + 1

        # Keep a data run that reaches the end of the input instead of
        # silently dropping it.
        self._flushData(lastDataStart, size)

    def _flushData(self, start, end):
        """Emit rtfData[start:end] as tokenData if a run is open; return -1."""
        if start > -1:
            self.tokens.append(tokenData(self.rtfData[start:end]))
        return -1

    def _readControlWord(self, tokenStart):
        """Tokenize the control word whose backslash is at ``tokenStart``.

        Appends one token and returns the index at which scanning resumes.
        """
        rtf = self.rtfData
        size = len(rtf)

        # Letters of the control word.
        i = tokenStart + 1
        while i < size and isAsciiLetter(rtf[i]):
            i = i + 1
        if i >= size:
            raise Exception('Error (at:%d): Control Word without end.' % tokenStart)
        tokenEnd = i

        # Optional numeric argument: an optional '-' followed by digits.
        # The sign has to be stepped over explicitly; a '-' with no digit
        # after it is not an argument and is left for the data run.
        if isChar(rtf[i], '-') or isDigit(rtf[i]):
            j = i
            if isChar(rtf[j], '-'):
                j = j + 1
            digits = 0
            while j < size and isDigit(rtf[j]):
                digits = digits + 1
                j = j + 1
                if digits > 10:
                    raise Exception('Error (at:%d): Too many digits in control word numeric argument.' % tokenStart)
            if digits:
                if j >= size:
                    raise Exception('Error (at:%d): Control Word without numeric argument end.' % tokenStart)
                i = j

        separator = ''
        if isChar(rtf[i], ' '):
            separator = ' '

        controlWord = rtf[tokenStart:tokenEnd]
        if tokenEnd < i:
            value = int(rtf[tokenEnd:i])
            if isString(controlWord, '\\bin'):
                # The payload starts after the optional space delimiter.
                # tokenBinN.toRTF() writes the control word and length
                # itself, so only the payload is handed over.
                payloadStart = i + len(separator)
                payloadEnd = payloadStart + value
                if value < 0 or payloadEnd > size:
                    raise Exception('Error (at:%d): Binary data length is out of range.' % tokenStart)
                self.tokens.append(tokenBinN(rtf[payloadStart:payloadEnd], separator))
                return payloadEnd
            self.tokens.append(tokenControlWordWithNumericArgument(controlWord, value, separator))
        else:
            self.tokens.append(tokenControlWord(controlWord, separator))

        # The space delimiter belongs to the control word; skip it.
        if separator:
            i = i + 1
        return i

    def toRTF(self):
        """Return the concatenated toRTF() output of every token."""
        return ''.join([token.toRTF() for token in self.tokens])
-
-
if __name__ == '__main__':
    # Command-line use: tokenize the RTF file named by the first argument,
    # run the parser pass over the tokens and overwrite the file in place.
    import sys
    if len(sys.argv) < 2:
        # Parenthesised single argument: valid as a Python 2 print
        # statement and as a Python 3 function call.
        print('Usage %prog rtfFileToConvert')
        sys.exit()

    # ``with`` closes each file even when reading, parsing or writing fails.
    with open(sys.argv[1], 'rb') as f:
        data = f.read()

    tokenizer = RtfTokenizer(data)
    parsedTokens = RtfTokenParser(tokenizer.tokens)
    data = parsedTokens.toRTF()

    with open(sys.argv[1], 'w') as f:
        f.write(data)
-
-