home *** CD-ROM | disk | FTP | other *** search
- unit Strutils;
- { string handling procedures for Pascal-style strings }
-
- interface
- uses Classes; { this unit defines TStringList }
-
- const
- TAB = #09;
- CR = #13;
- LF = #10;
- CRLF = CR + LF;
- SEPARATORS = [' ', '.', ',', '!', '?',';',':', '''', '"', '(', ')',
- '[', ']', '{', '}', TAB ];
- LOWCHARS = ['a'..'z'];
- UPCHARS = ['A'..'Z'];
-
- type
- charSet = set of char;
-
-
- tokenindex = record
- tstart, tend : integer;
- end;
-
- tokenindexes = array[1..255] of tokenindex;
-
-
-
- function LowCase( c : char ) : char;
- { return lowercase version of char c }
- function ChangeCase( const s : string ) : string;
- { return toggled-case version of string s }
- function removeLeadChars( const s : string; chars : charSet ) : string;
- { trim chars from Left of String s }
- function removeTrailingChars( const s : string; chars : charSet ) : string;
- { trim chars from Right of String s }
- function trimLeftStr( const s : string ) : string;
- { remove leading SEPARATORS from string 's' }
- function trimRightStr( const s : string ) : string;
- { remove trailing SEPARATORS from string 's' }
- function trimEndsStr( const s : string ) : string;
- { trim SEPARATORS from both left and right of string }
- procedure firstrestStr( const s : string; var first, rest : string );
- { parse s into 1st word first and remainder rest }
- function encrypt( const s : string; cryptint : integer ) : string;
- { encrypt a string by doing maths using cryptint }
- function decrypt( const s : string; cryptint : integer ) : string;
- { decrypt a string by doing maths using cryptint }
- procedure parse( const s : string; slist : TStringList );
- { parse string, s, into individual words in slist }
-
-
- {===NEW ROUTINES===}
- function TokenStart( const s : string; chars : charSet ) : integer;
- procedure firstTokenAt( s : string;
- var tokstart, tokend : integer;
- var first, rest : string );
- procedure tokensFoundAt( s : string;
- var num: integer;
- var tindexes: tokenindexes );
- (* // the following procedure is not public
- procedure firstTokenAtSIndex( s : string;
- var startindex, prevend : integer;
- var first, rest : string ); *)
- {===============}
-
- implementation
-
- function LowCase( c : char ) : char;
- begin
- if (c in UPCHARS) then
- Result := Chr(Ord(c) + 32)
- else Result := c;
- end;
-
- function ChangeCase( const s : string ) : string;
- { changes all lowercase chars to uppercase and vice versa }
- var
- i : integer;
- s2 : string;
- begin
- s2 := '';
- for i := 1 to Length( s ) do
- if (s[i] in LOWCHARS) then
- s2 := s2 + UpCase( s[i] )
- else s2 := s2 + LowCase( s[i] );
- Result := s2;
- end;
-
- function TokenStart( const s : string; chars : charSet ) : integer;
- { Find index (1 based) of first token in s. Return 0 if this is an empty string
- chars is a set of chars that separate tokens (e.g. whitespace, punctuation)
- TokenStart('.Hello', SEPARATORS );
- returns:
- 2 // the index of the letter 'H'
- NOTE:
- returns 0 if no token is found
- }
- var
- i : integer;
- begin
- i := 1;
- if not (Length(s) = 0) then
- while (s[i] in chars) do
- inc(i);
- if i > Length(s) then { return 0 if s nothing but characters in chars set }
- Result := 0
- else
- Result := i;
- end;
-
- procedure firstTokenAtSIndex( s : string;
- var startindex, prevend : integer;
- var first, rest : string );
- {
- This is really just a wrapper around the firstTokenAt() function.
- The differencs is that it keeps track of the index of each
- token relative to the original input string.
- This procedure is PRIVATE!
- }
- var
- tokstart,tokend : integer;
- begin
- tokstart := 0; tokend := 0;
- firstTokenAt( s, tokstart, tokend, first, rest);
- startindex := (prevend+tokstart)-1; // 0-index
- prevend := (prevend+tokend)-1;
- end;
-
- procedure tokensFoundAt( s : string;
- var num: integer;
- var tindexes: tokenindexes ); //!! NEW
- var
- first : string;
- te, ts : integer;
- begin
- first := '';
- ts := 0;
- te := 1;
- num := 0;
- while (s <> '') do // loop while there is more to process
- begin
- firstTokenAtSIndex( s, ts, te, first, s) ;
- if (te > ts) then // only if a token has been parsed (end index, te
- begin // is greater than start index, ts), inc num
- num := num + 1; // and fill a record with a pair of indexes
- tindexes[num].tstart := ts-1; // 0-indexed
- tindexes[num].tend := te-ts;
- end;
- end;
- end;
-
- procedure firstTokenAt( s : string;
- var tokstart, tokend : integer;
- var first, rest : string );
- { Return first token in string s and its start and end indices.
- Also return rest of string.
-
- arguments
- INPUT OUTPUT
- s a string
- tokstart index of 1st char of 1st token in s
- tokend index of end of 1st token + 1 in s
- first first token in s
- rest remainder of s after 1st token
-
- NOTE: indexed from 1, not 0
- when no more tokens in s, sets first and rest to '', sets
- tokstart and tokend to 0.
-
- examples:
- //i
-
- s:= '!Hello, world!'
- firstTokenAt( s,tokstart,tokend,first,rest);
-
- returns:
- tokstart=2, tokend = 7, first='Hello' rest=', world!'
-
- //ii
- rest := !Hello, world!;
- first := '';
- tokstart := 1;
- tokend := 0;
- while (tokstart <> 0) do
- firstTokenAt( rest, tokstart, tokend, first, rest);
-
- returns (in succession):
- tokstart=2, tokend = 7, first=Hello, rest=, world!
- tokstart=3, tokend = 8, first=world, rest=!
- }
- begin
- first := '';
- rest := '';
- tokstart := TokenStart( s, SEPARATORS );
- if tokstart = 0 then
- tokend := 0
- else
- begin
- tokend := tokstart;
- while not (s[tokend] in SEPARATORS) and not (tokend > Length( s ) ) do
- inc(tokend);
- first := copy( s, tokstart, tokend-tokstart );
- rest := copy(s, tokend, MAXINT );
- end;
- end;
-
-
- function removeLeadChars( const s : string; chars : charSet ) : string;
- { trims string 's' by removing chars in charSet 'chars'
- e.g. charSet might be [' ', '.' ]. So if s = ' .hello'
- this function would return: 'hello'.
-
- N.B. The Constant MAXINT is used to define the string
- length to the Copy function since, as Delphi Help states:
- "If Count specifies more characters than are available,
- the only the characters from S[Index] to the end of S are returned."
- MAXINT contains the largest possible Integer}
- var
- i : integer;
- begin
- i := 1;
- if not (Length(s) = 0) then
- while (s[i] in chars) do
- inc(i);
- Result := Copy( s, i, MAXINT );
- end;
-
- function removeTrailingChars( const s : string; chars : charSet ) : string;
- var
- i : integer;
- begin
- i := length(s);
- { count backward from end of characters found in charSet }
- if not (Length(s) = 0) then
- while (s[i] in chars) and (i <> 0 ) do
- dec(i);
- { then return a copy of the string minus the unwanted trailing chars }
- Result := copy(s, 1, i);
- end;
-
-
- function trimLeftStr( const s : string ) : string;
- { remove leading separators from string 's' }
- begin
- trimLeftStr := removeLeadChars( s, SEPARATORS );
- end;
-
- function trimRightStr( const s : string ) : string;
- { remove trailing separators from string 's' }
- begin
- trimRightStr := removeTrailingChars( s, SEPARATORS );
- end;
-
- function trimEndsStr( const s : string ) : string;
- { trim separators from both left and right of string }
- begin
- trimEndsStr := trimLeftStr(trimRightStr(s));
- end;
-
- procedure firstrestStr( const s : string; var first, rest : string );
- { Given a string, 's', parse out the first word as 'first' and
- leave the remainder of the string untouched as 'rest' }
- var
- i : integer;
- s2 : string;
- begin
- i := 1;
- first := '';
- rest := '';
- s2 := trimLeftStr( s );
- if not (Length(s2) = 0) then
- while not (s2[i] in SEPARATORS) and not (i > Length( s2 ) ) do
- inc(i);
- first := copy( s2, 1, i-1 );
- rest := copy(s2, i, MAXINT );
- end;
-
- function encrypt( const s : string; cryptint : integer ) : string;
- var
- i : integer;
- s2 : string;
- begin
- s2 := '';
- if not ( Length(s) = 0 ) then
- for i := 1 to Length( s ) do
- s2 := s2 + Chr(Ord( s[i] ) + cryptint );
- Result := s2;
- end;
-
- function decrypt( const s : string; cryptint : integer ) : string;
- var
- i : integer;
- s2 : string;
- begin
- s2 := '';
- if not ( Length(s) = 0 ) then
- for i := 1 to Length( s ) do
- s2 := s2 + Chr(Ord( s[i] ) - cryptint );
- Result := s2;
- end;
-
- procedure parse( const s : string; slist : TStringList );
- { parse s into a series of tokens (e.g. individual words) and
- return these in slist.
-
- IMPORTANT: It is the responsibility of the calling
- code to create a valid TStringList object and to free
- it upon completion! }
- var
- s2, f, r : string;
- begin
- s2 := TrimEndsStr(s);
- { loop through string, until there are no more words to be parsed }
- while s2 <> '' do
- begin
- firstrestStr(s2, f, r ); { parse 1st word, f, from string }
- slist.Add(f);
- s2 := r;
- end;
- end;
-
-
- end.
-