PC Plus SuperCD (UK) 2000 March

home *** CD-ROM | disk | FTP | other *** search

/ PC Plus SuperCD (UK) 2000 March / pcp161b.iso / handson / archive / Issue158 / delphi / RichEditTest / Strutils.pas < prev next >

Wrap

Pascal/Delphi Source File | 1999-08-13 | 9.5 KB | 325 lines

unit Strutils; { string handling procedures for Pascal-style strings } interface uses Classes; { this unit defines TStringList } const TAB = #09; CR = #13; LF = #10; CRLF = CR + LF; SEPARATORS = [' ', '.', ',', '!', '?',';',':', '''', '"', '(', ')', '[', ']', '{', '}', TAB ]; LOWCHARS = ['a'..'z']; UPCHARS = ['A'..'Z']; type charSet = set of char; tokenindex = record tstart, tend : integer; end; tokenindexes = array[1..255] of tokenindex; function LowCase( c : char ) : char; { return lowercase version of char c } function ChangeCase( const s : string ) : string; { return toggled-case version of string s } function removeLeadChars( const s : string; chars : charSet ) : string; { trim chars from Left of String s } function removeTrailingChars( const s : string; chars : charSet ) : string; { trim chars from Right of String s } function trimLeftStr( const s : string ) : string; { remove leading SEPARATORS from string 's' } function trimRightStr( const s : string ) : string; { remove trailing SEPARATORS from string 's' } function trimEndsStr( const s : string ) : string; { trim SEPARATORS from both left and right of string } procedure firstrestStr( const s : string; var first, rest : string ); { parse s into 1st word first and remainder rest } function encrypt( const s : string; cryptint : integer ) : string; { encrypt a string by doing maths using cryptint } function decrypt( const s : string; cryptint : integer ) : string; { decrypt a string by doing maths using cryptint } procedure parse( const s : string; slist : TStringList ); { parse string, s, into individual words in slist } {===NEW ROUTINES===} function TokenStart( const s : string; chars : charSet ) : integer; procedure firstTokenAt( s : string; var tokstart, tokend : integer; var first, rest : string ); procedure tokensFoundAt( s : string; var num: integer; var tindexes: tokenindexes ); (* // the following procedure is not public procedure firstTokenAtSIndex( s : string; var startindex, prevend : integer; var first, rest : string ); *) {===============} implementation function LowCase( c : char ) : char; begin if (c in UPCHARS) then Result := Chr(Ord(c) + 32) else Result := c; end; function ChangeCase( const s : string ) : string; { changes all lowercase chars to uppercase and vice versa } var i : integer; s2 : string; begin s2 := ''; for i := 1 to Length( s ) do if (s[i] in LOWCHARS) then s2 := s2 + UpCase( s[i] ) else s2 := s2 + LowCase( s[i] ); Result := s2; end; function TokenStart( const s : string; chars : charSet ) : integer; { Find index (1 based) of first token in s. Return 0 if this is an empty string chars is a set of chars that separate tokens (e.g. whitespace, punctuation) TokenStart('.Hello', SEPARATORS ); returns: 2 // the index of the letter 'H' NOTE: returns 0 if no token is found } var i : integer; begin i := 1; if not (Length(s) = 0) then while (s[i] in chars) do inc(i); if i > Length(s) then { return 0 if s nothing but characters in chars set } Result := 0 else Result := i; end; procedure firstTokenAtSIndex( s : string; var startindex, prevend : integer; var first, rest : string ); { This is really just a wrapper around the firstTokenAt() function. The differencs is that it keeps track of the index of each token relative to the original input string. This procedure is PRIVATE! } var tokstart,tokend : integer; begin tokstart := 0; tokend := 0; firstTokenAt( s, tokstart, tokend, first, rest); startindex := (prevend+tokstart)-1; // 0-index prevend := (prevend+tokend)-1; end; procedure tokensFoundAt( s : string; var num: integer; var tindexes: tokenindexes ); //!! NEW var first : string; te, ts : integer; begin first := ''; ts := 0; te := 1; num := 0; while (s <> '') do // loop while there is more to process begin firstTokenAtSIndex( s, ts, te, first, s) ; if (te > ts) then // only if a token has been parsed (end index, te begin // is greater than start index, ts), inc num num := num + 1; // and fill a record with a pair of indexes tindexes[num].tstart := ts-1; // 0-indexed tindexes[num].tend := te-ts; end; end; end; procedure firstTokenAt( s : string; var tokstart, tokend : integer; var first, rest : string ); { Return first token in string s and its start and end indices. Also return rest of string. arguments INPUT OUTPUT s a string tokstart index of 1st char of 1st token in s tokend index of end of 1st token + 1 in s first first token in s rest remainder of s after 1st token NOTE: indexed from 1, not 0 when no more tokens in s, sets first and rest to '', sets tokstart and tokend to 0. examples: //i s:= '!Hello, world!' firstTokenAt( s,tokstart,tokend,first,rest); returns: tokstart=2, tokend = 7, first='Hello' rest=', world!' //ii rest := !Hello, world!; first := ''; tokstart := 1; tokend := 0; while (tokstart <> 0) do firstTokenAt( rest, tokstart, tokend, first, rest); returns (in succession): tokstart=2, tokend = 7, first=Hello, rest=, world! tokstart=3, tokend = 8, first=world, rest=! } begin first := ''; rest := ''; tokstart := TokenStart( s, SEPARATORS ); if tokstart = 0 then tokend := 0 else begin tokend := tokstart; while not (s[tokend] in SEPARATORS) and not (tokend > Length( s ) ) do inc(tokend); first := copy( s, tokstart, tokend-tokstart ); rest := copy(s, tokend, MAXINT ); end; end; function removeLeadChars( const s : string; chars : charSet ) : string; { trims string 's' by removing chars in charSet 'chars' e.g. charSet might be [' ', '.' ]. So if s = ' .hello' this function would return: 'hello'. N.B. The Constant MAXINT is used to define the string length to the Copy function since, as Delphi Help states: "If Count specifies more characters than are available, the only the characters from S[Index] to the end of S are returned." MAXINT contains the largest possible Integer} var i : integer; begin i := 1; if not (Length(s) = 0) then while (s[i] in chars) do inc(i); Result := Copy( s, i, MAXINT ); end; function removeTrailingChars( const s : string; chars : charSet ) : string; var i : integer; begin i := length(s); { count backward from end of characters found in charSet } if not (Length(s) = 0) then while (s[i] in chars) and (i <> 0 ) do dec(i); { then return a copy of the string minus the unwanted trailing chars } Result := copy(s, 1, i); end; function trimLeftStr( const s : string ) : string; { remove leading separators from string 's' } begin trimLeftStr := removeLeadChars( s, SEPARATORS ); end; function trimRightStr( const s : string ) : string; { remove trailing separators from string 's' } begin trimRightStr := removeTrailingChars( s, SEPARATORS ); end; function trimEndsStr( const s : string ) : string; { trim separators from both left and right of string } begin trimEndsStr := trimLeftStr(trimRightStr(s)); end; procedure firstrestStr( const s : string; var first, rest : string ); { Given a string, 's', parse out the first word as 'first' and leave the remainder of the string untouched as 'rest' } var i : integer; s2 : string; begin i := 1; first := ''; rest := ''; s2 := trimLeftStr( s ); if not (Length(s2) = 0) then while not (s2[i] in SEPARATORS) and not (i > Length( s2 ) ) do inc(i); first := copy( s2, 1, i-1 ); rest := copy(s2, i, MAXINT ); end; function encrypt( const s : string; cryptint : integer ) : string; var i : integer; s2 : string; begin s2 := ''; if not ( Length(s) = 0 ) then for i := 1 to Length( s ) do s2 := s2 + Chr(Ord( s[i] ) + cryptint ); Result := s2; end; function decrypt( const s : string; cryptint : integer ) : string; var i : integer; s2 : string; begin s2 := ''; if not ( Length(s) = 0 ) then for i := 1 to Length( s ) do s2 := s2 + Chr(Ord( s[i] ) - cryptint ); Result := s2; end; procedure parse( const s : string; slist : TStringList ); { parse s into a series of tokens (e.g. individual words) and return these in slist. IMPORTANT: It is the responsibility of the calling code to create a valid TStringList object and to free it upon completion! } var s2, f, r : string; begin s2 := TrimEndsStr(s); { loop through string, until there are no more words to be parsed } while s2 <> '' do begin firstrestStr(s2, f, r ); { parse 1st word, f, from string } slist.Add(f); s2 := r; end; end; end.