home *** CD-ROM | disk | FTP | other *** search
- (*
- ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
- ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
- ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
- ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
- ▓▓▓▓▓▓▓▓·── ──·▓▓▓▓▓▓▓▓▓▓▓
- ▓▓▓▓▓▓▓▓│ │░░▓▓▓▓▓▓▓▓▓
- ▓▓▓▓▓▓▓▓ Unit was conceived, designed and written ░░▓▓▓▓▓▓▓▓▓
- ▓▓▓▓▓▓▓▓ by Floor A.C. Naaijkens for ░░▓▓▓▓▓▓▓▓▓
- ▓▓▓▓▓▓▓▓ UltiHouse Software / The ECO Group. ░░▓▓▓▓▓▓▓▓▓
- ▓▓▓▓▓▓▓▓ ░░▓▓▓▓▓▓▓▓▓
- ▓▓▓▓▓▓▓▓ (C) MCMXCII by EUROCON PANATIONAL CORPORATION. ░░▓▓▓▓▓▓▓▓▓
- ▓▓▓▓▓▓▓▓ All Rights Reserved for The ECO Group. ░░▓▓▓▓▓▓▓▓▓
- ▓▓▓▓▓▓▓▓ ░░▓▓▓▓▓▓▓▓▓
- ▓▓▓▓▓▓▓▓│ │░░▓▓▓▓▓▓▓▓▓
- ▓▓▓▓▓▓▓▓·── ──·░░▓▓▓▓▓▓▓▓▓
- ▓▓▓▓▓▓▓▓▓▓░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░▓▓▓▓▓▓▓▓▓
- ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
- ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
- ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
- *)
- {$A-,B-,D+,E-,F-,I-,L-,N-,O+,R-,S-,V-}
- {$M 65520,0,655350}
- unit eco_srch;
- interface
-
- const
- maxbuffer = 32768;
-
- var
- maxpos : word;
- buffer : array[1..maxbuffer] of byte;
- target : string;
- table1 : array[char] of byte;
- table2 : string;
- ch : char;
-
-
- procedure make_boyer_moore_table(
- var target : string;
- var table1;
- var table2;
- casesensitive : boolean
- );
-
- function boyer_moore_search(
- var text_array; start,
- text_length: word; var target: string; var table1;
- var table2; casesensitive: boolean
- ): word;
-
-
-
-
- implementation
-
-
-
- uses
- crt;
-
-
-
-
- procedure make_boyer_moore_table;
- var
- l_target : string absolute target;
- l_table1 : array[char] of byte absolute table1;
- l_table2 : string absolute table2;
- reverse : string;
- i,j,k : integer;
- ch : char;
-
- begin { make_boyer_moore_table }
-
- {-------------------------------------------------------}
- { creation of the first table, it contains values for }
- { every caracter in the ascii table. all caracters not }
- { contained in the search string are given the largest }
- { possible value. }
- {-------------------------------------------------------}
-
- {-------------------------------------------------------}
- { first set target to all uppercase if search is not to }
- { be sensitive }
- {-------------------------------------------------------}
- if not casesensitive then for i := 1 to length(l_target) do
- l_target[i] := upcase(l_target[i]);
-
- {--------------------------------------------------------}
- { initialize the table to the largest skip value ie: }
- { length(target) }
- {--------------------------------------------------------}
-
- fillchar(l_table1, sizeof(l_table1), target[0]);
-
- {--------------------------------------------------------}
- { replace values for caracters contained in target with }
- { their distance from the right. except for the last }
- { caracter!!! }
- {--------------------------------------------------------}
-
- for i := 1 to ord(target[0])-1 do begin
- l_table1[target[i]] := ord(target[0])-i;
- end;
-
- {--------------------------------------------------------}
- { in case of a case insensitive search, the distance }
- { should be the same for a caracter be-it upper-case or }
- { lower-case. the distance is allways the right-most }
- { occurance of the caracter }
- {--------------------------------------------------------}
-
- if not casesensitive then begin
- for ch := 'a' to 'z' do begin
- if l_table1[ch] < l_table1[upcase(ch)] then begin
- l_table1[upcase(ch)] := l_table1[ch];
- end else begin
- l_table1[ch] := l_table1[upcase(ch)];
- end;
- end;
- end;
-
- {--------------------------------------------------------}
- { creation of the second table, it contains values for }
- { each position in the search string. essentially, each }
- { value is the distance the distance to move left before }
- { a match can be found with the right right-sub-string }
- { can be found again. the right sub-string starts with }
- { the caracter immediately to the right of the current }
- { position in the search string. }
- { }
- { example: abcabcde }
- { }
- { if a miss occurs on the "E" and we are at position 10 }
- { in the text then we can move only one caracter to the }
- { right since all we know is that the caracter at }
- { position 10 is not an "E". }
- { }
- { if the miss occurs on the "D" then we can move 8 }
- { positions to the right since we now know that the "E" }
- { matched but there are no more "E"'s in the search }
- { string. all positions to the left of "D" would also }
- { generate a move of 8 positions since the substring }
- { "DE" does not repeat either nor does "CDE" then "BCDE" }
- { etc... since any string in this case is a superset of }
- { it's right substrings }
- { }
- { if the search string had been: }
- { }
- { kfgabcabc }
- { }
- { then a miss on the first "C" from the left would }
- { generate a move of only three since we would then know }
- { that the text matched "ABC" and what miss-matched with }
- { the first "C" might be a "G". both values are used }
- { when a miss occurs, the greatest of the two values is }
- { then used to move the search string forward }
- {--------------------------------------------------------}
-
-
- {--------------------------------------------------------}
- { since we want the right-most position of the }
- { sub-string, set a new string that is a reverse of the }
- { original. }
- {--------------------------------------------------------}
-
- l_table2 := target;
- reverse := target;
-
- {--------------------------------------------------------}
- { now reverse l_table2 into reverse }
- {--------------------------------------------------------}
- for i := 0 to length(target) -1 do
- if casesensitive then reverse[i+1] := target[ length(target) - i] else
- reverse[i+1] := upcase(target[ length(target) -i]);
-
- {--------------------------------------------------------}
- { now compute sub-string positions, as soon as we get a }
- { max-length for any sub-string, all subsequent }
- { sub-string are also max-length }
- {--------------------------------------------------------}
-
- for i := length(target)-1 downto 1 do begin
- if pos(copy(reverse, 1, length(target)-i), copy(reverse,2,300))>0 then
- l_table2[i] := chr(pos(copy(reverse, 1, length(target)-i),
- copy(reverse,2,300))
- ) else l_table2[i] := chr(length(target));
- end;
-
- {--------------------------------------------------------}
- { last position is allways a move of at most 1 position }
- {--------------------------------------------------------}
-
- l_table2[length(target)] := chr(1);
-
- end; { make_boyer_moore_table }
-
-
-
- function boyer_moore_search;
- var
- l_text_array : array[1..maxbuffer] of char absolute text_array;
- i,j, offset : word;
- match, found : boolean;
- distance,
- distance1,
- distance2 : byte;
- l_table1 : array[char] of byte absolute table1;
- l_table2 : record
- dummy : byte;
- distance : array[1..255] of byte;
- end absolute table2;
-
- begin { boyer_moore_search }
- i := start; found := false; offset := 0; distance := 0;
- j := length(target);
- while ((i+distance) < text_length-length(target)) and not found do begin
- inc(i, distance-(length(target)-j));
- j := length(target); match := true;
- while (j > 0) and match do begin
- if not casesensitive then match :=
- (target[j] = upcase(l_text_array[i+j])) else
- match := (target[j] = l_text_array[i+j]);
- if match then dec(j);
- end;
- found := match;
- if not found then begin
- distance1 := l_table1[l_text_array[i+j]];
- distance2 := l_table2.distance[j];
- end;
- if distance1 < distance2 then distance := distance2 else
- distance := distance1;
- end;
- boyer_moore_search := ord(found) * (i);
- end; { boyer_moore_search }
-
-
-
- {happy}end. { unit }
-
-
- {
-
- msg # 1700
- date: 22 dec 91 21:55:00
- from: mark ouellet
- to: rich veraa
- subj: boyer-moore search (1/4)
- ____________________________________________________________________________
-
- area:pascal
- eid:534f 1796af2c 4d453220
- here is part one, to be saved in boyermko.pas
-
- boyermko.pas (22 december 1991) (mark ouellet)
-
- this unit provides facilities for searching a text for
- a target using the boyer-moore search method.
-
- only peculiarity, the second skip structure is actually a
- string, this made sens since the second table has a 1:1
- ratio with the target length
- }
-
-