home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
OS/2 Shareware BBS: 10 Tools
/
10-Tools.zip
/
sa104os2.zip
/
SATHR104.ZIP
/
SATHER
/
CONTRIB
/
HUU
/
STRMATCH.SA
< prev
next >
Wrap
Text File
|
1994-10-25
|
7KB
|
224 lines
-- Copyright (C) International Computer Science Institute, 1994. COPYRIGHT --
-- NOTICE: This code is provided "AS IS" WITHOUT ANY WARRANTY and is subject --
-- to the terms of the SATHER LIBRARY GENERAL PUBLIC LICENSE contained in --
-- the file "Doc/License" of the Sather distribution. The license is also --
-- available from ICSI, 1947 Center St., Suite 600, Berkeley CA 94704, USA. --
--------> Please email comments to "sather-bugs@icsi.berkeley.edu". <----------
-- File: strmatch.sa
-- Author: Le van Huu (huu@ICSI.Berkeley.EDU, levan@imiucca.csi.unimi.it))
--*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--* FUNCTION: Main class for two well known string matching algorithms.
--* The algorithms come from
--* Thomas H. Cormen, Charles E., Introduction to Algorithms, MIT Press,1990,
--* pp. 853-885.
--*
--* PROBLEM DEFINITION:
--* Assume the text string is an array T[0..n-1] and the
--* pattern string an array P[0..m-1]. We say that P occurs
--* with valid shifts s (or occurs beginning at positions s) in T if:
--* T[s+j] = P[j] for 0 =< j =< m -1
--* The algorithms implemented are the naive string matcher algorithm
--* and the Knuth-Morris-Pratt algorithm.
--*
--* NAIVE ALGORITHM:
--* The algorithm uses P as a template to slide next to the text T
--* (from left to right, character by character). For each shift,
--* it compares all P[i] of T[s+i]. If no mismatched character
--* occurs, we obtain a valid shift position, otherwise we have
--* to slide P next.
--* The algorithm takes time O((n-m+1)m).
--*
--* KNUTH-MORRIS-PRATT:
--* When a mismatched character occurs at position q of P,
--* instead of moving the template P to right only of a character,
--* the algorithm shifts to right of k characters, where k is the
--* longhest prefix of P that is also a suffix of Pq.
--* The suffixes of all possible prefixes of the P string is
--* precomputed by an appropriate routine. It takes time O(m).
--* The algorithm runs in time O(m+n).
--*
--* CLASSES: STRMATCH, PREFIX
--*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
class STRMATCH is
-- Main class for all string matching algorithms.
-- Each routine corresponds to an algorithm and can be called by
-- any application. It contains two parameters corresponding to the
-- text string (T) and the pattern string (P) respectively.
-- The strings must be terminated by the '\0' character.
-- Each routine also returns
-- an integer array representing the valid shift values.
const init_size:INT:=10; -- initial size of shift array
const ENDARR:INT:=-2; -- special value indicating the array end
shared arr_obj : TASK_ARRAY {INT}; -- object for checking array sizes
attr n:INT; -- text length
attr m:INT; -- pattern length
attr s:INT; -- shift position
attr arr_shift:ARRAY{INT}; -- array of shift positions
attr index_s:INT; -- index of arr_shift
attr lim:INT;
initialize(T: STR, P: STR) is
arr_obj := #TASK_ARRAY {INT};
arr_shift:=#ARRAY{INT}(init_size);
n:= T.length; -- to discard the character '\0'
m:= P.length ;
lim:= n-m+1;
s:=0;
index_s:= 0;
end;
create:STRMATCH is
res::=new;
return res;
end;
naive (T: STR, P: STR): ARRAY{INT} is
-- returns array of all shift numbers
itext:INT; -- text index
ipattern:INT; -- pattern index
found:BOOL;
initialize(T, P);
loop until! (s=lim);
found:=true;
itext:= s;
ipattern:=0;
loop until! (ipattern >= m);
if P[ipattern] /= T[itext] then
found := false;
ipattern := m;
itext := itext + 1;
else
itext := itext + 1;
end; -- if
ipattern := ipattern +1;
end; -- inner loop
if found then
-- pattern matched a substring of text at position s.
-- Before storing s into arr_shift, we must check if there
-- are enougth rooms
arr_shift := arr_obj.check_size (arr_shift, index_s);
arr_shift[index_s] := s;
index_s:= index_s + 1;
end;
s:= s+1;
end; -- outer loop
arr_shift[index_s]:= ENDARR;
return arr_shift;
end;
kmp (T: STR, P: STR): ARRAY{INT} is
-- Knuth-Morris-Pratt algorithm
q, i : INT;
initialize(T, P);
prefix_id:PREFIX:= #PREFIX;
prefix_id.compute_prefix (P, m);
q := -1;
i := 0;
lim := m-1;
loop until! ( i>= n);
loop
if q >= 0 and P[q+1] /= T[i] then
q := prefix_id.arr_prefix [q];
else
break!;
end;
end; -- inner loop
if P[q+1] = T[i] then
q:= q+1;
end;
if q = lim then
-- pattern matched a substring of text at position i-m+1.
q:=prefix_id.arr_prefix[q];
arr_shift := arr_obj.check_size (arr_shift, index_s);
arr_shift[index_s] := i-m+1;
index_s:= index_s + 1;
end;
i:= i+1;
end; -- outer loop
arr_shift[index_s]:= ENDARR; -- special value to indicate the array end.
return arr_shift;
end; -- kmp routine
end; -- class STRMATCH
--~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
class PREFIX is
-- Useful for the KMP algorithm.
-- Prepares the arr_prefix containing the suffixes
-- of all possible prefixes of the P string
const prefix_size:INT:=10;
attr arr_prefix:ARRAY{INT}; -- array of prefix positions
create:PREFIX is
res ::= new;
return res;
end;
compute_prefix(P:STR, m:INT) is
-- Prepares the arr_prefix containing the suffixes
-- of all possible prefixes of the P string
k, q :INT;
arr_obj:TASK_ARRAY {INT} := #TASK_ARRAY {INT};
arr_prefix:=#ARRAY{INT}(prefix_size);
arr_prefix[0] := -1;
k := -1;
q := 1;
loop until! (q >= m);
loop
if k >= 0 and P[k+1] /= P[q] then
k := arr_prefix [k];
else
break!;
end;
end; -- inner loop
if P[k+1] = P[q] then
k:= k+1;
end;
arr_prefix := arr_obj.check_size (arr_prefix, q);
arr_prefix[q] := k;
q:= q+1;
end; -- outer loop
end; -- compute_prefix
print (arr_prefix: ARRAY {INT}, m :INT) is
i:INT :=0;
#OUT + "prefix=" + "\n";
loop until! (i=m);
#OUT + arr_prefix[i] + " ";
i := i +1;
end;
#OUT + "\n";
end; -- print
end; -- class PREFIX
--~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~``
class TASK_ARRAY {T} is
-- class shared by objects that need to check if there are sufficient rooms
-- in their arrays for storing data.
-- Enventually, the built-in toutine "resize" will extend
-- the array dimensione doubling its preceding size.
create:TASK_ARRAY {T} is
res ::= new;
return res;
end;
check_size (arr: ARRAY{T}, index:INT) : ARRAY{T} is
if index >= (arr.asize - 1) then
arr:= arr.resize(2*arr.asize);
end;
return arr;
end;
end; -- class TASK_ARRAY