OS/2 Shareware BBS: 10 Tools

home *** CD-ROM | disk | FTP | other *** search

/ OS/2 Shareware BBS: 10 Tools / 10-Tools.zip / sa104os2.zip / SATHR104.ZIP / SATHER / CONTRIB / HUU / STRMATCH.SA < prev next >

Wrap

Text File | 1994-10-25 | 7KB | 224 lines

-- Copyright (C) International Computer Science Institute, 1994. COPYRIGHT -- -- NOTICE: This code is provided "AS IS" WITHOUT ANY WARRANTY and is subject -- -- to the terms of the SATHER LIBRARY GENERAL PUBLIC LICENSE contained in -- -- the file "Doc/License" of the Sather distribution. The license is also -- -- available from ICSI, 1947 Center St., Suite 600, Berkeley CA 94704, USA. -- --------> Please email comments to "sather-bugs@icsi.berkeley.edu". <---------- -- File: strmatch.sa -- Author: Le van Huu (huu@ICSI.Berkeley.EDU, levan@imiucca.csi.unimi.it)) --*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --* FUNCTION: Main class for two well known string matching algorithms. --* The algorithms come from --* Thomas H. Cormen, Charles E., Introduction to Algorithms, MIT Press,1990, --* pp. 853-885. --* --* PROBLEM DEFINITION: --* Assume the text string is an array T[0..n-1] and the --* pattern string an array P[0..m-1]. We say that P occurs --* with valid shifts s (or occurs beginning at positions s) in T if: --* T[s+j] = P[j] for 0 =< j =< m -1 --* The algorithms implemented are the naive string matcher algorithm --* and the Knuth-Morris-Pratt algorithm. --* --* NAIVE ALGORITHM: --* The algorithm uses P as a template to slide next to the text T --* (from left to right, character by character). For each shift, --* it compares all P[i] of T[s+i]. If no mismatched character --* occurs, we obtain a valid shift position, otherwise we have --* to slide P next. --* The algorithm takes time O((n-m+1)m). --* --* KNUTH-MORRIS-PRATT: --* When a mismatched character occurs at position q of P, --* instead of moving the template P to right only of a character, --* the algorithm shifts to right of k characters, where k is the --* longhest prefix of P that is also a suffix of Pq. --* The suffixes of all possible prefixes of the P string is --* precomputed by an appropriate routine. It takes time O(m). --* The algorithm runs in time O(m+n). --* --* CLASSES: STRMATCH, PREFIX --*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class STRMATCH is -- Main class for all string matching algorithms. -- Each routine corresponds to an algorithm and can be called by -- any application. It contains two parameters corresponding to the -- text string (T) and the pattern string (P) respectively. -- The strings must be terminated by the '\0' character. -- Each routine also returns -- an integer array representing the valid shift values. const init_size:INT:=10; -- initial size of shift array const ENDARR:INT:=-2; -- special value indicating the array end shared arr_obj : TASK_ARRAY {INT}; -- object for checking array sizes attr n:INT; -- text length attr m:INT; -- pattern length attr s:INT; -- shift position attr arr_shift:ARRAY{INT}; -- array of shift positions attr index_s:INT; -- index of arr_shift attr lim:INT; initialize(T: STR, P: STR) is arr_obj := #TASK_ARRAY {INT}; arr_shift:=#ARRAY{INT}(init_size); n:= T.length; -- to discard the character '\0' m:= P.length ; lim:= n-m+1; s:=0; index_s:= 0; end; create:STRMATCH is res::=new; return res; end; naive (T: STR, P: STR): ARRAY{INT} is -- returns array of all shift numbers itext:INT; -- text index ipattern:INT; -- pattern index found:BOOL; initialize(T, P); loop until! (s=lim); found:=true; itext:= s; ipattern:=0; loop until! (ipattern >= m); if P[ipattern] /= T[itext] then found := false; ipattern := m; itext := itext + 1; else itext := itext + 1; end; -- if ipattern := ipattern +1; end; -- inner loop if found then -- pattern matched a substring of text at position s. -- Before storing s into arr_shift, we must check if there -- are enougth rooms arr_shift := arr_obj.check_size (arr_shift, index_s); arr_shift[index_s] := s; index_s:= index_s + 1; end; s:= s+1; end; -- outer loop arr_shift[index_s]:= ENDARR; return arr_shift; end; kmp (T: STR, P: STR): ARRAY{INT} is -- Knuth-Morris-Pratt algorithm q, i : INT; initialize(T, P); prefix_id:PREFIX:= #PREFIX; prefix_id.compute_prefix (P, m); q := -1; i := 0; lim := m-1; loop until! ( i>= n); loop if q >= 0 and P[q+1] /= T[i] then q := prefix_id.arr_prefix [q]; else break!; end; end; -- inner loop if P[q+1] = T[i] then q:= q+1; end; if q = lim then -- pattern matched a substring of text at position i-m+1. q:=prefix_id.arr_prefix[q]; arr_shift := arr_obj.check_size (arr_shift, index_s); arr_shift[index_s] := i-m+1; index_s:= index_s + 1; end; i:= i+1; end; -- outer loop arr_shift[index_s]:= ENDARR; -- special value to indicate the array end. return arr_shift; end; -- kmp routine end; -- class STRMATCH --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`` class PREFIX is -- Useful for the KMP algorithm. -- Prepares the arr_prefix containing the suffixes -- of all possible prefixes of the P string const prefix_size:INT:=10; attr arr_prefix:ARRAY{INT}; -- array of prefix positions create:PREFIX is res ::= new; return res; end; compute_prefix(P:STR, m:INT) is -- Prepares the arr_prefix containing the suffixes -- of all possible prefixes of the P string k, q :INT; arr_obj:TASK_ARRAY {INT} := #TASK_ARRAY {INT}; arr_prefix:=#ARRAY{INT}(prefix_size); arr_prefix[0] := -1; k := -1; q := 1; loop until! (q >= m); loop if k >= 0 and P[k+1] /= P[q] then k := arr_prefix [k]; else break!; end; end; -- inner loop if P[k+1] = P[q] then k:= k+1; end; arr_prefix := arr_obj.check_size (arr_prefix, q); arr_prefix[q] := k; q:= q+1; end; -- outer loop end; -- compute_prefix print (arr_prefix: ARRAY {INT}, m :INT) is i:INT :=0; #OUT + "prefix=" + "\n"; loop until! (i=m); #OUT + arr_prefix[i] + " "; i := i +1; end; #OUT + "\n"; end; -- print end; -- class PREFIX --~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`` class TASK_ARRAY {T} is -- class shared by objects that need to check if there are sufficient rooms -- in their arrays for storing data. -- Enventually, the built-in toutine "resize" will extend -- the array dimensione doubling its preceding size. create:TASK_ARRAY {T} is res ::= new; return res; end; check_size (arr: ARRAY{T}, index:INT) : ARRAY{T} is if index >= (arr.asize - 1) then arr:= arr.resize(2*arr.asize); end; return arr; end; end; -- class TASK_ARRAY