home *** CD-ROM | disk | FTP | other *** search
- {WookieWare Home Defense Series cautiously presents
- String matching routines, Public Domain effective immediately.
- Please bestow credit in any distributed software or source.
- (Yes, they're tested. No, I don't claim to have written them in half
- an hour.) }
-
-
- Uses crt,dos;
- const seeds:string='ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz';
- var string1,string2:string;
- i:integer;
- percent,percent2,percent3:integer;
-
-
-
- function similar100(st1,st2:string):word;
- {This one started the whole thing. Loosely based on an algorithm called
- SIMILAR.ASM written by John W. Ratcliff and David E. Metzener
- only a lot more understandable. Returns percentage match. Pretty slow
- compared to the ASM versions. Case sensitive.
- Ron Nossaman Sept. 30 1994 }
- var score:integer;
-
- procedure compare(s1,s2:string);
- var s1l,s1r,s2l,s2r,looker:integer;
- begin
- s1l:=1;s2l:=1;
- s1r:=length(s1);
- s2r:=length(s2);
- looker:=s2l;
- {increment s1, sweep s2}
- repeat
- if s1[s1l]=s2[looker] then
- begin {got a match}
- inc(s1l); {next position on everything}
- inc(looker);
- s2l:=looker; {pull up starting position marker}
- inc(score);
- end else inc(looker); {no match, continue sweep}
- if looker>s2r then {looker swept past end of string}
- begin
- looker:=s2l; {restore looker to last unmatched position}
- if s2l>s2r then s1l:=s1r;
- inc(s1l); {next char in first string for matching}
- end;
- until s1l>s1r;
- end;
- begin
- score:=0;
- compare(st1,st2);
- compare(st2,st1);
- score:=(score*100)div(length(st1)+length(st2));
- similar100:=score;
- end;
-
-
-
-
- {$F+} {I don't know for sure, might be necessary in multi segment program}
-
- Function Match(Var s1:String; Var s2:String):word;
- {Uncle Ron's algorithm to compare two strings, returns percentage match}
- {Case sensitive}
- {Ron Nossaman Oct2, 1994}
- begin
- asm
- LES DI,[S2]
- LDS SI,[S1]
- Xor dx,dx {zero score}
- xor ax,ax
- cmp [si],al {is byte1 a zero?}
- je @strerr {yes, BAIL}
- cmp [di],al
- jne @docmp
- @strerr:
- jmp @millertime {BAIL}
- { ;neither strings zero length, do it}
- @docmp:
- cld
- Xor ax,ax
- mov al,[di] {get length S2}
- mov cx,ax {save in cx}
- add ax,di
- mov bx,ax {bx=pointer last byte S2}
- inc di {di=pointer first byte S2}
- Xor ax,ax
- mov al,[si]
- push ax
- add ax,cx
- mov cx,ax {total length both strings}
- pop ax
- add ax,si {ax=pointer last byte S1}
- inc si {si=pointer first byte S1}
- {ax=lastchar s1}
- {bx=lastchar s2}
- {si=firstchar s1}
- {di=firstchar s2}
-
- push cx {save 'total' characters}
- push bx {save s2 end}
- push ax {save s1 end}
-
- mov cx,0 {indicator of first pass through compare}
- jmp @compare
- @round2:
- LES DI,[S1] {swap string beginnings}
- LDS SI,[S2]
- inc si
- inc di
- pop bx {s2 end swapped}
- pop ax {s1 end swapped}
- {'total' still on stack}
- mov cx,1 {pass 2 indicator}
-
- @compare:
- push cx {save pass indicator}
- mov cx,di {let keeper remember starting point}
- @workloop:
- push ax {save eos pointers to free up registers}
- push bx
- xor ax,ax
- mov al,[si]
- mov bx,ax
- mov al,[di]
- cmp ax,bx {are chars equal?}
- jne @nomatch {no, pass on}
- inc si {yes, increment both string position pointers}
- inc di
- mov cx,di {keeper remembers new starting position}
- inc dx {score}
- jmp @progress
- @nomatch:
- inc di {no match, try next char in second string}
- @progress:
- pop bx {restore end of string pointers}
- pop ax
- cmp di,bx {is string 2 used up without match?}
- jle @nofix {nope, go on}
- mov di,cx {restore last unmatched position}
- cmp di,bx {is string2 matched to the end?}
- jle @nofix2 {no, go try next letter of string1}
- mov si,ax {yes, nothing left to compare, cancel further search}
- @nofix2:
- inc si {next char string1}
- @nofix:
- cmp si,ax {done yet?}
- jle @workloop {nope, hiho}
- pop cx {retreive pass indicator}
- cmp cx,0 {0=pass1}
- je @round2 {go back for pass 2}
- mov ax,dx {score}
- mov cx,100
- mul cx
- pop cx {get 'total' characters}
- div cx
- @millertime:
- mov @result,ax
- end;
- end;
-
-
-
- Function Match2(Var s1:String; Var s2:String):word;
- {Uncle Ron's algorithm to compare two strings, returns percentage match}
- {a tad smaller, faster. Still Case sensitive}
- {Ron Nossaman Oct 4, 1994}
- begin
- asm
- les di,[s2]
- lds si,[s1]
- xor ax,ax
- mov al,[si]
- cmp al,0
- je @nolength
- mov cx,ax {cx= length of string1}
- mov al,[di]
- cmp al,0
- jne @docmp {ax= length of string2}
- @nolength:
- jmp @millertime {BAIL}
-
- @docmp: { ;neither strings zero length, do it}
- cld
- mov dx,ax {save length(s2)}
- add ax,di
- mov bx,ax {bx= pointer last char s2}
- inc di {di= pointer first char s2}
- mov ax,dx {retreive length(s2)}
- add ax,cx {+length(s1)}
- push ax {save total length both strings until final scoring}
- mov ax,cx {length(s1)}
- add ax,si {ax=pointer last char s1}
- inc si {si=pointer first char s1}
- Xor dx,dx {zero score}
-
-
- {cast:} {ax=lastchar s1}
- {bx=lastchar s2}
- {si=firstchar s1}
- {di=firstchar s2}
- {dx=accumulated score}
- {cx=temporary position marker during compare}
-
-
- mov cx,0 {indicator of first pass through compare}
- jmp @compare
- @round2:
- les di,[s1] {swap string beginnings}
- lds si,[s2]
- inc si
- inc di
- xchg ax,bx {swap s1 and s2 end pointers}
- {'total' still on stack}
- mov cx,1 {pass 2 indicator}
-
- @compare:
- push cx {save pass indicator}
- mov cx,di {let keeper remember starting point}
- @workloop:
- push ax {save eos pointer to free up ax register}
- mov al,[si]
- mov ah,al
- mov al,[di]
- cmp al,ah {are chars equal?}
- jne @nomatch {no, pass on}
- inc si {yes, increment both string position pointers}
- inc di
- mov cx,di {keeper remembers new starting position}
- inc dx {score}
- jmp @progress
- @nomatch:
- inc di {no match, try next char in second string}
- @progress:
- pop ax {restore end of string pointer}
- cmp di,bx {is string 2 used up without match?}
- jle @nofix {nope, go on}
- mov di,cx {restore last unmatched position}
- cmp di,bx {is string2 matched to the end?}
- jle @nofix2 {no, go try next letter of string1}
- mov si,ax {yes, nothing left to compare, cancel further search}
- @nofix2:
- inc si {next char string1}
- @nofix:
- cmp si,ax {done yet?}
- jle @workloop {nope, hiho}
- pop cx {retreive pass indicator}
- cmp cx,0 {0=pass1}
- je @round2 {go back for pass 2}
- mov ax,dx {score}
- mov cx,100
- mul cx
- pop cx {get 'total' characters}
- div cx
- @millertime:
- mov @result,ax
- end;
- end;
-
-
-
-
-
-
- Function Match3(Var s1:String; Var s2:String; case_sensitive:boolean):word;
- {Uncle Ron's algorithm to compare two strings, returns percentage match}
- {Case sensitive/not switch Most versatile, speed comparison varies}
- {Ron Nossaman Oct 29, 1994}
- begin
- asm
- push ds
- les di,[s2]
- lds si,[s1]
- xor ax,ax
- SEGDS mov al,[si]
- cmp al,0
- je @nolength
- mov cx,ax {cx= length of string1}
- SEGES mov al,[di]
- cmp al,0
- jne @docmp {ax= length of string2}
- @nolength:
- jmp @millertime {BAIL}
-
- @docmp: { ;neither strings zero length, do it}
- cld
- mov dx,ax {save length(s2)}
- add ax,di
- mov bx,ax {bx= pointer last char s2}
- inc di {di= pointer first char s2}
- mov ax,dx {retreive length(s2)}
- add ax,cx {+length(s1)}
- push ax {save total length both strings until final scoring}
- mov ax,cx {length(s1)}
- add ax,si {ax=pointer last char s1}
- inc si {si=pointer first char s1}
- Xor dx,dx {zero score}
-
-
- {cast:} {ax=lastchar s1}
- {bx=lastchar s2}
- {si=firstchar s1}
- {di=firstchar s2}
- {dx=accumulated score}
- {cx=temporary position marker during compare}
-
-
- mov cx,0 {indicator flag of first pass through compare}
- {cheap dodge, since you can't call & ret in T.P. asm}
- jmp @compare
- @round2:
- les di,[s1] {swap string beginnings}
- lds si,[s2]
- inc si
- inc di
- xchg ax,bx {swap s1 and s2 end pointers}
- {'total' still on stack}
- mov cx,1 {pass 2 indicator}
-
- @compare:
- push cx {save pass indicator}
- mov cx,di {let keeper remember starting point}
- @workloop:
- push ax {save eos pointer to free up ax register}
- SEGDS mov al,[si]
- cmp case_sensitive,0
- jnz @CaseOK1
- cmp al,'Z'
- jg @CaseOK1
- cmp al,'A'
- jl @CaseOK1
- or al,$20
- @CaseOK1:
- mov ah,al
- SEGES mov al,[di]
- cmp case_sensitive,0
- jnz @CaseOK2
- cmp al,'Z'
- jg @CaseOK2
- cmp al,'A'
- jl @CaseOK2
- or al,$20
- @CaseOK2:
- cmp al,ah {are chars equal?}
- jne @nomatch {no, pass on}
- inc si {yes, increment both string position pointers}
- inc di
- mov cx,di {keeper remembers new starting position}
- inc dx {score}
- jmp @progress
- @nomatch:
- inc di {no match, try next char in second string}
- @progress:
- pop ax {restore end of string pointer}
- cmp di,bx {is string 2 used up without match?}
- jle @nofix {nope, go on}
- mov di,cx {restore last unmatched position}
- cmp di,bx {is string 2 matched to the end?}
- jle @nofix2 {no, go try next letter of string1}
- mov si,ax {yes, nothing left to compare, cancel further search}
- @nofix2:
- inc si {next char string1}
- @nofix:
- cmp si,ax {done yet?}
- jle @workloop {nope, hiho}
- pop cx {retreive pass indicator}
- cmp cx,0 {0=pass1}
- je @round2 {go back for pass 2}
- mov ax,dx {score}
- mov cx,100
- mul cx
- pop cx {get 'total' characters}
- div cx
- @millertime:
- mov @result,ax
- pop ds
- end;
- end;
-
-
-
-
- function bickell2(s1,s2:string):integer; {not quite, but similar}
- const
- weight:array[ord('a')..ord('{')]of byte=(
- 3,6,5,4,3,5,5,4,3,8,7,4,5,3,3,5,7,4,3,3,4,6,5,8,8,9,0);
- (* a b c d e f g h i j k l m n o p q r s t u v w x y z { *)
- var sort1,sort2:string;
- i,bick1,bick2:integer;
- b1,b2:array[ord('a')..ord('{')]of byte;
-
- begin
- sort1:=s1; sort2:=s2;
- for i:=1 to length(sort1) do if (sort1[i]<'a')or(sort1[i]>'z') then
- begin
- case sort1[i] of
- 'A'..'Z':sort1[i]:=char(ord(sort1[i])or 32);
- else sort1[i]:='{';
- end;
- end;
- for i:=1 to length(sort2) do if (sort2[i]<'a')or(sort2[i]>'z') then
- begin
- case sort2[i] of
- 'A'..'Z':sort2[i]:=char(ord(sort2[i])or 32);
- else sort2[i]:='{';
- end;
- end;
- fillchar(b1,sizeof(b1),0);
- fillchar(b2,sizeof(b2),0);
-
- { weed out duplicates, sort}
- for i:=1 to length(sort1) do b1[ord(sort1[i])]:=weight[ord(sort1[i])];
- for i:=1 to length(sort2) do b2[ord(sort2[i])]:=weight[ord(sort2[i])];
-
- {get total for comparison}
- bick1:=0;
- for i:=ord('a') to ord('{') do bick1:=bick1+b1[i]+b2[i];
-
- {add up all letters common to both words}
- bick2:=0;
- for i:=ord('a') to ord('{') do if b1[i]<>0 then
- if (b1[i]=b2[i]) then bick2:=bick2+b1[i]+b2[i];
-
- {figure match}
- bickell2:=(bick2*100)div bick1;
- end;
-
-
-
- procedure timer;
- var i:integer;
- oldpercent,percent:integer;
- h1,m1,s1,hund1,h2,m2,s2,hund2,h3,m3,s3,hund3:Word;
- strt,stp:real;
- begin
- string1:='ThanKyo';
- string2:='tHanKyouR';
- write('Timing "Similar100" ');
- GetTime(h1,m1,s1,hund1);
- for i:=1 to 30000 do percent:=similar100(string1,string2);
- gettime(h2,m2,s2,hund2);
- strt:=(h1*3600)+(m1*60)+s1+(hund1/100);
- stp:=(h2*3600)+(m2*60)+s2+(hund2/100);
- WriteLn(percent,'% Elapsed time ',(stp-strt):0:2,' seconds');
- write('Timing "Match" ');
- GetTime(h1,m1,s1,hund1);
- for i:=1 to 30000 do percent:=match(string1,string2);
- gettime(h2,m2,s2,hund2);
- strt:=(h1*3600)+(m1*60)+s1+(hund1/100);
- stp:=(h2*3600)+(m2*60)+s2+(hund2/100);
- WriteLn(percent,'% Elapsed time ',(stp-strt):0:2,' seconds');
- write('Timing "Match2" ');
- gettime(h1,m1,s1,hund1);
- for i:=1 to 30000 do percent:=match2(string1,string2);
- gettime(h2,m2,s2,hund2);
- strt:=(h1*3600)+(m1*60)+s1+(hund1/100);
- stp:=(h2*3600)+(m2*60)+s2+(hund2/100);
- WriteLn(percent,'% Elapsed time ',(stp-strt):0:2,' seconds');
- delay(100);
- write('Timing "Match3" ');
- GetTime(h1,m1,s1,hund1);
- for i:=1 to 30000 do percent:=match3(string1,string2,false);
- gettime(h2,m2,s2,hund2);
- strt:=(h1*3600)+(m1*60)+s1+(hund1/100);
- stp:=(h2*3600)+(m2*60)+s2+(hund2/100);
- WriteLn(percent,'% Elapsed time ',(stp-strt):0:2,' seconds');
- delay(100);
- write('Timing "Bickell" ');
- GetTime(h1,m1,s1,hund1);
- for i:=1 to 30000 do percent:=bickell2(string1,string2);
- gettime(h2,m2,s2,hund2);
- strt:=(h1*3600)+(m1*60)+s1+(hund1/100);
- stp:=(h2*3600)+(m2*60)+s2+(hund2/100);
- WriteLn(percent,'% Elapsed time ',(stp-strt):0:2,' seconds');
- end;
- {$F-}
-
- Begin
- clrscr;
-
- repeat
- string1:='';
- for i:=1 to random(10)+2 do string1:=string1+copy(seeds,random(52)+1,1);
- string2:='';
- for i:=1 to random(10)+2 do string2:=string2+copy(seeds,random(52)+1,1);
- percent:=bickell2(String1,String2);
- percent2:=match3(string1,string2,false);
- if (percent>50)or(percent2>50)
- then writeln(percent,' ',percent2,' ', string1,' ',string2);
- until keypressed;
-
- for i:=1 to 3 do timer;
- end.