home *** CD-ROM | disk | FTP | other *** search
- Inline(
- {;From the article "Pattern Matching - The Gestalt Approach"}
- {;by John W. Ratcliff and David E. Metzener}
- {;Dr. Dobbs Journal, July 1988}
- {;Rewritten for Turbo Pascal v3.0 inline code and INLINE.COM.}
- {;Now expects normal Pascal strings (first byte is length byte).}
- {;Usage:}
- {;}
- {;FUNCTION Simil(VAR Str1,Str2) : INTEGER;}
- {; VAR result : INTEGER;}
- {; BEGIN}
- {; (*$I simil_p.obj*) (* this code *)}
- {; Simil := result; (*return the function result*)}
- {; END; (* of Simil *)}
- {;}
- {;See TESTSIM.PAS for required global variables.}
- {;}
- {;Extensive remarks removed. See other versions.}
- {; David Kirschbaum}
- {; Toad Hall}
- {; kirsch@braggvax.ARPA}
- $55 { push bp ;save Turbo's BP}
- /$06 { push ES ;and ES}
- /$FC { cld ;insure forward}
- /$C4/$B6/>STR1 { les si,>str1[bp] ;ES:SI = str1 addr}
- /$8B/$BE/>STR2 { mov di,>str2[bp] ;ES:DI = str2 addr}
- {;}
- /$31/$C0 { xor ax,ax ;get a 0}
- /$A3/>SCORE { mov [>score],ax ;zero out SCORE}
- /$A3/>STCKNUM { mov [>stcknum],ax ;init nr of stack entries to 0}
- /$26 { ES:}
- /$38/$04 { cmp [si],al ;is it a null string?}
- /$74/$05 { je StrErr ;can't process null strings}
- /$26 { ES:}
- /$38/$05 { cmp [di],al ;is it a null string?}
- /$75/$03 { jne DoCmp ;Both strs ok, so process them}
- {StrErr:}
- /$E9/$4E/$01 { jmp Exit ;exit routine}
- {;}
- {DoCmp:}
- /$30/$E4 { xor ah,ah ;clear msb}
- /$26 { ES:}
- /$8A/$04 { mov al,[si] ;get str1 length byte}
- /$89/$C1 { mov cx,ax ;accum total len in CX}
- /$01/$F0 { add ax,si ;add in string start}
- /$89/$C3 { mov bx,ax ;BX points to str1 last char}
- /$46 { inc si ;bump SI past str1 length byte}
- {;}
- /$30/$E4 { xor ah,ah ;clear msb}
- /$26 { ES:}
- /$8A/$05 { mov al,[di] ;str2 length byte}
- /$01/$C1 { add cx,ax ;accum total len in CX}
- /$01/$F8 { add ax,di ;add in start}
- /$89/$C5 { mov bp,ax ;BP points to str2 last char}
- /$47 { inc di ;bump DI past str2 length byte}
- {;}
- /$89/$0E/>TOTAL { mov [>total],cx ;store total string length}
- {;}
- /$E8/$ED/$00 { call PushSt ;push values for}
- {; ; first call to SIMILIARITY}
- {Main:}
- /$81/$3E/>STCKNUM/$00/$00{ cmp word [>stcknum],0 ;anything on the stack?}
- /$74/$76 { je Done ;no, then all done!}
- {;}
- /$E8/$05/$01 { call PopSt ;get regs set up for a compare call}
- /$E8/$85/$00 { call Compare ;do compare for this substring set}
- /$09/$D2 { or dx,dx ;if nothing in common}
- { ; then nothing to push}
- /$74/$EE { jz Main ;try another set}
- {;}
- /$D1/$E2 { shl dx,1 ;*2}
- /$01/$16/>SCORE { add [>score],dx ;add into score}
- /$8B/$2E/>STCKNUM { mov bp,[>stcknum] ;get nr of entry}
- { ; I want to look at}
- /$D1/$E5 { shl bp,1 ;get AX ready}
- { ; to access string stacks}
- /$3E { DS: ;override SS:bp}
- /$8B/$B6/>STSTR1L { mov si,[>ststr1L][bp] ;move L1 into SI or L1}
- /$8B/$1E/>CL1 { mov bx,[>cL1] ;move CL1 into BX or R1}
- /$3E { DS: ;override SS:bp}
- /$8B/$BE/>STSTR2L { mov di,[>ststr2L][bp] ;move L2 into DI or L2}
- /$8B/$0E/>CL2 { mov cx,[>cL2] ;move CL2 into CX temporarily}
- /$3E { DS: ;override SS:bp}
- /$8B/$86/>STSTR1R { mov ax,[>ststr1R][bp] ;get old R1 off of stack}
- /$A3/>CL1 { mov [>cL1],ax ;place in CL1 temporarily}
- /$3E { DS: ;override SS:bp}
- /$8B/$86/>STSTR2R { mov ax,[>ststr2R][bp] ;get old R2 off of stack}
- /$A3/>CL2 { mov [>cL2],ax ;save in CL2 temporarily}
- /$89/$CD { mov bp,cx ;place CL2 into BP}
- {;}
- /$39/$F3 { cmp bx,si ;compare CL1 to L1}
- /$74/$11 { je Chrght ;if zero, then nothing}
- {; ; on left side str1}
- /$39/$FD { cmp bp,di ;compare CL2 to L2}
- /$74/$0D { je Chrght ;if zero, then nothing}
- {; ; on left side str2}
- /$4B { dec bx ;point to last part}
- { ; of left side str1}
- /$4D { dec bp ;ditto, str2}
- /$39/$F3 { cmp bx,si ;only one char to examine?}
- /$75/$04 { jne PushIt ;no->we need to examine this}
- /$39/$FD { cmp bp,di ; only one char in both?}
- /$74/$03 { je Chrght ; nothing to look at}
- {; ; if both only contain 1 char}
- {PushIt:}
- /$E8/$96/$00 { call PushSt ;push left side on stack}
- {Chrght:}
- /$8B/$36/>CR1 { mov si,[>cR1] ;move CR1 into SI or L1}
- /$8B/$1E/>CL1 { mov bx,[>cL1] ;move R1 into BX or R1}
- /$8B/$3E/>CR2 { mov di,[>cR2] ;move CR2 into DI or L2}
- /$8B/$2E/>CL2 { mov bp,[>cL2] ;move R2 into BP or R2}
- {;}
- /$39/$DE { cmp si,bx ;compare CR1 to R1}
- /$74/$95 { je Main ;If zero, then nothing}
- { ; on right side str1}
- /$39/$EF { cmp di,bp ;compare CR2 to R2}
- /$74/$91 { je Main ;if zero, then nothing}
- { ; on right side str2}
- /$46 { inc si ;point to last part}
- { ; of right side str1}
- /$47 { inc di ;ditto, str2}
- /$39/$F3 { cmp bx,si ;only one char to examine?}
- /$75/$04 { jne Push2 ;no-> examine it}
- /$39/$FD { cmp bp,di ; only 1 char to examine in both?}
- /$74/$87 { je Main ; yes-> get next string off of stack}
- {;}
- {Push2:}
- /$E8/$71/$00 { call PushSt ;push right side on stack}
- /$EB/$82 { jmp short Main ;do next level of compares}
- {;}
- {Done:}
- /$A1/>SCORE { mov ax,[>score] ;get score into AX for MUL}
- /$09/$C0 { or ax,ax ;zero score?}
- /$74/$0B { jz Donit ;yep, skip this mess}
- /$B9/$64/$00 { mov cx,100 ; get 100 into CX for MUL}
- /$F7/$E1 { mul cx ; multiply by 100}
- /$8B/$0E/>TOTAL { mov cx,[>total] ; get total chars for divide}
- /$F7/$F1 { div cx ; divide by total}
- {Donit:}
- /$E9/$9C/$00 { jmp Exit ;clean up and exit}
- {;}
- {;subroutines}
- {Compare:}
- /$89/$2E/>S2ED { mov [>s2ed],bp ;store end of str2}
- /$31/$D2 { xor dx,dx ;init MAXCHARS}
- {ForL3:}
- /$57 { push di ;save start of str2}
- {ForL4:}
- /$57 { push di ;save start of str2}
- /$56 { push si ;save start of str1}
- /$8B/$0E/>S2ED { mov cx,[>s2ed] ;set up for calc}
- { ; of length of str1}
- /$29/$F9 { sub cx,di ;get length of str1 -1}
- /$41 { inc cx ;make proper length}
- /$51 { push cx ;save str1 starting len}
- /$1E { push DS ;save our DS a sec}
- /$8C/$C0 { mov ax,ES}
- /$8E/$D8 { mov DS,ax ;force DS=ES for the compare}
- /$F3/$A6 { repz cmpsb ;compare strings}
- /$1F { pop DS ;restore our DS}
- /$74/$01 { jz Equal ;if equal, then skip fixes}
- /$41 { inc cx ; inc back because CMPS decs}
- { ; even if not equal}
- {Equal:}
- /$58 { pop ax ;get str1 starting len}
- /$29/$C8 { sub ax,cx ;get length of common characters}
- /$75/$0E { jnz NewMax ;more than 0 chars matched}
- {;}
- /$5E { pop si ;get back start of str1}
- /$5F { pop di ;ditto str2}
- {;}
- {Reent:}
- /$47 { inc di ;do the next char no matter what}
- {Reent2:}
- /$39/$EF { cmp di,bp ;are we done with str2?}
- /$7E/$DF { jle ForL4 ;no, then do next string compare}
- /$5F { pop di ;get back start of str2}
- /$46 { inc si ;next char in str1 to scan}
- /$39/$DE { cmp si,bx ;are we done with str1?}
- /$7E/$D8 { jle ForL3 ;no, then do next string compare}
- /$C3 { ret ;MAXCHARS is in DX}
- {;}
- {NewMax:}
- /$5E { pop si ;get back start of str1}
- /$5F { pop di ;and str2}
- /$39/$D0 { cmp ax,dx ;greater than MAXCHARS?}
- /$7F/$04 { jg NewMx2 ;yes, update new MAXCHARS}
- { ;and pointers}
- /$01/$C7 { add di,ax ; skip past matching chars}
- /$EB/$EB { jmp short Reent2 ; reenter main loop}
- {;}
- {NewMx2:}
- /$89/$36/>CL1 { mov [>cL1],si ;put begin of match of str1}
- /$89/$3E/>CL2 { mov [>cL2],di ;ditto, str2}
- /$89/$C1 { mov cx,ax ;save new MAXCHARS}
- {;}
- /$29/$D0 { sub ax,dx ;get delta for adjustment}
- { ; to ends of strings}
- /$29/$C3 { sub bx,ax ;adjust end of str1}
- /$29/$C5 { sub bp,ax ; and str2}
- {;}
- /$89/$CA { mov dx,cx ;new MAXCHARS}
- /$49 { dec cx ;set up for advance}
- { ; to last matching char}
- /$01/$CF { add di,cx ;bump to last matching char str2}
- /$89/$3E/>CR2 { mov [>cR2],di ;put end of match of str2}
- /$01/$F1 { add cx,si ;bump to last matching char str1}
- /$89/$0E/>CR1 { mov [>cR1],cx ;put end of match of str1}
- /$EB/$C9 { jmp short Reent ;reenter inner loop}
- {;}
- {PushSt:}
- {; On Entry:}
- {; BX = R1 (right side of str1)}
- {; DS:SI = L1 (left side of str1)}
- {; ES:Di = L2 (left side of str2)}
- {; BP = R2 (right side of str2)}
- {;}
- /$89/$E9 { mov cx,bp ;save R2}
- /$8B/$2E/>STCKNUM { mov bp,[>stcknum]}
- /$D1/$E5 { shl bp,1 ;*2 for words}
- /$3E { DS: ;override SS:bp}
- /$89/$B6/>STSTR1L { mov [>ststr1L][bp],si ;put left side of str1 on stack}
- /$3E { DS: ;override SS:bp}
- /$89/$9E/>STSTR1R { mov [>ststr1R][bp],bx ;same with right side}
- /$3E { DS: ;override SS:bp}
- /$89/$BE/>STSTR2L { mov [>ststr2L][bp],di ;put left side of str2 on stack}
- /$3E { DS: ;override SS:bp}
- /$89/$8E/>STSTR2R { mov [>ststr2R][bp],cx ;same with right side}
- /$FF/$06/>STCKNUM { inc word [>stcknum] ;bump nr of stack entries}
- /$89/$CD { mov bp,cx ;restore R2}
- /$C3 { ret}
- {;}
- {PopSt:}
- {; BX = R1 (right side of str1)}
- {; DS:SI = L1 (left side of str1)}
- {; ES:Di = L2 (left side of str2)}
- {; BP = R2 (right side of str2)}
- {;}
- /$FF/$0E/>STCKNUM { dec word [>stcknum] ;point to last entry on stack}
- /$8B/$2E/>STCKNUM { mov bp,[>stcknum] ;get number of stack entries}
- /$D1/$E5 { shl bp,1 ;*2 for words}
- /$3E { DS: ;override SS:bp}
- /$8B/$B6/>STSTR1L { mov si,[>ststr1L][bp] ;restore left side}
- { ; of str1 from stack}
- /$3E { DS: ;override SS:bp}
- /$8B/$9E/>STSTR1R { mov bx,[>ststr1R][bp] ;ditto, right side}
- /$3E { DS: ;override SS:bp}
- /$8B/$BE/>STSTR2L { mov di,[>ststr2L][bp] ;restore left side}
- { ; of str2 from stack}
- /$3E { DS: ;override SS:bp}
- /$8B/$AE/>STSTR2R { mov bp,[>ststr2R][bp] ;ditto, right side}
- /$C3 { ret}
- {;}
- {Exit:}
- /$07 { pop ES ;restore ES}
- /$5D { pop bp ;restore Turbo's BP}
- /$89/$86/>RESULT { mov >result[bp],ax ;return local value}
- );
-