home *** CD-ROM | disk | FTP | other *** search
- ;From the article "Pattern Matching - The Gestalt Approach"
- ;by John W. Ratcliff and David E. Metzener
- ;Dr. Dobbs Journal, July 1988
- ;Rewritten for Turbo Pascal v3.0 inline code and INLINE.COM.
- ;Now expects normal Pascal strings (first byte is length byte).
- ;Usage:
- ;
- ;FUNCTION Simil(VAR Str1,Str2) : INTEGER;
- ; VAR result : INTEGER;
- ; BEGIN
- ; (*$I simil_p.obj*) (* this code *)
- ; Simil := result; (*return the function result*)
- ; END; (* of Simil *)
- ;
- ;See TESTSIM.PAS for required global variables.
- ;
- ;Extensive remarks removed. See other versions.
- ; David Kirschbaum
- ; Toad Hall
- ; kirsch@braggvax.ARPA
-
- push bp ;save Turbo's BP
- push ES ;and ES
- cld ;insure forward
- les si,>str1[bp] ;ES:SI = str1 addr
- mov di,>str2[bp] ;ES:DI = str2 addr
- ;
- xor ax,ax ;get a 0
- mov [>score],ax ;zero out SCORE
- mov [>stcknum],ax ;init nr of stack entries to 0
- ES:
- cmp [si],al ;is it a null string?
- je StrErr ;can't process null strings
- ES:
- cmp [di],al ;is it a null string?
- jne DoCmp ;Both strs ok, so process them
- StrErr:
- jmp Exit ;exit routine
- ;
- DoCmp:
- xor ah,ah ;clear msb
- ES:
- mov al,[si] ;get str1 length byte
- mov cx,ax ;accum total len in CX
- add ax,si ;add in string start
- mov bx,ax ;BX points to str1 last char
- inc si ;bump SI past str1 length byte
- ;
- xor ah,ah ;clear msb
- ES:
- mov al,[di] ;str2 length byte
- add cx,ax ;accum total len in CX
- add ax,di ;add in start
- mov bp,ax ;BP points to str2 last char
- inc di ;bump DI past str2 length byte
- ;
- mov [>total],cx ;store total string length
- ;
- call PushSt ;push values for
- ; ; first call to SIMILIARITY
- Main:
- cmp word [>stcknum],0 ;anything on the stack?
- je Done ;no, then all done!
- ;
- call PopSt ;get regs set up for a compare call
- call Compare ;do compare for this substring set
- or dx,dx ;if nothing in common
- ; then nothing to push
- jz Main ;try another set
- ;
- shl dx,1 ;*2
- add [>score],dx ;add into score
- mov bp,[>stcknum] ;get nr of entry
- ; I want to look at
- shl bp,1 ;get AX ready
- ; to access string stacks
- DS: ;override SS:bp
- mov si,[>ststr1L][bp] ;move L1 into SI or L1
- mov bx,[>cL1] ;move CL1 into BX or R1
- DS: ;override SS:bp
- mov di,[>ststr2L][bp] ;move L2 into DI or L2
- mov cx,[>cL2] ;move CL2 into CX temporarily
- DS: ;override SS:bp
- mov ax,[>ststr1R][bp] ;get old R1 off of stack
- mov [>cL1],ax ;place in CL1 temporarily
- DS: ;override SS:bp
- mov ax,[>ststr2R][bp] ;get old R2 off of stack
- mov [>cL2],ax ;save in CL2 temporarily
- mov bp,cx ;place CL2 into BP
- ;
- cmp bx,si ;compare CL1 to L1
- je Chrght ;if zero, then nothing
- ; ; on left side str1
- cmp bp,di ;compare CL2 to L2
- je Chrght ;if zero, then nothing
- ; ; on left side str2
- dec bx ;point to last part
- ; of left side str1
- dec bp ;ditto, str2
- cmp bx,si ;only one char to examine?
- jne PushIt ;no->we need to examine this
- cmp bp,di ; only one char in both?
- je Chrght ; nothing to look at
- ; ; if both only contain 1 char
- PushIt:
- call PushSt ;push left side on stack
- Chrght:
- mov si,[>cR1] ;move CR1 into SI or L1
- mov bx,[>cL1] ;move R1 into BX or R1
- mov di,[>cR2] ;move CR2 into DI or L2
- mov bp,[>cL2] ;move R2 into BP or R2
- ;
- cmp si,bx ;compare CR1 to R1
- je Main ;If zero, then nothing
- ; on right side str1
- cmp di,bp ;compare CR2 to R2
- je Main ;if zero, then nothing
- ; on right side str2
- inc si ;point to last part
- ; of right side str1
- inc di ;ditto, str2
- cmp bx,si ;only one char to examine?
- jne Push2 ;no-> examine it
- cmp bp,di ; only 1 char to examine in both?
- je Main ; yes-> get next string off of stack
- ;
- Push2:
- call PushSt ;push right side on stack
- jmp short Main ;do next level of compares
- ;
- Done:
- mov ax,[>score] ;get score into AX for MUL
- or ax,ax ;zero score?
- jz Donit ;yep, skip this mess
- mov cx,100 ; get 100 into CX for MUL
- mul cx ; multiply by 100
- mov cx,[>total] ; get total chars for divide
- div cx ; divide by total
- Donit:
- jmp Exit ;clean up and exit
- ;
- ;subroutines
- Compare:
- mov [>s2ed],bp ;store end of str2
- xor dx,dx ;init MAXCHARS
- ForL3:
- push di ;save start of str2
- ForL4:
- push di ;save start of str2
- push si ;save start of str1
- mov cx,[>s2ed] ;set up for calc
- ; of length of str1
- sub cx,di ;get length of str1 -1
- inc cx ;make proper length
- push cx ;save str1 starting len
- push DS ;save our DS a sec
- mov ax,ES
- mov DS,ax ;force DS=ES for the compare
- repz cmpsb ;compare strings
- pop DS ;restore our DS
- jz Equal ;if equal, then skip fixes
- inc cx ; inc back because CMPS decs
- ; even if not equal
- Equal:
- pop ax ;get str1 starting len
- sub ax,cx ;get length of common characters
- jnz NewMax ;more than 0 chars matched
- ;
- pop si ;get back start of str1
- pop di ;ditto str2
- ;
- Reent:
- inc di ;do the next char no matter what
- Reent2:
- cmp di,bp ;are we done with str2?
- jle ForL4 ;no, then do next string compare
- pop di ;get back start of str2
- inc si ;next char in str1 to scan
- cmp si,bx ;are we done with str1?
- jle ForL3 ;no, then do next string compare
- ret ;MAXCHARS is in DX
- ;
- NewMax:
- pop si ;get back start of str1
- pop di ;and str2
- cmp ax,dx ;greater than MAXCHARS?
- jg NewMx2 ;yes, update new MAXCHARS
- ;and pointers
- add di,ax ; skip past matching chars
- jmp short Reent2 ; reenter main loop
- ;
- NewMx2:
- mov [>cL1],si ;put begin of match of str1
- mov [>cL2],di ;ditto, str2
- mov cx,ax ;save new MAXCHARS
- ;
- sub ax,dx ;get delta for adjustment
- ; to ends of strings
- sub bx,ax ;adjust end of str1
- sub bp,ax ; and str2
- ;
- mov dx,cx ;new MAXCHARS
- dec cx ;set up for advance
- ; to last matching char
- add di,cx ;bump to last matching char str2
- mov [>cR2],di ;put end of match of str2
- add cx,si ;bump to last matching char str1
- mov [>cR1],cx ;put end of match of str1
- jmp short Reent ;reenter inner loop
- ;
- PushSt:
- ; On Entry:
- ; BX = R1 (right side of str1)
- ; DS:SI = L1 (left side of str1)
- ; ES:Di = L2 (left side of str2)
- ; BP = R2 (right side of str2)
- ;
- mov cx,bp ;save R2
- mov bp,[>stcknum]
- shl bp,1 ;*2 for words
- DS: ;override SS:bp
- mov [>ststr1L][bp],si ;put left side of str1 on stack
- DS: ;override SS:bp
- mov [>ststr1R][bp],bx ;same with right side
- DS: ;override SS:bp
- mov [>ststr2L][bp],di ;put left side of str2 on stack
- DS: ;override SS:bp
- mov [>ststr2R][bp],cx ;same with right side
- inc word [>stcknum] ;bump nr of stack entries
- mov bp,cx ;restore R2
- ret
- ;
- PopSt:
- ; BX = R1 (right side of str1)
- ; DS:SI = L1 (left side of str1)
- ; ES:Di = L2 (left side of str2)
- ; BP = R2 (right side of str2)
- ;
- dec word [>stcknum] ;point to last entry on stack
- mov bp,[>stcknum] ;get number of stack entries
- shl bp,1 ;*2 for words
- DS: ;override SS:bp
- mov si,[>ststr1L][bp] ;restore left side
- ; of str1 from stack
- DS: ;override SS:bp
- mov bx,[>ststr1R][bp] ;ditto, right side
- DS: ;override SS:bp
- mov di,[>ststr2L][bp] ;restore left side
- ; of str2 from stack
- DS: ;override SS:bp
- mov bp,[>ststr2R][bp] ;ditto, right side
- ret
- ;
- Exit:
- pop ES ;restore ES
- pop bp ;restore Turbo's BP
- mov >result[bp],ax ;return local value