home *** CD-ROM | disk | FTP | other *** search
/ Source Code 1992 March / Source_Code_CD-ROM_Walnut_Creek_March_1992.iso / msdos / txtutl / gestalt.arc / SIMIL_P.OBJ < prev   
Encoding:
Text File  |  1988-12-06  |  16.2 KB  |  259 lines

  1. Inline(
  2.                          {;From the article "Pattern Matching - The Gestalt Approach"}
  3.                          {;by John W. Ratcliff and David E. Metzener}
  4.                          {;Dr. Dobbs Journal, July 1988}
  5.                          {;Rewritten for Turbo Pascal v3.0 inline code and INLINE.COM.}
  6.                          {;Now expects normal Pascal strings (first byte is length byte).}
  7.                          {;Usage:}
  8.                          {;}
  9.                          {;FUNCTION Simil(VAR Str1,Str2) : INTEGER;}
  10.                          {;  VAR  result : INTEGER;}
  11.                          {;  BEGIN}
  12.                          {;    (*$I simil_p.obj*)    (* this code *)}
  13.                          {;    Simil := result;  (*return the function result*)}
  14.                          {;  END;  (* of Simil *)}
  15.                          {;}
  16.                          {;See TESTSIM.PAS for required global variables.}
  17.                          {;}
  18.                          {;Extensive remarks removed.  See other versions.}
  19.                          {; David Kirschbaum}
  20.                          {; Toad Hall}
  21.                          {; kirsch@braggvax.ARPA}
  22.   $55                    {  push  bp              ;save Turbo's BP}
  23.   /$06                   {  push  ES              ;and ES}
  24.   /$FC                   {  cld                   ;insure forward}
  25.   /$C4/$B6/>STR1         {  les   si,>str1[bp]    ;ES:SI = str1 addr}
  26.   /$8B/$BE/>STR2         {  mov   di,>str2[bp]    ;ES:DI = str2 addr}
  27.                          {;}
  28.   /$31/$C0               {  xor   ax,ax           ;get a 0}
  29.   /$A3/>SCORE            {  mov   [>score],ax     ;zero out SCORE}
  30.   /$A3/>STCKNUM          {  mov   [>stcknum],ax   ;init nr of stack entries to 0}
  31.   /$26                   {  ES:}
  32.   /$38/$04               {  cmp   [si],al         ;is it a null string?}
  33.   /$74/$05               {  je    StrErr          ;can't process null strings}
  34.   /$26                   {   ES:}
  35.   /$38/$05               {   cmp  [di],al         ;is it a null string?}
  36.   /$75/$03               {   jne  DoCmp           ;Both strs ok, so process them}
  37.                          {StrErr:}
  38.   /$E9/$4E/$01           {  jmp   Exit            ;exit routine}
  39.                          {;}
  40.                          {DoCmp:}
  41.   /$30/$E4               {  xor   ah,ah           ;clear msb}
  42.   /$26                   {  ES:}
  43.   /$8A/$04               {  mov   al,[si]         ;get str1 length byte}
  44.   /$89/$C1               {  mov   cx,ax           ;accum total len in CX}
  45.   /$01/$F0               {  add   ax,si           ;add in string start}
  46.   /$89/$C3               {  mov   bx,ax           ;BX points to str1 last char}
  47.   /$46                   {  inc   si              ;bump SI past str1 length byte}
  48.                          {;}
  49.   /$30/$E4               {  xor   ah,ah           ;clear msb}
  50.   /$26                   {  ES:}
  51.   /$8A/$05               {  mov   al,[di]         ;str2 length byte}
  52.   /$01/$C1               {  add   cx,ax           ;accum total len in CX}
  53.   /$01/$F8               {  add   ax,di           ;add in start}
  54.   /$89/$C5               {  mov   bp,ax           ;BP points to str2 last char}
  55.   /$47                   {  inc   di              ;bump DI past str2 length byte}
  56.                          {;}
  57.   /$89/$0E/>TOTAL        {  mov   [>total],cx     ;store total string length}
  58.                          {;}
  59.   /$E8/$ED/$00           {  call  PushSt          ;push values for}
  60.                          {;                        ; first call to SIMILIARITY}
  61.                          {Main:}
  62.   /$81/$3E/>STCKNUM/$00/$00{  cmp   word [>stcknum],0 ;anything on the stack?}
  63.   /$74/$76               {  je    Done            ;no, then all done!}
  64.                          {;}
  65.   /$E8/$05/$01           {  call  PopSt           ;get regs set up for a compare call}
  66.   /$E8/$85/$00           {  call  Compare         ;do compare for this substring set}
  67.   /$09/$D2               {  or    dx,dx           ;if nothing in common}
  68.                          {                        ; then nothing to push}
  69.   /$74/$EE               {  jz    Main            ;try another set}
  70.                          {;}
  71.   /$D1/$E2               {  shl   dx,1              ;*2}
  72.   /$01/$16/>SCORE        {  add   [>score],dx       ;add into score}
  73.   /$8B/$2E/>STCKNUM      {  mov   bp,[>stcknum]     ;get nr of entry}
  74.                          {                          ; I want to look at}
  75.   /$D1/$E5               {  shl   bp,1              ;get AX ready}
  76.                          {                          ; to access string stacks}
  77.   /$3E                   {  DS:                     ;override SS:bp}
  78.   /$8B/$B6/>STSTR1L      {  mov   si,[>ststr1L][bp] ;move L1 into SI or L1}
  79.   /$8B/$1E/>CL1          {  mov   bx,[>cL1]         ;move CL1 into BX or R1}
  80.   /$3E                   {  DS:                     ;override SS:bp}
  81.   /$8B/$BE/>STSTR2L      {  mov   di,[>ststr2L][bp] ;move L2 into DI or L2}
  82.   /$8B/$0E/>CL2          {  mov   cx,[>cL2]         ;move CL2 into CX temporarily}
  83.   /$3E                   {  DS:                     ;override SS:bp}
  84.   /$8B/$86/>STSTR1R      {  mov   ax,[>ststr1R][bp] ;get old R1 off of stack}
  85.   /$A3/>CL1              {  mov   [>cL1],ax         ;place in CL1 temporarily}
  86.   /$3E                   {  DS:                     ;override SS:bp}
  87.   /$8B/$86/>STSTR2R      {  mov   ax,[>ststr2R][bp] ;get old R2 off of stack}
  88.   /$A3/>CL2              {  mov   [>cL2],ax         ;save in CL2 temporarily}
  89.   /$89/$CD               {  mov   bp,cx             ;place CL2 into BP}
  90.                          {;}
  91.   /$39/$F3               {  cmp   bx,si           ;compare CL1 to L1}
  92.   /$74/$11               {  je    Chrght          ;if zero, then nothing}
  93.                          {;                       ; on left side str1}
  94.   /$39/$FD               {  cmp   bp,di           ;compare CL2 to L2}
  95.   /$74/$0D               {  je    Chrght          ;if zero, then nothing}
  96.                          {;                       ; on left side str2}
  97.   /$4B                   {  dec   bx              ;point to last part}
  98.                          {                        ; of left side str1}
  99.   /$4D                   {  dec   bp              ;ditto, str2}
  100.   /$39/$F3               {  cmp   bx,si           ;only one char to examine?}
  101.   /$75/$04               {  jne   PushIt          ;no->we need to examine this}
  102.   /$39/$FD               {   cmp  bp,di           ; only one char in both?}
  103.   /$74/$03               {   je   Chrght          ;  nothing to look at}
  104.                          {;                       ;  if both only contain 1 char}
  105.                          {PushIt:}
  106.   /$E8/$96/$00           {  call  PushSt          ;push left side on stack}
  107.                          {Chrght:}
  108.   /$8B/$36/>CR1          {  mov   si,[>cR1]       ;move CR1 into SI or L1}
  109.   /$8B/$1E/>CL1          {  mov   bx,[>cL1]       ;move R1 into BX or R1}
  110.   /$8B/$3E/>CR2          {  mov   di,[>cR2]       ;move CR2 into DI or L2}
  111.   /$8B/$2E/>CL2          {  mov   bp,[>cL2]       ;move R2 into BP or R2}
  112.                          {;}
  113.   /$39/$DE               {  cmp   si,bx           ;compare CR1 to R1}
  114.   /$74/$95               {  je    Main            ;If zero, then nothing}
  115.                          {                        ; on right side str1}
  116.   /$39/$EF               {  cmp   di,bp           ;compare CR2 to R2}
  117.   /$74/$91               {  je    Main            ;if zero, then nothing}
  118.                          {                        ; on right side str2}
  119.   /$46                   {  inc   si              ;point to last part}
  120.                          {                        ; of right side str1}
  121.   /$47                   {  inc   di              ;ditto, str2}
  122.   /$39/$F3               {  cmp   bx,si           ;only one char to examine?}
  123.   /$75/$04               {  jne   Push2           ;no-> examine it}
  124.   /$39/$FD               {   cmp  bp,di           ; only 1 char to examine in both?}
  125.   /$74/$87               {   je   Main            ; yes-> get next string off of stack}
  126.                          {;}
  127.                          {Push2:}
  128.   /$E8/$71/$00           {  call  PushSt          ;push right side on stack}
  129.   /$EB/$82               {  jmp   short Main      ;do next level of compares}
  130.                          {;}
  131.                          {Done:}
  132.   /$A1/>SCORE            {  mov   ax,[>score]     ;get score into AX for MUL}
  133.   /$09/$C0               {  or    ax,ax           ;zero score?}
  134.   /$74/$0B               {  jz    Donit           ;yep, skip this mess}
  135.   /$B9/$64/$00           {   mov  cx,100          ; get 100 into CX for MUL}
  136.   /$F7/$E1               {   mul  cx              ; multiply by 100}
  137.   /$8B/$0E/>TOTAL        {   mov  cx,[>total]     ; get total chars for divide}
  138.   /$F7/$F1               {   div  cx              ; divide by total}
  139.                          {Donit:}
  140.   /$E9/$9C/$00           {  jmp   Exit            ;clean up and exit}
  141.                          {;}
  142.                          {;subroutines}
  143.                          {Compare:}
  144.   /$89/$2E/>S2ED         {  mov   [>s2ed],bp      ;store end of str2}
  145.   /$31/$D2               {  xor   dx,dx           ;init MAXCHARS}
  146.                          {ForL3:}
  147.   /$57                   {  push  di              ;save start of str2}
  148.                          {ForL4:}
  149.   /$57                   {  push  di              ;save start of str2}
  150.   /$56                   {  push  si              ;save start of str1}
  151.   /$8B/$0E/>S2ED         {  mov   cx,[>s2ed]      ;set up for calc}
  152.                          {                        ; of length of str1}
  153.   /$29/$F9               {  sub   cx,di           ;get length of str1 -1}
  154.   /$41                   {  inc   cx              ;make proper length}
  155.   /$51                   {  push  cx              ;save str1 starting len}
  156.   /$1E                   {  push  DS              ;save our DS a sec}
  157.   /$8C/$C0               {  mov   ax,ES}
  158.   /$8E/$D8               {  mov   DS,ax           ;force DS=ES for the compare}
  159.   /$F3/$A6               {  repz  cmpsb           ;compare strings}
  160.   /$1F                   {  pop   DS              ;restore our DS}
  161.   /$74/$01               {  jz    Equal           ;if equal, then skip fixes}
  162.   /$41                   {   inc  cx              ; inc back because CMPS decs}
  163.                          {                        ; even if not equal}
  164.                          {Equal:}
  165.   /$58                   {  pop   ax              ;get str1 starting len}
  166.   /$29/$C8               {  sub   ax,cx           ;get length of common characters}
  167.   /$75/$0E               {  jnz   NewMax          ;more than 0 chars matched}
  168.                          {;}
  169.   /$5E                   {  pop   si              ;get back start of str1}
  170.   /$5F                   {  pop   di              ;ditto str2}
  171.                          {;}
  172.                          {Reent:}
  173.   /$47                   {  inc   di              ;do the next char no matter what}
  174.                          {Reent2:}
  175.   /$39/$EF               {  cmp   di,bp           ;are we done with str2?}
  176.   /$7E/$DF               {  jle   ForL4           ;no, then do next string compare}
  177.   /$5F                   {  pop   di              ;get back start of str2}
  178.   /$46                   {  inc   si              ;next char in str1 to scan}
  179.   /$39/$DE               {  cmp   si,bx           ;are we done with str1?}
  180.   /$7E/$D8               {  jle   ForL3           ;no, then do next string compare}
  181.   /$C3                   {  ret                   ;MAXCHARS is in DX}
  182.                          {;}
  183.                          {NewMax:}
  184.   /$5E                   {  pop   si              ;get back start of str1}
  185.   /$5F                   {  pop   di              ;and str2}
  186.   /$39/$D0               {  cmp   ax,dx           ;greater than MAXCHARS?}
  187.   /$7F/$04               {  jg    NewMx2          ;yes, update new MAXCHARS}
  188.                          {                        ;and pointers}
  189.   /$01/$C7               {   add  di,ax           ; skip past matching chars}
  190.   /$EB/$EB               {   jmp  short Reent2    ; reenter main loop}
  191.                          {;}
  192.                          {NewMx2:}
  193.   /$89/$36/>CL1          {  mov   [>cL1],si       ;put begin of match of str1}
  194.   /$89/$3E/>CL2          {  mov   [>cL2],di       ;ditto, str2}
  195.   /$89/$C1               {  mov   cx,ax           ;save new MAXCHARS}
  196.                          {;}
  197.   /$29/$D0               {  sub   ax,dx           ;get delta for adjustment}
  198.                          {                        ; to ends of strings}
  199.   /$29/$C3               {  sub   bx,ax           ;adjust end of str1}
  200.   /$29/$C5               {  sub   bp,ax           ; and str2}
  201.                          {;}
  202.   /$89/$CA               {  mov   dx,cx           ;new MAXCHARS}
  203.   /$49                   {  dec   cx              ;set up for advance}
  204.                          {                        ; to last matching char}
  205.   /$01/$CF               {  add   di,cx           ;bump to last matching char str2}
  206.   /$89/$3E/>CR2          {  mov   [>cR2],di       ;put end of match of str2}
  207.   /$01/$F1               {  add   cx,si           ;bump to last matching char str1}
  208.   /$89/$0E/>CR1          {  mov   [>cR1],cx       ;put end of match of str1}
  209.   /$EB/$C9               {  jmp   short Reent     ;reenter inner loop}
  210.                          {;}
  211.                          {PushSt:}
  212.                          {; On Entry:}
  213.                          {;       BX      = R1 (right side of str1)}
  214.                          {;       DS:SI   = L1 (left side of str1)}
  215.                          {;       ES:Di   = L2 (left side of str2)}
  216.                          {;       BP      = R2 (right side of str2)}
  217.                          {;}
  218.   /$89/$E9               {  mov   cx,bp           ;save R2}
  219.   /$8B/$2E/>STCKNUM      {  mov   bp,[>stcknum]}
  220.   /$D1/$E5               {  shl   bp,1              ;*2 for words}
  221.   /$3E                   {  DS:                     ;override SS:bp}
  222.   /$89/$B6/>STSTR1L      {  mov   [>ststr1L][bp],si ;put left side of str1 on stack}
  223.   /$3E                   {  DS:                     ;override SS:bp}
  224.   /$89/$9E/>STSTR1R      {  mov   [>ststr1R][bp],bx ;same with right side}
  225.   /$3E                   {  DS:                     ;override SS:bp}
  226.   /$89/$BE/>STSTR2L      {  mov   [>ststr2L][bp],di ;put left side of str2 on stack}
  227.   /$3E                   {  DS:                     ;override SS:bp}
  228.   /$89/$8E/>STSTR2R      {  mov   [>ststr2R][bp],cx ;same with right side}
  229.   /$FF/$06/>STCKNUM      {  inc   word [>stcknum]   ;bump nr of stack entries}
  230.   /$89/$CD               {  mov   bp,cx             ;restore R2}
  231.   /$C3                   {  ret}
  232.                          {;}
  233.                          {PopSt:}
  234.                          {;       BX      = R1 (right side of str1)}
  235.                          {;       DS:SI   = L1 (left side of str1)}
  236.                          {;       ES:Di   = L2 (left side of str2)}
  237.                          {;       BP      = R2 (right side of str2)}
  238.                          {;}
  239.   /$FF/$0E/>STCKNUM      {  dec   word [>stcknum]   ;point to last entry on stack}
  240.   /$8B/$2E/>STCKNUM      {  mov   bp,[>stcknum]     ;get number of stack entries}
  241.   /$D1/$E5               {  shl   bp,1              ;*2 for words}
  242.   /$3E                   {  DS:                     ;override SS:bp}
  243.   /$8B/$B6/>STSTR1L      {  mov   si,[>ststr1L][bp] ;restore left side}
  244.                          {                          ; of str1 from stack}
  245.   /$3E                   {  DS:                     ;override SS:bp}
  246.   /$8B/$9E/>STSTR1R      {  mov   bx,[>ststr1R][bp] ;ditto, right side}
  247.   /$3E                   {  DS:                     ;override SS:bp}
  248.   /$8B/$BE/>STSTR2L      {  mov   di,[>ststr2L][bp] ;restore left side}
  249.                          {                          ; of str2 from stack}
  250.   /$3E                   {  DS:                     ;override SS:bp}
  251.   /$8B/$AE/>STSTR2R      {  mov   bp,[>ststr2R][bp] ;ditto, right side}
  252.   /$C3                   {  ret}
  253.                          {;}
  254.                          {Exit:}
  255.   /$07                   {  pop   ES                ;restore ES}
  256.   /$5D                   {  pop   bp                ;restore Turbo's BP}
  257.   /$89/$86/>RESULT       {  mov  >result[bp],ax     ;return local value}
  258. );
  259.