NetNews Usenet Archive 1992 #20

home *** CD-ROM | disk | FTP | other *** search

/ NetNews Usenet Archive 1992 #20 / NN_1992_20.iso / spool / comp / lang / prolog / 1663 < prev next >

Wrap

Internet Message Format | 1992-09-08 | 3.0 KB

Xref: sparky comp.lang.prolog:1663 comp.lang.c:13292 Path: sparky!uunet!mcsun!sunic!dkuug!diku!torbenm From: torbenm@diku.dk (Torben AEgidius Mogensen) Newsgroups: comp.lang.prolog,comp.lang.c Subject: Re: Need code to match strings with regular expressions Message-ID: <1992Sep7.094208.13949@odin.diku.dk> Date: 7 Sep 92 09:42:08 GMT References: <82078@bcsaic.boeing.com> Sender: torbenm@freke.diku.dk Organization: Department of Computer Science, U of Copenhagen Lines: 103 milan@bcsaic.boeing.com (Milan Richter) writes: >I am looking for a public domain code for matching strings with regular >expressions. Essentially, given a string and a regular expression (such as, >e.g., are used in emacs), I want to tell whether they match. >Can anyone provide pointers to either C or Prolog implementation? Below is a Prolog program that does this. It is not terribly efficient, as it doesn't generate an automaton but rewrites the regular expression as it reads the text. Partial evaluation with respect to the regular expression can yield a program equivalent to an automaton. Torben Mogensen (torbenm@diku.dk) % interpreter for regular expressions :- op(300,yf,*). :- op(200,fx,`). % R ::= empty -- empty string % | ` Symbol -- terminal symbol % | R ; R -- alternative % | R , R -- sequence % | R * -- repetition accepts(R,[]) :- accepts_empty(R). accepts(R,[S|Ss]) :- first(R,S), % test if S can start R next(R,S,R1), % find reg-exp R1 for rest of string accepts(R1,Ss). % and test that accepts_empty(empty). accepts_empty((R1 ; R2)) :- accepts_empty(R1) -> % the use of -> reduces backtracking. true ; % ! could be used instead (but I hate !). accepts_empty(R2). accepts_empty((R1 , R2)) :- accepts_empty(R1), accepts_empty(R2). accepts_empty(_ *). first(` S,S). first((R1 ; R2),S) :- first(R1,S) ; (first(R2,S), \+ first(R1,S)). % reduces backtracking (\+ is not) first((R1 , R2),S) :- first(R1,S) ; (accepts_empty(R1), first(R2,S), \+ first(R1,S)). % ditto first(R *,S) :- first(R,S). next(` S,S,empty). next((R1 ; R2),S,R) :- first(R1,S) -> next(R1,S,R11), (first(R2,S) -> next(R2,S,R21), reduce_or(R11,R21,R) ; R = R11) ; next(R2,S,R). next((R1 , R2),S,R) :- first(R1,S) -> next(R1,S,R11), (accepts_empty(R1), first(R2,S) -> z next(R2,S,R21), reduce_seq(R11,R2,R3), reduce_or(R3,R21,R) ; reduce_seq(R11,R2,R)) ; next(R2,S,R). next(R *,S,R2) :- next(R,S,R1), reduce_seq(R1,R *,R2). % these reduce the derived regular expressions to avoid blow-up of size reduce_or(R1,R2,R) :- R1 = R2 -> R = R1 ; ((R1 = (R3,R2) ; R1 = (R2,R3)), accepts_empty(R3) -> R = R1 ; ((R2 = (R3,R1) ; R2 = (R1,R3)), accepts_empty(R3) -> R = R2 ; (R1 = (R2 ; _) -> R = R1 ; (R1 = (_ ; R2) -> R = R1 ; (R2 = (R1 ; _) -> R = R2 ; (R2 = (_ ; R1) -> R = R2 ; R = (R1 ; R2))))))). reduce_seq(R1,R2,R) :- R1 = empty -> R = R2 ; (R2 = empty -> R = R1 ; R = (R1 , R2)).