home *** CD-ROM | disk | FTP | other *** search
- Xref: sparky comp.lang.prolog:1663 comp.lang.c:13292
- Path: sparky!uunet!mcsun!sunic!dkuug!diku!torbenm
- From: torbenm@diku.dk (Torben AEgidius Mogensen)
- Newsgroups: comp.lang.prolog,comp.lang.c
- Subject: Re: Need code to match strings with regular expressions
- Message-ID: <1992Sep7.094208.13949@odin.diku.dk>
- Date: 7 Sep 92 09:42:08 GMT
- References: <82078@bcsaic.boeing.com>
- Sender: torbenm@freke.diku.dk
- Organization: Department of Computer Science, U of Copenhagen
- Lines: 103
-
- milan@bcsaic.boeing.com (Milan Richter) writes:
-
- >I am looking for a public domain code for matching strings with regular
- >expressions. Essentially, given a string and a regular expression (such as,
- >e.g., are used in emacs), I want to tell whether they match.
-
- >Can anyone provide pointers to either C or Prolog implementation?
-
- Below is a Prolog program that does this. It is not terribly
- efficient, as it doesn't generate an automaton but rewrites the
- regular expression as it reads the text. Partial evaluation with
- respect to the regular expression can yield a program equivalent to an
- automaton.
-
- Torben Mogensen (torbenm@diku.dk)
-
-
- % interpreter for regular expressions
-
- :- op(300,yf,*).
- :- op(200,fx,`).
-
- % R ::= empty -- empty string
- % | ` Symbol -- terminal symbol
- % | R ; R -- alternative
- % | R , R -- sequence
- % | R * -- repetition
-
- accepts(R,[]) :-
- accepts_empty(R).
- accepts(R,[S|Ss]) :-
- first(R,S), % test if S can start R
- next(R,S,R1), % find reg-exp R1 for rest of string
- accepts(R1,Ss). % and test that
-
-
- accepts_empty(empty).
- accepts_empty((R1 ; R2)) :-
- accepts_empty(R1) -> % the use of -> reduces backtracking.
- true ; % ! could be used instead (but I hate !).
- accepts_empty(R2).
- accepts_empty((R1 , R2)) :-
- accepts_empty(R1),
- accepts_empty(R2).
- accepts_empty(_ *).
-
- first(` S,S).
- first((R1 ; R2),S) :-
- first(R1,S) ;
- (first(R2,S), \+ first(R1,S)). % reduces backtracking (\+ is not)
- first((R1 , R2),S) :-
- first(R1,S) ;
- (accepts_empty(R1), first(R2,S), \+ first(R1,S)). % ditto
- first(R *,S) :-
- first(R,S).
-
- next(` S,S,empty).
- next((R1 ; R2),S,R) :-
- first(R1,S) ->
- next(R1,S,R11),
- (first(R2,S) ->
- next(R2,S,R21),
- reduce_or(R11,R21,R) ;
- R = R11) ;
- next(R2,S,R).
- next((R1 , R2),S,R) :-
- first(R1,S) ->
- next(R1,S,R11),
- (accepts_empty(R1), first(R2,S) ->
- z next(R2,S,R21),
- reduce_seq(R11,R2,R3),
- reduce_or(R3,R21,R) ;
- reduce_seq(R11,R2,R)) ;
- next(R2,S,R).
- next(R *,S,R2) :-
- next(R,S,R1),
- reduce_seq(R1,R *,R2).
-
- % these reduce the derived regular expressions to avoid blow-up of size
-
- reduce_or(R1,R2,R) :-
- R1 = R2 ->
- R = R1 ;
- ((R1 = (R3,R2) ; R1 = (R2,R3)), accepts_empty(R3) ->
- R = R1 ;
- ((R2 = (R3,R1) ; R2 = (R1,R3)), accepts_empty(R3) ->
- R = R2 ;
- (R1 = (R2 ; _) ->
- R = R1 ;
- (R1 = (_ ; R2) ->
- R = R1 ;
- (R2 = (R1 ; _) ->
- R = R2 ;
- (R2 = (_ ; R1) ->
- R = R2 ;
- R = (R1 ; R2))))))).
-
- reduce_seq(R1,R2,R) :-
- R1 = empty ->
- R = R2 ;
- (R2 = empty ->
- R = R1 ;
- R = (R1 , R2)).
-