-
Notifications
You must be signed in to change notification settings - Fork 1
Bruce Mitchener edited this page Jan 6, 2014
·
1 revision
;; -*-*-*- START re.goo -*-*-*- ;; ;; Name: re ;; Author: Neel Krishnaswami <[email protected]> ;; Date: 24 Sep 2002 ;; Version: 0.1 ;; Synopsis: This module implements a regexp engine in GOO. ; ; This module implements regular expression matching. These regexps ; don't need to be compiled before they can be matched against input. ; They can also match patterns of arbitrary objects in arbitrary ; sequences. Right now the main drawback is that no subexpression ; matching is possible to get substrings. ; ; Examples: ; ; (match? (re-alt (re-seq "foo") (re-seq "bar")) "foo") => #t ; (match? (re-alt (re-seq "foo") (re-seq "bar")) "baz") => #f ; (match? (re-cat (re-star (re-sym 'foo)) (re-sym 'bar)) ; '(foo foo bar)) => #t ; ; TODO: ; o Add macros to enable Olin Shiver's SRE regexp syntax. ; o Add infrastructure to enable subexpressions and semantic ; actions. ; (use goo) (use tests/driver) ; ;; The basic regexp classes. ; ;; Classes and constructors. ; (dc <regexp> (<any>)) ; ;; re-zero ;; never successfully matches ; (dc <re-zero> (<regexp>)) (dv re-zero (new <re-zero>)) ; ;; re-unit ;; matches the empty string ; (dc <re-unit> (<regexp>)) (dv re-unit (new <re-unit>)) ; ;; re-sym (o|<any> => <regexp>) -- a regexp that matches one object ; (dc <re-sym> (<regexp>)) (dp val (<re-sym> => <any>)) ; (df re-sym (o|<any> => <re-sym>) (new <re-sym> val o)) ; ;; re-any ;; This regexp matches any single object. ; (dc <re-any> (<regexp>)) (dv re-any (new <re-any>)) ; ;; star (r|<regexp> => <regexp>) ;; construct a regexp that matches r's pattern zero or more times ; (dc <re-star> (<regexp>)) (dp re (<re-star> => <regexp>)) ; (dg re-star (r|<regexp> => <regexp>)) (dm re-star (r|<re-unit> => <regexp>) re-unit) (dm re-star (r|<re-zero> => <regexp>) re-unit) (dm re-star (r|<regexp> => <regexp>) (new <re-star> re r)) ; ;; re-plus (r|<regexp> => <regexp>) ;; construct a regexp that matches r's pattern one or more times. ; (dc <re-plus> (<regexp>)) (dp re (<re-plus> => <regexp>)) ; (dg re-plus (r|<regexp> => <regexp>)) (dm re-plus (r|<re-unit> => <regexp>) re-unit) (dm re-plus (r|<re-zero> => <regexp>) re-zero) (dm re-plus (r|<regexp> => <regexp>) (new <re-plus> re r)) ; ;; re-opt (r|<regexp> => <regexp>) ;; construct a regexp that matches either r or epsilon ; (dc <re-opt> (<regexp>)) (dp re (<re-opt> => <regexp>)) ; (dg re-opt (r|<regexp> => <regexp>)) (dm re-opt (r|<re-unit> => <regexp>) re-unit) (dm re-opt (r|<re-zero> => <regexp>) re-unit) (dm re-opt (r|<regexp> => <regexp>) (new <re-opt> re r)) ; ;; re-cat (a|<regexp> rest|... => <regexp>) ;; Construct the regexp that matches the concatenation of a and every ;; successive regexp it receives as an argument. ; (dc <re-cat> (<regexp>)) (dp fst (<re-cat> => <regexp>)) (dp snd (<re-cat> => <regexp>)) ; ;; re-cat* (a|<regexp> b|<regexp> => <regexp>) ;; a utility function that makes an re that matches the concatenation of a & b. ; (dg re-cat* (a|<regexp> b|<regexp> => <regexp>)) (dm re-cat* (a|<re-zero> b|<regexp> => <regexp>) re-zero) (dm re-cat* (a|<regexp> b|<re-zero> => <regexp>) re-zero) (dm re-cat* (a|<re-zero> b|<re-zero> => <regexp>) re-zero) (dm re-cat* (a|<re-unit> b|<regexp> => <regexp>) b) (dm re-cat* (a|<regexp> b|<re-unit> => <regexp>) a) (dm re-cat* (a|<re-unit> b|<re-unit> => <regexp>) re-unit) (dm re-cat* (a|<regexp> b|<regexp> => <regexp>) (new <re-cat> fst a snd b)) ; (df re-cat (a|<regexp> rest|... => <regexp>) (if (empty? rest) a (re-cat* a (rep loop ((i 0)) (if (= i (len rest)) re-unit (re-cat* ?rest i (loop (+ i 1)))))))) ; ; (dc <re-alt> (<regexp>)) (dp fst (<re-alt> => <regexp>)) (dp snd (<re-alt> => <regexp>)) ; ;; re-alt* (a|<regexp> b|<regexp> => <regexp>) ;; re-alt* is a utility fn creates a regexp that can match EITHER a OR b ;; (dg re-alt* (a|<regexp> b|<regexp> => <regexp>)) (dm re-alt* (a|<re-zero> b|<re-zero> => <regexp>) re-zero) (dm re-alt* (a|<re-zero> b|<regexp> => <regexp>) b) (dm re-alt* (a|<regexp> b|<re-zero> => <regexp>) a) (dm re-alt* (a|<regexp> b|<regexp> => <regexp>) (new <re-alt> fst a snd b)) ; ;; re-alt (a|<regexp> rest|... => <regexp>) ;; re-alt takes all of the regexps it receives as arguments and ;; constructs a new regexp that can match any of them. (df re-alt (a|<regexp> rest|... => <regexp>) (fold re-alt* a rest)) ; ;; nullable? (r|<regexp> => <log>) ;; if (nullable? r) => #t, then r matches the zero-length string ; (dg nullable? (r|<regexp> => <log>)) (dm nullable? (r|<re-zero> => <log>) #f) (dm nullable? (r|<re-unit> => <log>) #t) (dm nullable? (r|<re-sym> => <log>) #f) (dm nullable? (r|<re-any> => <log>) #f) (dm nullable? (r|<re-star> => <log>) #t) (dm nullable? (r|<re-plus> => <log>) (nullable? (re r))) (dm nullable? (r|<re-opt> => <log>) #t) (dm nullable? (r|<re-cat> => <log>) (and (nullable? (fst r)) (nullable? (snd r)))) (dm nullable? (r|<re-alt> => <log>) (or (nullable? (fst r)) (nullable? (snd r)))) ; ;; resid (e|<regexp> x|<any> => <regexp>)) ;; return a new regexp that recognizes the language L(e*) = {w | xw \in L(e)} ; (dg resid (e|<regexp> x|<any> => <regexp>)) (dm resid (e|<re-zero> x|<any> => <regexp>) re-zero) (dm resid (e|<re-unit> x|<any> => <regexp>) re-zero) (dm resid (e|<re-sym> x|<any> => <regexp>) (if (= x (val e)) re-unit re-zero)) (dm resid (e|<re-any> x|<any> => <regexp>) re-unit) (dm resid (e|<re-star> x|<any> => <regexp>) (re-cat (resid (re e) x) (re-star (re e)))) (dm resid (e|<re-plus> x|<any> => <regexp>) (re-cat (resid (re e) x) (re-star (re e)))) (dm resid (e|<re-opt> x|<any> => <regexp>) (resid (re e) x)) (dm resid (e|<re-cat> x|<any> => <regexp>) (def e1 (fst e)) (def e2 (snd e)) (if (nullable? e1) (re-alt (re-cat (resid e1 x) e2) (resid e2 x)) (re-cat (resid e1 x) e2))) (dm resid (e|<re-alt> x|<any> => <regexp>) (re-alt (resid (fst e) x) (resid (snd e) x))) ; ;; match? (r|<regexp> s|<seq> => <log>) ;; (match? r s) is true, iff s is an element of L(r). ; (df match? (r|<regexp> s|<seq> => <log>) (nullable? (fold resid r s))) ; ;; re-seq (s|<seq> => <regexp>) ;; Take a sequence of objects and return a regexp that matches them in ; (df re-seq (s|<seq> => <regexp>) (fold {acc x . (re-cat (re-sym x) acc)} re-unit (rev s))) ; (df re-choose (s|<seq> => <regexp>) (if (zero? (len s)) re-unit (rep loop ((i 0)) (if (= i (- (len s) 1)) (re-sym ?s i) (re-alt (re-sym ?s i) (loop (+ i 1))))))) ; ;; ; (export <regexp>) (export re-zero) (export re-unit) (export re-sym) (export re-any) (export re-opt) (export re-star) (export re-plus) (export re-cat) (export re-alt) (export re-seq) (export re-choose) ; (export match?) ; ;; -*-*-*- EMD re.goo -*-*-*-