|
1 // regular expressions |
1 abstract class Rexp |
2 abstract class Rexp |
2 |
3 |
3 case object NULL extends Rexp |
4 case object NULL extends Rexp |
4 case object EMPTY extends Rexp |
5 case object EMPTY extends Rexp |
5 case class CHAR(c: Char) extends Rexp |
6 case class CHAR(c: Char) extends Rexp |
6 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
7 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
7 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
8 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
8 case class STAR(r: Rexp) extends Rexp |
9 case class STAR(r: Rexp) extends Rexp |
9 |
10 |
10 // whether it can match the empty string |
11 // some convenience for typing in regular expressions |
|
12 implicit def string2rexp(s : String) : Rexp = { |
|
13 s.foldRight (EMPTY: Rexp) ( (c, r) => SEQ(CHAR(c), r) ) |
|
14 } |
|
15 |
|
16 // for example |
|
17 println(STAR("abc")) |
|
18 |
|
19 // produces STAR(SEQ(CHAR(a),SEQ(CHAR(b),SEQ(CHAR(c),EMPTY)))) |
|
20 |
|
21 |
|
22 |
|
23 // a simple-minded regular expression matcher: |
|
24 // it loops for examples like STAR(EMPTY) with |
|
25 // strings this regular expression does not match |
|
26 |
|
27 def smatchers(rs: List[Rexp], s: List[Char]) : Boolean = (rs, s) match { |
|
28 case (NULL::rs, s) => false |
|
29 case (EMPTY::rs, s) => smatchers(rs, s) |
|
30 case (CHAR(c)::rs, Nil) => false |
|
31 case (CHAR(c)::rs, d::s) => (c ==d) && smatchers(rs, s) |
|
32 case (ALT(r1, r2)::rs, s) => smatchers(r1::rs, s) || smatchers(r2::rs, s) |
|
33 case (SEQ(r1, r2)::rs, s) => smatchers(r1::r2::rs, s) |
|
34 case (STAR(r)::rs, s) => smatchers(rs, s) || smatchers(r::STAR(r)::rs, s) |
|
35 case (Nil, s) => s == Nil |
|
36 } |
|
37 |
|
38 def smatcher(r: Rexp, s: String) = smatchers(List(r), s.toList) |
|
39 |
|
40 // regular expression: a |
|
41 println(smatcher(CHAR('a'), "ab")) |
|
42 |
|
43 // regular expression: a + (b o c) |
|
44 println(smatcher(ALT(CHAR('a'), SEQ(CHAR('b'), CHAR('c'))), "ab")) |
|
45 |
|
46 // regular expression: a + (b o c) |
|
47 println(smatcher(ALT(CHAR('a'), SEQ(CHAR('b'), CHAR('c'))), "bc")) |
|
48 |
|
49 // loops for regular expression epsilon* |
|
50 //println(smatcher(STAR(EMPTY), "a")) |
|
51 |
|
52 |
|
53 |
|
54 // Regular expression matcher that works properly |
|
55 //================================================ |
|
56 |
|
57 // nullable function: tests whether the regular |
|
58 // expression can recognise the empty string |
11 def nullable (r: Rexp) : Boolean = r match { |
59 def nullable (r: Rexp) : Boolean = r match { |
12 case NULL => false |
60 case NULL => false |
13 case EMPTY => true |
61 case EMPTY => true |
14 case CHAR(_) => false |
62 case CHAR(_) => false |
15 case ALT(r1, r2) => nullable(r1) || nullable(r2) |
63 case ALT(r1, r2) => nullable(r1) || nullable(r2) |
16 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
64 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
17 case STAR(_) => true |
65 case STAR(_) => true |
18 } |
66 } |
19 |
67 |
20 // derivative of a regular expression |
68 // derivative of a regular expression w.r.t. a character |
21 def deriv (r: Rexp, c: Char) : Rexp = r match { |
69 def der (c: Char, r: Rexp) : Rexp = r match { |
22 case NULL => NULL |
70 case NULL => NULL |
23 case EMPTY => NULL |
71 case EMPTY => NULL |
24 case CHAR(d) => if (c == d) EMPTY else NULL |
72 case CHAR(d) => if (c == d) EMPTY else NULL |
25 case ALT(r1, r2) => ALT(deriv(r1, c), deriv(r2, c)) |
73 case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) |
26 case SEQ(r1, r2) => |
74 case SEQ(r1, r2) => |
27 if (nullable(r1)) ALT(SEQ(deriv(r1, c), r2), deriv(r2, c)) |
75 if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) |
28 else SEQ(deriv(r1, c), r2) |
76 else SEQ(der(c, r1), r2) |
29 case STAR(r) => SEQ(deriv(r, c), STAR(r)) |
77 case STAR(r) => SEQ(der(c, r), STAR(r)) |
30 } |
78 } |
31 |
79 |
32 def derivs (r: Rexp, s: List[Char]) : Rexp = s match { |
80 // derivative w.r.t. a string (iterates der) |
|
81 def ders (s: List[Char], r: Rexp) : Rexp = s match { |
33 case Nil => r |
82 case Nil => r |
34 case c::cs => derivs(deriv(r, c), cs) |
83 case c::s => ders(s, der(c, r)) |
35 } |
84 } |
36 |
85 |
37 // regular expression matching |
86 // main matcher function |
38 def matches(r: Rexp, s: String) : Boolean = nullable(derivs(r, s.toList)) |
87 def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) |
39 |
88 |
40 /* Examples */ |
|
41 |
89 |
42 println(matches(SEQ(SEQ(CHAR('c'), CHAR('a')), CHAR('b')),"cab")) |
90 //examples |
43 println(matches(STAR(CHAR('a')),"aaa")) |
|
44 |
91 |
45 /* Convenience using implicits */ |
92 println(matcher(SEQ(STAR("a"), STAR("b")), "bbaaa")) |
46 implicit def string2rexp(s : String) : Rexp = { |
93 println(matcher(ALT(STAR("a"), STAR("b")), "")) |
47 s.foldRight (EMPTY: Rexp) ( (c, r) => SEQ(CHAR(c), r) ) |
94 println(matcher("abc", "")) |
48 } |
95 println(matcher(STAR(ALT(EMPTY, "a")), "")) |
|
96 println(matcher(STAR(EMPTY), "a")) |
|
97 println(matcher("cab","cab")) |
|
98 println(matcher(STAR("a"),"aaa")) |
|
99 println(matcher("cab" ,"cab")) |
|
100 println(matcher(STAR("a"),"aaa")) |
49 |
101 |
50 println(matches("cab" ,"cab")) |
|
51 println(matches(STAR("a"),"aaa")) |
|
52 println(matches(STAR("a"),"aaab")) |
|