|
1 // regular expressions |
|
2 abstract class Rexp |
|
3 |
|
4 case object NULL extends Rexp |
|
5 case object EMPTY extends Rexp |
|
6 case class CHAR(c: Char) extends Rexp |
|
7 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
|
8 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
|
9 case class STAR(r: Rexp) extends Rexp |
|
10 |
|
11 // some convenience for typing in regular expressions |
|
12 def charlist2rexp(s : List[Char]) : Rexp = s match { |
|
13 case Nil => EMPTY |
|
14 case c::Nil => CHAR(c) |
|
15 case c::s => SEQ(CHAR(c), charlist2rexp(s)) |
|
16 } |
|
17 implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) |
|
18 |
|
19 |
|
20 // for example |
|
21 println(STAR("abc")) |
|
22 |
|
23 // produces STAR(SEQ(CHAR(a),SEQ(CHAR(b),SEQ(CHAR(c),EMPTY)))) |
|
24 |
|
25 |
|
26 |
|
27 // a simple-minded regular expression matcher: |
|
28 // it loops for examples like STAR(EMPTY) with |
|
29 // strings this regular expression does not match |
|
30 |
|
31 def smatchers(rs: List[Rexp], s: List[Char]) : Boolean = (rs, s) match { |
|
32 case (NULL::rs, s) => false |
|
33 case (EMPTY::rs, s) => smatchers(rs, s) |
|
34 case (CHAR(c)::rs, Nil) => false |
|
35 case (CHAR(c)::rs, d::s) => (c ==d) && smatchers(rs, s) |
|
36 case (ALT(r1, r2)::rs, s) => smatchers(r1::rs, s) || smatchers(r2::rs, s) |
|
37 case (SEQ(r1, r2)::rs, s) => smatchers(r1::r2::rs, s) |
|
38 case (STAR(r)::rs, s) => smatchers(rs, s) || smatchers(r::STAR(r)::rs, s) |
|
39 case (Nil, s) => s == Nil |
|
40 } |
|
41 |
|
42 def smatcher(r: Rexp, s: String) = smatchers(List(r), s.toList) |
|
43 |
|
44 // regular expression: a |
|
45 println(smatcher(CHAR('a'), "ab")) |
|
46 |
|
47 // regular expression: a + (b o c) |
|
48 println(smatcher(ALT(CHAR('a'), SEQ(CHAR('b'), CHAR('c'))), "ab")) |
|
49 |
|
50 // regular expression: a + (b o c) |
|
51 println(smatcher(ALT(CHAR('a'), SEQ(CHAR('b'), CHAR('c'))), "bc")) |
|
52 |
|
53 // loops for regular expression epsilon* |
|
54 //println(smatcher(STAR(EMPTY), "a")) |
|
55 |
|
56 |
|
57 |
|
58 // Regular expression matcher that works properly |
|
59 //================================================ |
|
60 |
|
61 // nullable function: tests whether the regular |
|
62 // expression can recognise the empty string |
|
63 def nullable (r: Rexp) : Boolean = r match { |
|
64 case NULL => false |
|
65 case EMPTY => true |
|
66 case CHAR(_) => false |
|
67 case ALT(r1, r2) => nullable(r1) || nullable(r2) |
|
68 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
|
69 case STAR(_) => true |
|
70 } |
|
71 |
|
72 // derivative of a regular expression w.r.t. a character |
|
73 def der (c: Char, r: Rexp) : Rexp = r match { |
|
74 case NULL => NULL |
|
75 case EMPTY => NULL |
|
76 case CHAR(d) => if (c == d) EMPTY else NULL |
|
77 case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) |
|
78 case SEQ(r1, r2) => |
|
79 if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) |
|
80 else SEQ(der(c, r1), r2) |
|
81 case STAR(r) => SEQ(der(c, r), STAR(r)) |
|
82 } |
|
83 |
|
84 // derivative w.r.t. a string (iterates der) |
|
85 def ders (s: List[Char], r: Rexp) : Rexp = s match { |
|
86 case Nil => r |
|
87 case c::s => ders(s, der(c, r)) |
|
88 } |
|
89 |
|
90 // main matcher function |
|
91 def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) |
|
92 |
|
93 |
|
94 //examples |
|
95 |
|
96 println(matcher(SEQ(STAR("a"), STAR("b")), "bbaaa")) |
|
97 println(matcher(ALT(STAR("a"), STAR("b")), "")) |
|
98 println(matcher("abc", "")) |
|
99 println(matcher(STAR(ALT(EMPTY, "a")), "")) |
|
100 println(matcher(STAR(EMPTY), "a")) |
|
101 println(matcher("cab","cab")) |
|
102 println(matcher(STAR("a"),"aaa")) |
|
103 println(matcher("cab" ,"cab")) |
|
104 println(matcher(STAR("a"),"aaa")) |
|
105 |
|
106 |