9 case class CHAR(c: Char) extends Rexp |
9 case class CHAR(c: Char) extends Rexp |
10 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
10 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
11 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
11 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
12 case class STAR(r: Rexp) extends Rexp |
12 case class STAR(r: Rexp) extends Rexp |
13 case class NTIMES(r: Rexp, n: Int) extends Rexp |
13 case class NTIMES(r: Rexp, n: Int) extends Rexp |
|
14 |
|
15 |
|
16 // string of a regular expressions - for testing purposes |
|
17 def string(r: Rexp) : String = r match { |
|
18 case ZERO => "0" |
|
19 case ONE => "1" |
|
20 case CHAR(c) => c.toString |
|
21 case ALT(r1, r2) => s"(${string(r1)} + ${string(r2)})" |
|
22 case SEQ(CHAR(c), CHAR(d)) => s"${c}${d}" |
|
23 case SEQ(ONE, CHAR(c)) => s"1${c}" |
|
24 case SEQ(r1, r2) => s"(${string(r1)} ~ ${string(r2)})" |
|
25 case STAR(r) => s"(${string(r)})*" |
|
26 case NTIMES(r, n) => s"(${string(r)}){${n}}" |
|
27 } |
|
28 |
|
29 // size of a regular expressions - for testing purposes |
|
30 def size(r: Rexp) : Int = r match { |
|
31 case ZERO => 1 |
|
32 case ONE => 1 |
|
33 case CHAR(_) => 1 |
|
34 case ALT(r1, r2) => 1 + size(r1) + size(r2) |
|
35 case SEQ(r1, r2) => 1 + size(r1) + size(r2) |
|
36 case STAR(r) => 1 + size(r) |
|
37 case NTIMES(r, _) => 1 + size(r) |
|
38 } |
|
39 |
14 |
40 |
15 |
41 |
16 // nullable function: tests whether the regular |
42 // nullable function: tests whether the regular |
17 // expression can recognise the empty string |
43 // expression can recognise the empty string |
18 def nullable (r: Rexp) : Boolean = r match { |
44 def nullable (r: Rexp) : Boolean = r match { |
37 case STAR(r1) => SEQ(der(c, r1), STAR(r1)) |
63 case STAR(r1) => SEQ(der(c, r1), STAR(r1)) |
38 case NTIMES(r, i) => |
64 case NTIMES(r, i) => |
39 if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1)) |
65 if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1)) |
40 } |
66 } |
41 |
67 |
42 def simp(r: Rexp) : Rexp = r match { |
68 |
43 case ALT(r1, r2) => (simp(r1), simp(r2)) match { |
69 |
44 case (ZERO, r2s) => r2s |
70 def simp(r: Rexp, seen: Set[Rexp]) : (Rexp, Set[Rexp]) = r match { |
45 case (r1s, ZERO) => r1s |
71 case ALT(r1, r2) => { |
46 case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s) |
72 val (r1s, seen1) = simp(r1, seen) |
|
73 val (r2s, seen2) = simp(r2, seen1) |
|
74 (r1s, r2s) match { |
|
75 case (ZERO, r2s) => (r2s, seen2) |
|
76 case (r1s, ZERO) => (r1s, seen2) |
|
77 case (r1s, r2s) => (ALT(r1s, r2s), seen2) |
|
78 } |
47 } |
79 } |
48 case SEQ(r1, r2) => (simp(r1), simp(r2)) match { |
80 case SEQ(r1, r2) => { |
49 case (ZERO, _) => ZERO |
81 val (r1s, _) = simp(r1, Set()) |
50 case (_, ZERO) => ZERO |
82 val (r2s, _) = simp(r2, Set()) |
51 case (ONE, r2s) => r2s |
83 if (seen.contains(SEQ(r1s, r2s))) (ZERO, seen) |
52 case (r1s, ONE) => r1s |
84 else (r1s, r2s) match { |
53 case (r1s, r2s) => SEQ(r1s, r2s) |
85 case (ZERO, _) => (ZERO, seen) |
|
86 case (_, ZERO) => (ZERO, seen) |
|
87 case (ONE, r2s) => (r2s, seen + r2s) |
|
88 case (r1s, ONE) => (r1s, seen + r1s) |
|
89 case (r1s, r2s) => (SEQ(r1s, r2s), seen + SEQ(r1s, r2s)) |
|
90 } |
54 } |
91 } |
55 case r => r |
92 case r => if (seen.contains(r)) (ZERO, seen) else (r, seen + r) |
56 } |
93 } |
57 |
94 |
58 |
95 |
59 // derivative w.r.t. a string (iterates der) |
96 // derivative w.r.t. a string (iterates der) |
60 def ders (s: List[Char], r: Rexp) : Rexp = s match { |
97 def ders (s: List[Char], r: Rexp) : Rexp = s match { |
61 case Nil => r |
98 case Nil => r |
62 case c::s => ders(s, simp(der(c, r))) |
99 case c::s => ders(s, simp(der(c, r), Set())._1) |
63 } |
100 } |
64 |
101 |
65 |
102 |
66 // main matcher function |
103 // main matcher function |
67 def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) |
104 def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) |
68 |
105 |
69 |
106 |
70 //one or zero |
107 //one or zero |
71 def OPT(r: Rexp) = ALT(r, ONE) |
108 def OPT(r: Rexp) = ALT(r, ONE) |
|
109 |
|
110 |
|
111 |
|
112 |
|
113 |
|
114 |
|
115 |
|
116 |
|
117 |
|
118 def time_needed[T](i: Int, code: => T) = { |
|
119 val start = System.nanoTime() |
|
120 for (j <- 1 to i) code |
|
121 val end = System.nanoTime() |
|
122 (end - start)/(i * 1.0e9) |
|
123 } |
|
124 |
|
125 |
|
126 // star example |
|
127 val Tstar = STAR(STAR(STAR(CHAR('a')))) |
|
128 |
|
129 string(ders("".toList, Tstar)) |
|
130 size(ders("".toList, Tstar)) // 4 |
|
131 string(ders("a".toList, Tstar)) |
|
132 size(ders("a".toList, Tstar)) // 11 |
|
133 string(ders("aa".toList, Tstar)) |
|
134 size(ders("aa".toList, Tstar)) // 11 |
|
135 string(ders("aaa".toList, Tstar)) |
|
136 size(ders("aaa".toList, Tstar)) // 11 |
|
137 string(ders("aaaa".toList, Tstar)) |
|
138 size(ders("aaaa".toList, Tstar)) // 11 |
|
139 string(ders("aaaa".toList, Tstar)) |
|
140 size(ders("aaaaa".toList, Tstar)) // 11 |
|
141 string(ders("aaaab".toList, Tstar)) |
|
142 size(ders("aaaaab".toList, Tstar)) // 1 |
|
143 |
|
144 // test: ("a" | "aa")* |
|
145 val EVIL3 = STAR(ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a')))) |
|
146 |
|
147 for (i <- 1 to 100 by 1) { |
|
148 println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL3, "a" * i))) + |
|
149 " size: " + size(ders(("a" * i).toList, EVIL3))) |
|
150 } |
|
151 |
|
152 |
|
153 println("start " + string(EVIL3) + " " + size(EVIL3)) |
|
154 val t1 = der('a', EVIL3) |
|
155 println(string(t1) + " " + size(t1)) |
|
156 val t1s = simp(t1, Set())._1 |
|
157 println("simplified " + string(t1s) + " " + size(t1s)) |
|
158 val t2 = der('a', t1s) |
|
159 println(string(t2) + " " + size(t2)) |
|
160 val t2s = simp(t2, Set())._1 |
|
161 println("simplified " + string(t2s) + " " + size(t2s)) |
|
162 val t3 = der('a', t2s) |
|
163 println(string(t3) + " " + size(t3)) |
|
164 val t3s = simp(t3, Set())._1 |
|
165 println("simplified " + string(t3s) + " " + size(t3s)) |
|
166 val t4 = der('a', t3s) |
|
167 val t4s = simp(t4, Set())._1 |
|
168 |
|
169 |
|
170 |
|
171 |
|
172 |
|
173 |
|
174 |
|
175 |
|
176 println(string(t4) + " " + size(t4)) |
|
177 println("simplified " + string(t4s) + " " + size(t4s)) |
72 |
178 |
73 |
179 |
74 // Test Cases |
180 // Test Cases |
75 |
181 |
76 //evil regular expressions: (a?){n} a{n} and (a*)* b |
182 //evil regular expressions: (a?){n} a{n} and (a*)* b |