9 case class CHAR(c: Char) extends Rexp |
9 case class CHAR(c: Char) extends Rexp |
10 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
10 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
11 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
11 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
12 case class STAR(r: Rexp) extends Rexp |
12 case class STAR(r: Rexp) extends Rexp |
13 case class NTIMES(r: Rexp, n: Int) extends Rexp |
13 case class NTIMES(r: Rexp, n: Int) extends Rexp |
14 |
|
15 |
|
16 // string of a regular expressions - for testing purposes |
|
17 def string(r: Rexp) : String = r match { |
|
18 case ZERO => "0" |
|
19 case ONE => "1" |
|
20 case CHAR(c) => c.toString |
|
21 case ALT(r1, r2) => s"(${string(r1)} + ${string(r2)})" |
|
22 case SEQ(CHAR(c), CHAR(d)) => s"${c}${d}" |
|
23 case SEQ(ONE, CHAR(c)) => s"1${c}" |
|
24 case SEQ(r1, r2) => s"(${string(r1)} ~ ${string(r2)})" |
|
25 case STAR(r) => s"(${string(r)})*" |
|
26 case NTIMES(r, n) => s"(${string(r)}){${n}}" |
|
27 } |
|
28 |
|
29 // size of a regular expressions - for testing purposes |
|
30 def size(r: Rexp) : Int = r match { |
|
31 case ZERO => 1 |
|
32 case ONE => 1 |
|
33 case CHAR(_) => 1 |
|
34 case ALT(r1, r2) => 1 + size(r1) + size(r2) |
|
35 case SEQ(r1, r2) => 1 + size(r1) + size(r2) |
|
36 case STAR(r) => 1 + size(r) |
|
37 case NTIMES(r, _) => 1 + size(r) |
|
38 } |
|
39 |
14 |
40 |
15 |
41 |
16 |
42 // nullable function: tests whether the regular |
17 // nullable function: tests whether the regular |
43 // expression can recognise the empty string |
18 // expression can recognise the empty string |
63 case STAR(r1) => SEQ(der(c, r1), STAR(r1)) |
38 case STAR(r1) => SEQ(der(c, r1), STAR(r1)) |
64 case NTIMES(r, i) => |
39 case NTIMES(r, i) => |
65 if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1)) |
40 if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1)) |
66 } |
41 } |
67 |
42 |
68 |
43 def simp(r: Rexp) : Rexp = r match { |
69 |
44 case ALT(r1, r2) => (simp(r1), simp(r2)) match { |
70 def simp(r: Rexp, seen: Set[Rexp]) : (Rexp, Set[Rexp]) = r match { |
45 case (ZERO, r2s) => r2s |
71 case ALT(r1, r2) => { |
46 case (r1s, ZERO) => r1s |
72 val (r1s, seen1) = simp(r1, seen) |
47 case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s) |
73 val (r2s, seen2) = simp(r2, seen1) |
|
74 (r1s, r2s) match { |
|
75 case (ZERO, r2s) => (r2s, seen2) |
|
76 case (r1s, ZERO) => (r1s, seen2) |
|
77 case (r1s, r2s) => (ALT(r1s, r2s), seen2) |
|
78 } |
|
79 } |
48 } |
80 case SEQ(r1, r2) => { |
49 case SEQ(r1, r2) => (simp(r1), simp(r2)) match { |
81 val (r1s, _) = simp(r1, Set()) |
50 case (ZERO, _) => ZERO |
82 val (r2s, _) = simp(r2, Set()) |
51 case (_, ZERO) => ZERO |
83 if (seen.contains(SEQ(r1s, r2s))) (ZERO, seen) |
52 case (ONE, r2s) => r2s |
84 else (r1s, r2s) match { |
53 case (r1s, ONE) => r1s |
85 case (ZERO, _) => (ZERO, seen) |
54 case (r1s, r2s) => SEQ(r1s, r2s) |
86 case (_, ZERO) => (ZERO, seen) |
|
87 case (ONE, r2s) => (r2s, seen + r2s) |
|
88 case (r1s, ONE) => (r1s, seen + r1s) |
|
89 case (r1s, r2s) => (SEQ(r1s, r2s), seen + SEQ(r1s, r2s)) |
|
90 } |
|
91 } |
55 } |
92 case r => if (seen.contains(r)) (ZERO, seen) else (r, seen + r) |
56 case r => r |
93 } |
57 } |
94 |
58 |
95 |
59 |
96 // derivative w.r.t. a string (iterates der) |
60 // derivative w.r.t. a string (iterates der) |
97 def ders (s: List[Char], r: Rexp) : Rexp = s match { |
61 def ders (s: List[Char], r: Rexp) : Rexp = s match { |
98 case Nil => r |
62 case Nil => r |
99 case c::s => ders(s, simp(der(c, r), Set())._1) |
63 case c::s => ders(s, simp(der(c, r))) |
100 } |
64 } |
101 |
65 |
102 |
66 |
103 // main matcher function |
67 // main matcher function |
104 def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) |
68 def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) |
105 |
69 |
106 |
70 |
107 //one or zero |
71 //one or zero |
108 def OPT(r: Rexp) = ALT(r, ONE) |
72 def OPT(r: Rexp) = ALT(r, ONE) |
109 |
|
110 |
|
111 |
|
112 |
|
113 |
|
114 |
|
115 |
|
116 |
|
117 |
|
118 def time_needed[T](i: Int, code: => T) = { |
|
119 val start = System.nanoTime() |
|
120 for (j <- 1 to i) code |
|
121 val end = System.nanoTime() |
|
122 (end - start)/(i * 1.0e9) |
|
123 } |
|
124 |
|
125 |
|
126 // star example |
|
127 val Tstar = STAR(STAR(STAR(CHAR('a')))) |
|
128 |
|
129 string(ders("".toList, Tstar)) |
|
130 size(ders("".toList, Tstar)) // 4 |
|
131 string(ders("a".toList, Tstar)) |
|
132 size(ders("a".toList, Tstar)) // 11 |
|
133 string(ders("aa".toList, Tstar)) |
|
134 size(ders("aa".toList, Tstar)) // 11 |
|
135 string(ders("aaa".toList, Tstar)) |
|
136 size(ders("aaa".toList, Tstar)) // 11 |
|
137 string(ders("aaaa".toList, Tstar)) |
|
138 size(ders("aaaa".toList, Tstar)) // 11 |
|
139 string(ders("aaaa".toList, Tstar)) |
|
140 size(ders("aaaaa".toList, Tstar)) // 11 |
|
141 string(ders("aaaab".toList, Tstar)) |
|
142 size(ders("aaaaab".toList, Tstar)) // 1 |
|
143 |
|
144 // test: ("a" | "aa")* |
|
145 val EVIL3 = STAR(ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a')))) |
|
146 |
|
147 for (i <- 1 to 100 by 1) { |
|
148 println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL3, "a" * i))) + |
|
149 " size: " + size(ders(("a" * i).toList, EVIL3))) |
|
150 } |
|
151 |
|
152 |
|
153 println("start " + string(EVIL3) + " " + size(EVIL3)) |
|
154 val t1 = der('a', EVIL3) |
|
155 println(string(t1) + " " + size(t1)) |
|
156 val t1s = simp(t1, Set())._1 |
|
157 println("simplified " + string(t1s) + " " + size(t1s)) |
|
158 val t2 = der('a', t1s) |
|
159 println(string(t2) + " " + size(t2)) |
|
160 val t2s = simp(t2, Set())._1 |
|
161 println("simplified " + string(t2s) + " " + size(t2s)) |
|
162 val t3 = der('a', t2s) |
|
163 println(string(t3) + " " + size(t3)) |
|
164 val t3s = simp(t3, Set())._1 |
|
165 println("simplified " + string(t3s) + " " + size(t3s)) |
|
166 val t4 = der('a', t3s) |
|
167 val t4s = simp(t4, Set())._1 |
|
168 |
|
169 |
|
170 |
|
171 |
|
172 |
|
173 |
|
174 |
|
175 |
|
176 println(string(t4) + " " + size(t4)) |
|
177 println("simplified " + string(t4s) + " " + size(t4s)) |
|
178 |
73 |
179 |
74 |
180 // Test Cases |
75 // Test Cases |
181 |
76 |
182 //evil regular expressions: (a?){n} a{n} and (a*)* b |
77 //evil regular expressions: (a?){n} a{n} and (a*)* b |
233 size(ders("aaa".toList, EVIL2)) // 8 |
127 size(ders("aaa".toList, EVIL2)) // 8 |
234 size(ders("aaaa".toList, EVIL2)) // 8 |
128 size(ders("aaaa".toList, EVIL2)) // 8 |
235 size(ders("aaaaa".toList, EVIL2)) // 8 |
129 size(ders("aaaaa".toList, EVIL2)) // 8 |
236 |
130 |
237 |
131 |
|
132 // test: ("a" | "aa")* |
|
133 val EVIL3 = STAR(ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a')))) |
|
134 |
|
135 for (i <- 1 to 29 by 1) { |
|
136 println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL3, "a" * i))) + |
|
137 " size: " + size(ders(("a" * i).toList, EVIL3))) |
|
138 } |
238 |
139 |
239 |
140 |
240 |
141 |
241 // Examples from the Sulzmann paper |
|
242 val sulzmann = STAR(ALT(ALT(CHAR('a'), CHAR('b')), SEQ(CHAR('a'), CHAR('b')))) |
|
243 |
|
244 |
|
245 for (i <- 1 to 4501 by 500) { |
|
246 println(i + ": " + "%.5f".format(time_needed(1, matcher(sulzmann, "a" * i)))) |
|
247 } |
|
248 |
|
249 for (i <- 1 to 4501 by 500) { |
|
250 println(i + ": " + "%.5f".format(time_needed(1, matcher(sulzmann, "ab" * i)))) |
|
251 } |
|
252 |
|
253 size(ders("".toList, EVIL2)) // 5 |
|
254 size(ders("a".toList, EVIL2)) // 8 |
|
255 size(ders("aa".toList, EVIL2)) // 8 |
|
256 size(ders("aaa".toList, EVIL2)) // 8 |
|
257 size(ders("aaaa".toList, EVIL2)) // 8 |
|
258 size(ders("aaaaa".toList, EVIL2)) // 8 |
|
259 |
|
260 |
|
261 |
|
262 (((1 + 1a) ~ ((a + aa))*) + (((0 + 1) ~ ((a + aa))*) + ((1 + 1a) ~ ((a + aa))*))) |
|