|
1 // CW1 |
|
2 |
|
3 abstract class Rexp |
|
4 case object ZERO extends Rexp |
|
5 case object ONE extends Rexp |
|
6 case class CHAR(c: Char) extends Rexp |
|
7 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
|
8 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
|
9 case class STAR(r: Rexp) extends Rexp |
|
10 |
|
11 // extended Rexps |
|
12 case class RANGE(s: Set[Char]) extends Rexp |
|
13 case class PLUS(r: Rexp) extends Rexp |
|
14 case class OPTIONAL(r: Rexp) extends Rexp |
|
15 case class NTIMES(r: Rexp, n: Int) extends Rexp |
|
16 case class UPTO(r: Rexp, n: Int) extends Rexp |
|
17 case class FROM(r: Rexp, n: Int) extends Rexp |
|
18 case class BETWEEN(r: Rexp, n: Int, m: Int) extends Rexp |
|
19 case class NOT(r: Rexp) extends Rexp |
|
20 |
|
21 // functions |
|
22 case class CFUN(f: Char => Boolean) extends Rexp |
|
23 |
|
24 // abbreviations |
|
25 def FCHAR(c: Char) = CFUN((a: Char) => a == c) |
|
26 def FSET(s: Set[Char]) = CFUN((a: Char) => s.contains(a)) |
|
27 val FALL = CFUN(_ => true) |
|
28 |
|
29 def nullable (r: Rexp) : Boolean = r match { |
|
30 case ZERO => false |
|
31 case ONE => true |
|
32 case CHAR(_) => false |
|
33 case ALT(r1, r2) => nullable(r1) || nullable(r2) |
|
34 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
|
35 case STAR(_) => true |
|
36 |
|
37 case RANGE(_) => false |
|
38 case PLUS(r1) => nullable(r1) |
|
39 case OPTIONAL(_) => true |
|
40 case NTIMES(r1, i) => if (i == 0) true else nullable(r1) |
|
41 case UPTO(_, _) => true |
|
42 case FROM(r1, i) => if (i == 0) true else nullable(r1) |
|
43 case BETWEEN(r1, i, _) => if (i == 0) true else nullable(r1) |
|
44 case NOT(r1) => !nullable(r1) |
|
45 |
|
46 case CFUN(f) => false |
|
47 } |
|
48 |
|
49 |
|
50 def der (c: Char, r: Rexp) : Rexp = r match { |
|
51 case ZERO => ZERO |
|
52 case ONE => ZERO |
|
53 case CHAR(d) => if (c == d) ONE else ZERO |
|
54 case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) |
|
55 case SEQ(r1, r2) => |
|
56 if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) |
|
57 else SEQ(der(c, r1), r2) |
|
58 case STAR(r1) => SEQ(der(c, r1), STAR(r1)) |
|
59 |
|
60 case RANGE(s) => |
|
61 if (s.contains(c)) ONE else ZERO |
|
62 case PLUS(r1) => SEQ(der(c, r1), STAR(r1)) |
|
63 case OPTIONAL(r1) => der(c, r1) |
|
64 case NTIMES(r, i) => |
|
65 if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1)) |
|
66 case UPTO(r1, i) => |
|
67 if (i == 0) ZERO else SEQ(der(c, r1), UPTO(r1, i - 1)) |
|
68 case FROM(r1, i) => |
|
69 if (i == 0) SEQ(der(c, r1), FROM(r1, 0)) else |
|
70 SEQ(der(c, r1), FROM(r1, i - 1)) |
|
71 case BETWEEN(r1, i, j) => |
|
72 if (i == 0) { |
|
73 if (j == 0) ZERO else SEQ(der(c, r1), BETWEEN(r1, 0, j - 1)) |
|
74 } else SEQ(der(c, r1), BETWEEN(r1, i - 1, j - 1)) |
|
75 case NOT(r1) => NOT(der(c, r1)) |
|
76 |
|
77 case CFUN(f) => if (f(c)) ONE else ZERO |
|
78 } |
|
79 |
|
80 |
|
81 def simp(r: Rexp) : Rexp = r match { |
|
82 case ALT(r1, r2) => (simp(r1), simp(r2)) match { |
|
83 case (ZERO, r2s) => r2s |
|
84 case (r1s, ZERO) => r1s |
|
85 case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s) |
|
86 } |
|
87 case SEQ(r1, r2) => (simp(r1), simp(r2)) match { |
|
88 case (ZERO, _) => ZERO |
|
89 case (_, ZERO) => ZERO |
|
90 case (ONE, r2s) => r2s |
|
91 case (r1s, ONE) => r1s |
|
92 case (r1s, r2s) => SEQ(r1s, r2s) |
|
93 } |
|
94 case r => r |
|
95 } |
|
96 |
|
97 def ders(s: List[Char], r: Rexp) : Rexp = s match { |
|
98 case Nil => r |
|
99 case c::s => ders(s, simp(der(c, r))) |
|
100 } |
|
101 |
|
102 def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) |
|
103 |
|
104 |
|
105 |
|
106 val Regex31 = NTIMES(CHAR('a'),3) |
|
107 val Regex32 = NTIMES(OPTIONAL(CHAR('a')),3) |
|
108 val Regex33 = UPTO(CHAR('a'),3) |
|
109 val Regex34 = UPTO(OPTIONAL(CHAR('a')),3) |
|
110 val Regex35 = BETWEEN(CHAR('a'),3,5) |
|
111 val Regex36 = BETWEEN(OPTIONAL(CHAR('a')),3,5) |
|
112 val string31 = "" |
|
113 val string32 = "a" |
|
114 val string33 = "aa" |
|
115 val string34 = "aaa" |
|
116 val string35 = "aaaa" |
|
117 val string36 = "aaaaa" |
|
118 val string37 = "aaaaaa" |
|
119 |
|
120 |
|
121 println("Question3") |
|
122 println(matcher(Regex31, string31)) |
|
123 println(matcher(Regex31, string32)) |
|
124 println(matcher(Regex31, string33)) |
|
125 println(matcher(Regex31, string34)) |
|
126 println(matcher(Regex31, string35)) |
|
127 println(matcher(Regex31, string36)) |
|
128 println(matcher(Regex31, string37)); println("") |
|
129 println(matcher(Regex32, string31)) |
|
130 println(matcher(Regex32, string32)) |
|
131 println(matcher(Regex32, string33)) |
|
132 println(matcher(Regex32, string34)) |
|
133 println(matcher(Regex32, string35)) |
|
134 println(matcher(Regex32, string36)) |
|
135 println(matcher(Regex32, string37)); println("") |
|
136 println(matcher(Regex33, string31)) |
|
137 println(matcher(Regex33, string32)) |
|
138 println(matcher(Regex33, string33)) |
|
139 println(matcher(Regex33, string34)) |
|
140 println(matcher(Regex33, string35)) |
|
141 println(matcher(Regex33, string36)) |
|
142 println(matcher(Regex33, string37)); println("") |
|
143 println(matcher(Regex34, string31)) |
|
144 println(matcher(Regex34, string32)) |
|
145 println(matcher(Regex34, string33)) |
|
146 println(matcher(Regex34, string34)) |
|
147 println(matcher(Regex34, string35)) |
|
148 println(matcher(Regex34, string36)) |
|
149 println(matcher(Regex34, string37)); println("") |
|
150 println(matcher(Regex35, string31)) |
|
151 println(matcher(Regex35, string32)) |
|
152 println(matcher(Regex35, string33)) |
|
153 println(matcher(Regex35, string34)) |
|
154 println(matcher(Regex35, string35)) |
|
155 println(matcher(Regex35, string36)) |
|
156 println(matcher(Regex35, string37)); println("") |
|
157 println(matcher(Regex36, string31)) |
|
158 println(matcher(Regex36, string32)) |
|
159 println(matcher(Regex36, string33)) |
|
160 println(matcher(Regex36, string34)) |
|
161 println(matcher(Regex36, string35)) |
|
162 println(matcher(Regex36, string36)) |
|
163 println(matcher(Regex36, string37)); println("") |
|
164 |
|
165 |
|
166 val RegexX = BETWEEN(CHAR('a'), 0, 5) |
|
167 val stringX = "" |
|
168 println(matcher(RegexX, stringX)) |
|
169 |
|
170 |
|
171 |
|
172 val str0 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
|
173 val str1 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
|
174 val str2 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
|
175 |
|
176 val matchA = (c:Char) => c == 'a' |
|
177 |
|
178 val reg_1 = SEQ(SEQ(CFUN(matchA), CFUN(matchA)), CFUN(matchA)) |
|
179 val reg_2 = SEQ(NTIMES(CFUN(matchA), 19), OPTIONAL(CFUN(matchA))) |
|
180 |
|
181 val reg_1_mod = PLUS(PLUS(reg_1)) |
|
182 val reg_2_mod = PLUS(PLUS(reg_2)) |
|
183 |
|
184 |
|
185 matcher(reg_1_mod, str0) |
|
186 matcher(reg_1_mod, str1) |
|
187 matcher(reg_1_mod, str2) |
|
188 matcher(reg_2_mod, str0) |
|
189 matcher(reg_2_mod, str1) |
|
190 matcher(reg_2_mod, str2) |
|
191 |
|
192 |
|
193 |
|
194 |
|
195 |
|
196 //Q3: matcher test (table) |
|
197 |
|
198 // a^{3} |
|
199 val re1 = NTIMES(CHAR('a'), 3) |
|
200 |
|
201 matcher(re1, "") //false |
|
202 matcher(re1, "a") //false |
|
203 matcher(re1, "aa") //false |
|
204 matcher(re1, "aaa") //true |
|
205 matcher(re1, "aaaaa") //false |
|
206 matcher(re1, "aaaaaa") //false |
|
207 |
|
208 // (a?)^{3} |
|
209 val re2 = NTIMES(OPTIONAL(CHAR('a')), 3) |
|
210 |
|
211 matcher(re2, "") //true |
|
212 matcher(re2, "a") //true |
|
213 matcher(re2, "aa") //true |
|
214 matcher(re2, "aaa") //true |
|
215 matcher(re2, "aaaaa") //false |
|
216 matcher(re2, "aaaaaa") //false |
|
217 |
|
218 // (a)^{..3} |
|
219 val re3 = UPTO((CHAR('a')), 3) |
|
220 |
|
221 matcher(re3, "") //true |
|
222 matcher(re3, "a") //true |
|
223 matcher(re3, "aa") //true |
|
224 matcher(re3, "aaa") //true |
|
225 matcher(re3, "aaaaa") //false |
|
226 matcher(re3, "aaaaaa") //false |
|
227 |
|
228 // (a?)^{..3} |
|
229 val re4 = UPTO(OPTIONAL(CHAR('a')), 3) |
|
230 |
|
231 matcher(re4, "") //true |
|
232 matcher(re4, "a") //true |
|
233 matcher(re4, "aa") //true |
|
234 matcher(re4, "aaa") //true |
|
235 matcher(re4, "aaaaa") //false |
|
236 matcher(re4, "aaaaaa") //false |
|
237 |
|
238 // (a)^{3..5} |
|
239 val re5 = BETWEEN(CHAR('a'), 3, 5) |
|
240 |
|
241 matcher(re5, "") //false |
|
242 matcher(re5, "a") //false |
|
243 matcher(re5, "aa") //false |
|
244 matcher(re5, "aaa") //true |
|
245 matcher(re5, "aaaaa") //true |
|
246 matcher(re5, "aaaaaa") //false |
|
247 |
|
248 // (a?)^{3..5} |
|
249 val re6 = BETWEEN(OPTIONAL(CHAR('a')), 3, 5) |
|
250 |
|
251 matcher(re6, "") //true |
|
252 matcher(re6, "a") //true |
|
253 matcher(re6, "aa") //true |
|
254 matcher(re6, "aaa") //true |
|
255 matcher(re6, "aaaaa") //true |
|
256 matcher(re6, "aaaaaa") //false |
|
257 |
|
258 //Q5: regular expression for email addresses |
|
259 |
|
260 val alphaNum = ('a' to 'z').toSet ++ ('0' to '9') |
|
261 val Q5email = SEQ( |
|
262 PLUS(RANGE(alphaNum + '_' + '-' + '.')), |
|
263 SEQ( |
|
264 CHAR('@'), |
|
265 SEQ( |
|
266 PLUS(RANGE(alphaNum + '-' + '.')), |
|
267 SEQ( |
|
268 CHAR('.'), |
|
269 BETWEEN(RANGE(('a' to 'z').toSet + '.'), 2, 6) |
|
270 ) |
|
271 ) |
|
272 ) |
|
273 ) |
|
274 |
|
275 ders("nicolas.volken@kcl.ac.uk".toList, Q5email) |
|
276 |
|
277 // Q6 |
|
278 val Q6 = SEQ(CHAR('/'), |
|
279 SEQ(CHAR('*'), |
|
280 SEQ( |
|
281 |
|
282 NOT( |
|
283 SEQ( |
|
284 STAR(FALL), |
|
285 SEQ( |
|
286 CHAR('*'), |
|
287 SEQ( |
|
288 CHAR('/'), |
|
289 STAR(FALL) |
|
290 ) |
|
291 ) |
|
292 ) |
|
293 ) |
|
294 |
|
295 , |
|
296 SEQ(CHAR('*'), |
|
297 CHAR('/') |
|
298 ) |
|
299 ) |
|
300 ) |
|
301 ) |
|
302 |
|
303 matcher(Q6, "/**/") |
|
304 matcher(Q6, "/*foobar*/") |
|
305 matcher(Q6, "/*test*/test*/") |
|
306 matcher(Q6, "/*test/*test*/") |
|
307 |
|
308 // Q7 |
|
309 |
|
310 val Q7r1 = SEQ(CHAR('a'), SEQ(CHAR('a'), CHAR('a'))) |
|
311 val Q7r2 = SEQ(BETWEEN(CHAR('a'), 19, 19), OPTIONAL(CHAR('a'))) |
|
312 |
|
313 val Q7str5 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
|
314 val Q7str6 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
|
315 val Q7str7 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
|
316 |
|
317 matcher(PLUS(PLUS(Q7r1)), Q7str5) |
|
318 matcher(PLUS(PLUS(Q7r2)), Q7str5) |
|
319 |
|
320 matcher(PLUS(PLUS(Q7r1)), Q7str6) |
|
321 matcher(PLUS(PLUS(Q7r2)), Q7str6) |
|
322 |
|
323 matcher(PLUS(PLUS(Q7r1)), Q7str7) |
|
324 matcher(PLUS(PLUS(Q7r2)), Q7str7) |
|
325 |