author | Chengsong |
Sat, 08 Feb 2020 21:37:05 +0000 | |
changeset 126 | 1260b383ae2c |
parent 123 | fb7472a29058 |
child 148 | c8ef391dd6f7 |
permissions | -rwxr-xr-x |
0 | 1 |
import Element.elem |
2 |
import RexpRelated._ |
|
3 |
import RexpRelated.Rexp._ |
|
4 |
import Partial._ |
|
5 |
import BRexp._ |
|
6 |
import scala.collection.mutable.ListBuffer |
|
7 |
object Spiral{ |
|
8 |
||
9 |
val space = elem(" ") |
|
10 |
val corner = elem("+") |
|
11 |
||
12 |
def spiral(nEdges: Int, direction: Int): Element = { |
|
13 |
if(nEdges == 1) |
|
14 |
elem("+") |
|
15 |
else { |
|
16 |
val sp = spiral(nEdges - 1, (direction + 3) % 4) |
|
17 |
def verticalBar = elem('|', 1, sp.height) |
|
18 |
def horizontalBar = elem('-', sp.width, 1) |
|
19 |
if(direction == 0) |
|
20 |
(corner beside horizontalBar) above sp//(sp beside space) |
|
21 |
else if (direction == 1) |
|
22 |
sp beside (corner above verticalBar) |
|
23 |
else if (direction == 2) |
|
24 |
(space beside sp) above (horizontalBar beside corner) |
|
25 |
else |
|
26 |
(verticalBar above corner) beside (space above sp) |
|
27 |
} |
|
28 |
} |
|
29 |
val alphabet = ("""abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.:"=()\;-+*!<>\/%{} """+"\n\t").toSet//Set('a','b','c') |
|
30 |
def bregx_tree(r: BRexp): Element = regx_tree(berase(r)) |
|
31 |
def regx_tree(r: Rexp): Element = aregx_tree(internalise(r)) |
|
5 | 32 |
def annotated_tree(r: ARexp): Element = { |
33 |
r match { |
|
34 |
case ACHAR(bs, d) => { |
|
35 |
//val Some(d) = alphabet.find(f) |
|
36 |
d match { |
|
37 |
case '\n' => elem("\\n") |
|
38 |
case '\t' => elem("\\t") |
|
39 |
case ' ' => elem("space") |
|
7 | 40 |
case d => if(bs.isEmpty) elem(d.toString) else elem(d.toString++" ") beside elem(bs.toString) |
5 | 41 |
} |
42 |
} |
|
43 |
case AONE(bs) => { |
|
7 | 44 |
if(bs.isEmpty) elem("ONE") else elem("ONE ") beside elem(bs.toString) |
5 | 45 |
} |
46 |
case AZERO => { |
|
7 | 47 |
elem("ZERO") |
5 | 48 |
} |
49 |
case ASEQ(bs, r1, r2) => { |
|
7 | 50 |
annotated_binary_print("SEQ", r1, r2, bs) |
5 | 51 |
} |
52 |
case AALTS(bs, rs) => { |
|
53 |
//elem("Awaiting completion") |
|
7 | 54 |
annotated_list_print("ALT", rs, bs) |
5 | 55 |
} |
56 |
case ASTAR(bs, r) => { |
|
7 | 57 |
annotated_list_print("STA", List(r), bs) |
5 | 58 |
} |
59 |
} |
|
60 |
} |
|
0 | 61 |
def aregx_tree(r: ARexp): Element = { |
62 |
r match { |
|
63 |
case ACHAR(bs, d) => { |
|
64 |
//val Some(d) = alphabet.find(f) |
|
65 |
d match { |
|
66 |
case '\n' => elem("\\n") |
|
67 |
case '\t' => elem("\\t") |
|
68 |
case ' ' => elem("space") |
|
69 |
case d => elem(d.toString) |
|
70 |
} |
|
71 |
} |
|
72 |
case AONE(bs) => { |
|
73 |
elem("ONE") |
|
74 |
} |
|
75 |
case AZERO => { |
|
76 |
elem("ZERO") |
|
77 |
} |
|
78 |
case ASEQ(bs, r1, r2) => { |
|
79 |
binary_print("SEQ", r1, r2) |
|
80 |
} |
|
81 |
case AALTS(bs, rs) => { |
|
82 |
//elem("Awaiting completion") |
|
83 |
list_print("ALT", rs) |
|
84 |
} |
|
85 |
case ASTAR(bs, r) => { |
|
86 |
list_print("STA", List(r)) |
|
87 |
} |
|
88 |
} |
|
89 |
} |
|
90 |
val port = elem(" └-") |
|
91 |
def list_print(name: String, rs: List[ARexp]): Element = { |
|
92 |
rs match { |
|
93 |
case r::Nil => { |
|
94 |
val pref = aregx_tree(r) |
|
95 |
val head = elem(name) |
|
96 |
(head left_align (port up_align pref) ) |
|
97 |
} |
|
98 |
case r2::r1::Nil => { |
|
99 |
binary_print(name, r2, r1) |
|
100 |
} |
|
101 |
case r::rs1 => { |
|
102 |
val pref = aregx_tree(r) |
|
103 |
val head = elem(name) |
|
104 |
if (pref.height > 1){ |
|
105 |
val link = elem('|', 1, pref.height - 1) |
|
106 |
(head left_align ((port above link) beside pref)) left_align tail_print(rs1) |
|
107 |
} |
|
108 |
else{ |
|
109 |
(head left_align (port beside pref) ) left_align tail_print(rs1) |
|
110 |
} |
|
111 |
} |
|
112 |
} |
|
113 |
} |
|
7 | 114 |
def annotated_list_print(name: String, rs: List[ARexp], bs: List[Bit]): Element = { |
115 |
rs match { |
|
116 |
case r::Nil => { |
|
117 |
val pref = annotated_tree(r) |
|
118 |
val head = if(bs.isEmpty) elem(name) else elem(name ++ " ") beside elem(bs.toString) |
|
119 |
(head left_align (port up_align pref) ) |
|
120 |
} |
|
121 |
case r2::r1::Nil => { |
|
122 |
annotated_binary_print(name, r2, r1, bs) |
|
123 |
} |
|
124 |
case r::rs1 => { |
|
125 |
val pref = annotated_tree(r) |
|
126 |
val head = if (bs.isEmpty) elem(name) else elem(name ++ " ") beside elem(bs.toString) |
|
127 |
if (pref.height > 1){ |
|
128 |
val link = elem('|', 1, pref.height - 1) |
|
129 |
(head left_align ((port above link) beside pref)) left_align annotated_tail_print(rs1) |
|
130 |
} |
|
131 |
else{ |
|
132 |
(head left_align (port beside pref) ) left_align annotated_tail_print(rs1) |
|
133 |
} |
|
134 |
} |
|
135 |
} |
|
136 |
} |
|
137 |
||
138 |
def annotated_tail_print(rs: List[ARexp]): Element = { |
|
139 |
rs match { |
|
140 |
case r2::r1::Nil => { |
|
141 |
val pref = annotated_tree(r2) |
|
142 |
val suff = annotated_tree(r1) |
|
143 |
if (pref.height > 1){ |
|
144 |
val link = elem('|', 1, pref.height - 1) |
|
145 |
((port above link) beside pref) left_align (port up_align suff) |
|
146 |
} |
|
147 |
else{ |
|
148 |
(port beside pref) left_align (port up_align suff) |
|
149 |
} |
|
150 |
} |
|
151 |
case r2::rs1 => { |
|
152 |
val pref = annotated_tree(r2) |
|
153 |
||
154 |
if (pref.height > 1){ |
|
155 |
val link = elem('|', 1, pref.height - 1) |
|
156 |
((port above link) beside pref) left_align annotated_tail_print(rs1)//(port up_align tail_print(rs1) ) |
|
157 |
} |
|
158 |
else{ |
|
159 |
(port beside pref) left_align annotated_tail_print(rs1)//(port up_align tail_print(rs1)) |
|
160 |
} |
|
161 |
//pref left_align tail_print(rs1) |
|
162 |
} |
|
163 |
} |
|
164 |
} |
|
165 |
||
0 | 166 |
def tail_print(rs: List[ARexp]): Element = { |
167 |
rs match { |
|
168 |
case r2::r1::Nil => { |
|
169 |
val pref = aregx_tree(r2) |
|
170 |
val suff = aregx_tree(r1) |
|
171 |
if (pref.height > 1){ |
|
172 |
val link = elem('|', 1, pref.height - 1) |
|
173 |
((port above link) beside pref) left_align (port up_align suff) |
|
174 |
} |
|
175 |
else{ |
|
176 |
(port beside pref) left_align (port up_align suff) |
|
177 |
} |
|
178 |
} |
|
179 |
case r2::rs1 => { |
|
180 |
val pref = aregx_tree(r2) |
|
181 |
||
182 |
if (pref.height > 1){ |
|
183 |
val link = elem('|', 1, pref.height - 1) |
|
184 |
((port above link) beside pref) left_align tail_print(rs1)//(port up_align tail_print(rs1) ) |
|
185 |
} |
|
186 |
else{ |
|
187 |
(port beside pref) left_align tail_print(rs1)//(port up_align tail_print(rs1)) |
|
188 |
} |
|
189 |
//pref left_align tail_print(rs1) |
|
190 |
} |
|
191 |
} |
|
192 |
} |
|
193 |
||
194 |
def binary_print(name: String, r1: ARexp, r2: ARexp): Element = { |
|
195 |
val pref = aregx_tree(r1) |
|
196 |
val suff = aregx_tree(r2) |
|
7 | 197 |
val head = elem(name) |
0 | 198 |
if (pref.height > 1){ |
199 |
val link = elem('|', 1, pref.height - 1) |
|
200 |
(head left_align ((port above link) beside pref) ) left_align (port up_align suff) |
|
201 |
} |
|
202 |
else{ |
|
203 |
(head left_align (port beside pref) ) left_align (port up_align suff) |
|
204 |
} |
|
205 |
} |
|
7 | 206 |
def annotated_binary_print(name: String, r1: ARexp, r2: ARexp, bs: List[Bit]): Element = { |
207 |
val pref = annotated_tree(r1) |
|
208 |
val suff = annotated_tree(r2) |
|
209 |
val head = if (bs.isEmpty) elem(name) else elem(name ++ " ") beside elem(bs.toString) |
|
210 |
if (pref.height > 1){ |
|
211 |
val link = elem('|', 1, pref.height - 1) |
|
212 |
(head left_align ((port above link) beside pref) ) left_align (port up_align suff) |
|
213 |
} |
|
214 |
else{ |
|
215 |
(head left_align (port beside pref) ) left_align (port up_align suff) |
|
216 |
} |
|
217 |
} |
|
218 |
||
0 | 219 |
val arr_of_size = ListBuffer.empty[Int] |
2 | 220 |
|
0 | 221 |
def pC(r: Rexp): Set[Rexp] = {//PD's companion |
222 |
r match { |
|
223 |
case SEQ(r1, r2) => pC(r2) |
|
224 |
case ALTS(rs) => rs.flatMap(a => pC(a) ).toSet |
|
225 |
case CHAR(c) => Set(r) |
|
226 |
case r => Set() |
|
227 |
} |
|
228 |
} |
|
229 |
def illustration(r: Rexp, s: String){ |
|
230 |
var i_like_imperative_style = internalise(r) |
|
231 |
val all_chars = s.toList |
|
232 |
for (i <- 0 to s.length - 1){ |
|
233 |
val der_res = bder(all_chars(i), i_like_imperative_style) |
|
234 |
val simp_res = bsimp(der_res) |
|
235 |
println("The original regex, the regex after derivative w.r.t " + all_chars(i) + " and the simplification of the derivative.") |
|
236 |
println(aregx_tree(i_like_imperative_style) up_align aregx_tree(der_res) up_align aregx_tree(simp_res)) |
|
237 |
//println(asize(i_like_imperative_style), asize(der_res), asize(simp_res)) |
|
238 |
arr_of_size += asize(i_like_imperative_style) |
|
239 |
//println(asize(simp_res), asize(simp_res) / arr_of_size(0) ) |
|
240 |
i_like_imperative_style = simp_res |
|
241 |
} |
|
242 |
arr_of_size += asize(i_like_imperative_style) |
|
243 |
} |
|
244 |
val ran = scala.util.Random |
|
245 |
var alphabet_size = 3 |
|
246 |
def balanced_seq_star_gen(depth: Int, star: Boolean): Rexp = { |
|
247 |
if(depth == 1){ |
|
248 |
((ran.nextInt(4) + 97).toChar).toString |
|
249 |
} |
|
250 |
else if(star){ |
|
251 |
STAR(balanced_seq_star_gen(depth - 1, false)) |
|
252 |
} |
|
253 |
else{ |
|
254 |
SEQ(balanced_seq_star_gen(depth - 1, true), balanced_seq_star_gen(depth - 1, true)) |
|
255 |
} |
|
256 |
} |
|
257 |
def max(i: Int, j: Int): Int = { |
|
258 |
if(i > j) |
|
259 |
i |
|
260 |
else |
|
261 |
j |
|
262 |
} |
|
263 |
def random_struct_gen(depth:Int): Rexp = { |
|
264 |
val dice = ran.nextInt(3) |
|
265 |
val dice2 = ran.nextInt(3) |
|
266 |
(dice, depth) match { |
|
267 |
case (_, 0) => ((ran.nextInt(3) + 97).toChar).toString |
|
268 |
case (0, i) => STAR(random_struct_gen(max(0, i - 1 - dice2))) |
|
269 |
case (1, i) => SEQ(random_struct_gen(max(0, i - 1 - dice2)), random_struct_gen(max(0, i - 1 - dice2))) |
|
270 |
case (2, i) => ALTS( List(random_struct_gen(max(0, i - 1 - dice2)), random_struct_gen(max(0, i - 1 - dice2))) ) |
|
271 |
} |
|
272 |
} |
|
273 |
def balanced_struct_gen(depth: Int): Rexp = { |
|
274 |
val dice = ran.nextInt(3) |
|
275 |
(dice, depth) match { |
|
276 |
case (_, 0) => ((ran.nextInt(3) + 97).toChar).toString |
|
277 |
case (0, i) => STAR(random_struct_gen(depth - 1)) |
|
278 |
case (1, i) => SEQ(random_struct_gen(depth - 1), random_struct_gen(depth - 1)) |
|
279 |
case (2, i) => ALTS( List(random_struct_gen(depth - 1), random_struct_gen(depth - 1) ) ) |
|
280 |
} |
|
281 |
} |
|
6 | 282 |
def random_pool(n: Int, d: Int) : Stream[Rexp] = n match { |
283 |
case 0 => (for (i <- 1 to 10) yield balanced_struct_gen(d)).toStream |
|
284 |
case n => { |
|
285 |
val rs = random_pool(n - 1, d) |
|
286 |
rs #::: |
|
287 |
(for (i <- (1 to 10).toStream) yield balanced_struct_gen(d)) #::: |
|
288 |
(for (i <- (1 to 10).toStream) yield random_struct_gen(d)) |
|
289 |
} |
|
290 |
} |
|
291 |
||
0 | 292 |
def rd_string_gen(alp_size: Int, len: Int): String = { |
293 |
if( len > 0) |
|
294 |
((ran.nextInt(alp_size) + 97).toChar).toString + rd_string_gen(alp_size, len - 1) |
|
295 |
else |
|
296 |
((ran.nextInt(alp_size) + 97).toChar).toString |
|
297 |
} |
|
298 |
def plot(b: List[Int]){ |
|
299 |
println(b(0),b.max) |
|
300 |
||
301 |
} |
|
302 |
def dep_exp(depth: List[Int]){ |
|
303 |
for(i <- depth){ |
|
304 |
arr_of_size.clear() |
|
305 |
val s = rd_string_gen(alphabet_size, (i-8)*(i-8)+10) |
|
306 |
val r = random_struct_gen(i) |
|
307 |
println("depth: "+i) |
|
308 |
illustration(r, s) //"abcabadaaadcabdbabcdaadbabbcbbdabdabbcbdbabdbcdb") |
|
309 |
//println("depth: " + i + " general stats:"+ arr_of_size(0), arr_of_size.max, arr_of_size.max/arr_of_size(0)) |
|
310 |
//println("x y label alignment") |
|
311 |
/*for(i <- 0 to s.length - 1){ |
|
312 |
if(s(i) == '\n') |
|
313 |
println(i+" "+arr_of_size(i)+" "+"nl"+" -140") |
|
314 |
else if(s(i) == ' ') |
|
315 |
println(i+" "+arr_of_size(i)+" "+"sp"+" -140") |
|
316 |
else |
|
317 |
println(i+" "+arr_of_size(i)+" "+s(i)+" -140") |
|
318 |
}*/ |
|
319 |
//println(s.length + " " + arr_of_size(s.length) + " ]" + " -140") |
|
320 |
} |
|
321 |
} |
|
322 |
def case_study(ss: List[String], r: Rexp){ |
|
323 |
for(s <- ss){ |
|
324 |
arr_of_size.clear() |
|
325 |
illustration(r, s) |
|
326 |
println("x y label alignment") |
|
327 |
for(i <- 0 to s.length - 1){ |
|
328 |
if(s(i) == '\n') |
|
329 |
println(i+" "+arr_of_size(i)+" "+"nl"+" -140") |
|
330 |
else if(s(i) == ' ') |
|
331 |
println(i+" "+arr_of_size(i)+" "+"sp"+" -140") |
|
332 |
else |
|
333 |
println(i+" "+arr_of_size(i)+" "+s(i)+" -140") |
|
334 |
} |
|
335 |
} |
|
336 |
} |
|
337 |
def star_gen(dp: Int): Rexp = { |
|
338 |
if(dp > 0) |
|
339 |
STAR(star_gen(dp - 1)) |
|
340 |
else |
|
341 |
"a" |
|
342 |
} |
|
343 |
def strs_gen(len: Int, num: Int): List[String] = { |
|
344 |
if(num > 0){ |
|
345 |
rd_string_gen(3, len)::strs_gen(len, num - 1) |
|
346 |
} |
|
347 |
else{ |
|
348 |
Nil |
|
349 |
} |
|
350 |
} |
|
351 |
def regx_print(r: Rexp): String = { |
|
352 |
r match { |
|
353 |
case ZERO => |
|
354 |
"ZERO" |
|
355 |
case CHAR(c) => { |
|
356 |
//val Some(c) = alphabet.find(f) |
|
357 |
"\"" + c.toString + "\"" |
|
358 |
} |
|
359 |
case ONE => { |
|
360 |
"ONE" |
|
361 |
} |
|
362 |
case ALTS(rs) => { |
|
363 |
"ALTS(List("+(rs.map(regx_print)).foldLeft("")((a, b) => if(a == "") b else a + "," + b)+"))" |
|
364 |
} |
|
365 |
case SEQ(r1, r2) => { |
|
366 |
"SEQ(" + regx_print(r1) + "," + regx_print(r2) + ")" |
|
367 |
} |
|
368 |
case STAR(r) => { |
|
369 |
"STAR(" + regx_print(r) + ")" |
|
370 |
} |
|
371 |
} |
|
372 |
} |
|
373 |
val mkst = "abcdefghijklmnopqrstuvwxyz" |
|
374 |
def weak_sub_check(r: Rexp, s: String, i: Int, f: (List[Rexp], Set[Rexp]) => Boolean){ |
|
375 |
//we first compute pders over the set of all strings on the alphabet |
|
376 |
val pd = pderas(Set(r), i + 4) |
|
377 |
//then "b-internalise" the regular expression into a brexp(this is essentially |
|
378 |
//attaching a bit Z to every alts to signify that they come from the original regular expression) |
|
379 |
var old = brternalise(r) |
|
380 |
//this is for comparison between normal simp and the weakened version of simp |
|
381 |
//normal simp will be performed on syncold |
|
382 |
//weakend simp will be performed on old |
|
5 | 383 |
var syncold = internalise(r) |
0 | 384 |
val all_chars = s.toList |
385 |
for (i <- 0 to s.length - 1){ |
|
5 | 386 |
val syncder_res = bder(all_chars(i), syncold) |
387 |
val syncsimp_res = super_bsimp(syncder_res) |
|
0 | 388 |
//see brder for detailed explanation |
389 |
//just changes bit Z to S when deriving an ALTS, |
|
390 |
//signifying that the structure has been "touched" and |
|
391 |
//therefore able to be spilled in the bspill function |
|
392 |
val der_res = brder(all_chars(i), old) |
|
393 |
val simp_res = br_simp(der_res) |
|
394 |
val anatomy = bspill(simp_res) |
|
395 |
//track if the number of regular expressions exceeds those in the PD set(remember PD means the pders over A*) |
|
5 | 396 |
if(f(List(berase(simp_res)), pd) == false ){ |
397 |
println(size(erase(syncsimp_res))) |
|
0 | 398 |
println(size(berase(simp_res))) |
3 | 399 |
println(bregx_tree(simp_res)) |
5 | 400 |
println(s) |
401 |
println(i) |
|
402 |
println(r) |
|
0 | 403 |
println(anatomy.map(size).sum) |
404 |
println(pd.map(size).sum) |
|
405 |
} |
|
406 |
old = simp_res |
|
407 |
syncold = syncsimp_res |
|
408 |
} |
|
409 |
} |
|
410 |
def inclusion_truth(anatomy: List[Rexp], pd: Set[Rexp]): Boolean = { |
|
411 |
val aset = anatomy.toSet |
|
412 |
if(aset subsetOf pd){ |
|
413 |
true |
|
414 |
} |
|
415 |
else{ |
|
416 |
println("inclusion not true") |
|
417 |
false |
|
418 |
} |
|
419 |
} |
|
420 |
def size_comp(anatomy: List[Rexp], pd: Set[Rexp]):Boolean = {println("size of PD and bspilled simp regx: ", pd.size, anatomy.size); true} |
|
5 | 421 |
def size_expansion_rate(r: List[Rexp], pd: Set[Rexp]): Boolean = if(size(r(0)) > (pd.map(size).sum) * 3 ) { false }else {true} |
4 | 422 |
def ders_simp(r: ARexp, s: List[Char]): ARexp = { |
423 |
s match { |
|
424 |
case Nil => r |
|
425 |
case c::cs => ders_simp(bsimp(bder(c, r)), cs) |
|
426 |
} |
|
427 |
} |
|
5 | 428 |
val big_panda = STAR(STAR(STAR(ALTS(List(ALTS(List(CHAR('c'), CHAR('b'))), SEQ(CHAR('c'),CHAR('c'))))))) |
429 |
val str_panda = "ccccb" |
|
0 | 430 |
def check_all(){ |
5 | 431 |
|
432 |
weak_sub_check(big_panda, str_panda, 6, size_expansion_rate) |
|
433 |
||
0 | 434 |
} |
93 | 435 |
def bstostick(bs: List[Bit]): Element = bs match { |
436 |
//case b::Nil => elem(b.toString) |
|
437 |
case b::bs1 => elem(b.toString) beside bstostick(bs1) |
|
438 |
case Nil => elem(' ', 1, 1) |
|
439 |
} |
|
440 |
def bits_print(r: ARexp): Element = r match { |
|
441 |
case AALTS(bs,rs) => { |
|
442 |
val bitstick = bstostick(bs) |
|
443 |
rs match { |
|
444 |
case r::rs1 => |
|
445 |
rs1.foldLeft( |
|
446 |
((elem("(") left_align bitstick) beside |
|
447 |
bits_print(r)) )( (acc, r2) => acc beside ((elem(" + ") above elem(" ")) beside bits_print(r2) )) beside |
|
448 |
elem(")") |
|
449 |
case Nil => elem(' ', 1, 1) |
|
450 |
} |
|
451 |
} |
|
452 |
case ASEQ(bs, r1, r2) => { |
|
103 | 453 |
((elem("[") left_align bstostick(bs)) beside bits_print(r1) beside elem("~") beside bits_print(r2) beside (elem("]") above elem(" "))) |
93 | 454 |
} |
455 |
case ASTAR(bs, r) => { |
|
456 |
r match { |
|
457 |
case AONE(_) | AZERO | ACHAR(_, _) => { |
|
458 |
(elem("{") left_align bstostick(bs)) beside (bits_print(r) beside elem("}*")) |
|
459 |
} |
|
460 |
case _ => { |
|
461 |
(elem("{") left_align bstostick(bs)) beside (bits_print(r) beside elem("}*")) |
|
462 |
} |
|
463 |
} |
|
464 |
} |
|
465 |
case AONE(bs) => { |
|
466 |
elem("1") left_align bstostick(bs) |
|
467 |
} |
|
468 |
case ACHAR(bs, c) => { |
|
469 |
elem(c, 1, 1) left_align bstostick(bs) |
|
470 |
} |
|
471 |
case AZERO => |
|
472 |
{ |
|
473 |
elem ("0") above elem(" ") |
|
474 |
} |
|
475 |
} |
|
476 |
def happy_print(r: Rexp): Unit = r match { |
|
477 |
case ALTS( r::rs ) => { |
|
478 |
print("(") |
|
479 |
happy_print(r) |
|
480 |
rs.map(r2=>{ |
|
481 |
print(" + ") |
|
482 |
happy_print(r2) |
|
483 |
}) |
|
484 |
print(")") |
|
485 |
} |
|
486 |
case SEQ(r1, r2) => { |
|
487 |
happy_print(r1) |
|
488 |
print("~") |
|
489 |
happy_print(r2) |
|
490 |
} |
|
491 |
case STAR(r) => { |
|
492 |
r match { |
|
493 |
case ONE | ZERO | CHAR(_) => { |
|
494 |
happy_print(r) |
|
495 |
print("*") |
|
496 |
} |
|
497 |
case _ => { |
|
498 |
print("[") |
|
499 |
happy_print(r) |
|
500 |
print("]*") |
|
501 |
} |
|
502 |
} |
|
503 |
} |
|
504 |
case ONE => { |
|
505 |
print("1") |
|
506 |
} |
|
507 |
case ZERO => { |
|
508 |
print("0") |
|
509 |
} |
|
510 |
case CHAR(c) =>{ |
|
511 |
print(c) |
|
512 |
} |
|
513 |
} |
|
514 |
def bits_slide(s: String, r: Rexp){ |
|
515 |
val nangao = ders_simp(internalise(r), s.toList) |
|
516 |
val easy = bsimp(bders(s.toList, internalise(r))) |
|
517 |
println(s) |
|
518 |
println(r) |
|
519 |
happy_print(r) |
|
520 |
println() |
|
521 |
print(bits_print(nangao)) |
|
522 |
println() |
|
523 |
print(bits_print(easy)) |
|
524 |
println() |
|
525 |
} |
|
526 |
//found r = SEQ(ALTS(List(CHAR(c), CHAR(a))),SEQ(ALTS(List(CHAR(a), CHAR(c))),ALTS(List(CHAR(b), CHAR(c))))) s = "aa" |
|
527 |
def bsimp_print(r: ARexp): Unit = r match { |
|
528 |
case ASEQ(bs, r1, r2) => |
|
529 |
{ |
|
530 |
println("0.r or r.0 to 0 or bs1 1.r to fuse bs++bs1 r") |
|
531 |
bits_print(bsimp(r1)) |
|
532 |
bits_print(bsimp(r2)) |
|
533 |
} |
|
534 |
case AALTS(bs, rs) => { |
|
535 |
println("rs.map(bsimp) equals *************") |
|
536 |
val rs_simp = rs.map(bsimp) |
|
537 |
for(r <- rs_simp){ |
|
538 |
println(bits_print(r)) |
|
539 |
} |
|
540 |
println("*************") |
|
541 |
println("flts(rs_simp)") |
|
542 |
val flat_res = flats(rs_simp) |
|
543 |
for(r <- flat_res){ |
|
544 |
println(bits_print(r)) |
|
545 |
} |
|
546 |
println("dB(flat_res)") |
|
547 |
val dist_res = distinctBy(flat_res, erase) |
|
548 |
for(r <- dist_res){ |
|
549 |
println(bits_print(r)) |
|
550 |
} |
|
551 |
dist_res match { |
|
552 |
case Nil => println("AZERO") |
|
553 |
case r::Nil => println("fuse(bs, r)") |
|
554 |
case rs => println("AALTS(bs, rs)") |
|
555 |
} |
|
556 |
} |
|
557 |
case _ => println("No simp at this level") |
|
558 |
} |
|
559 |
def tellmewhy(){ |
|
560 |
//val r = SEQ(ALTS(List(CHAR('a'), CHAR('b'))),ALTS(List(ALTS(List(CHAR('a'), CHAR('a'))), STAR(CHAR('a'))))) |
|
107 | 561 |
//val r = SEQ(ALTS(List(CHAR('a'), CHAR('b'))),ALTS(List(CHAR('a'), STAR(CHAR('a')) ) )) |
562 |
val r = ("ab" | ( (("a")%) | "aa") ) |
|
93 | 563 |
//val r = ("a"|"b")~("a") |
564 |
val s = "aa" |
|
123 | 565 |
for(i <- 0 to s.length-1){ |
93 | 566 |
val ss = s.slice(0, i+1) |
107 | 567 |
val nangao = bders_simp_rf(ss.toList, internalise(r)) |
93 | 568 |
val easy = (bders(ss.toList, internalise(r))) |
123 | 569 |
println("iteration starts") |
570 |
println("bders_simp") |
|
93 | 571 |
println(bits_print(nangao)) |
107 | 572 |
println() |
123 | 573 |
println("bders") |
93 | 574 |
println(bits_print(easy)) |
575 |
println() |
|
123 | 576 |
println("new simp") |
107 | 577 |
println(bits_print(bsimp_rf(easy))) |
123 | 578 |
println("iteration ends") |
93 | 579 |
} |
123 | 580 |
println("old simp ders") |
93 | 581 |
println(bits_print(bsimp(bders(s.toList, internalise(r))))) |
123 | 582 |
println("old derssimp") |
101 | 583 |
println(bits_print(ders_simp(internalise(r), s.toList))) |
93 | 584 |
} |
585 |
def find_re(){ |
|
586 |
for (i <- 1 to 10000){ |
|
587 |
val r = balanced_struct_gen(3) |
|
588 |
val s = rd_string_gen(2,1) |
|
589 |
val nangao = ders_simp(internalise(r), s.toList) |
|
590 |
val easy = bsimp(bders(s.toList, internalise(r))) |
|
591 |
if(nangao != easy){ |
|
592 |
bits_slide(s, r) |
|
593 |
} |
|
594 |
} |
|
595 |
} |
|
5 | 596 |
//simplified regex size 291, so called pd_simp size 70 (did not do simplification to terms in the PD set) |
10 | 597 |
def pushbits(r: ARexp): ARexp = r match { |
11
9c1ca6d6e190
The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents:
10
diff
changeset
|
598 |
case AALTS(bs, rs) => AALTS(Nil, rs.map(r=>fuse(bs, pushbits(r)))) |
9c1ca6d6e190
The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents:
10
diff
changeset
|
599 |
case ASEQ(bs, r1, r2) => ASEQ(bs, pushbits(r1), pushbits(r2)) |
10 | 600 |
case r => r |
601 |
} |
|
4 | 602 |
def correctness_proof_convenient_path(){ |
107 | 603 |
for(i <- 1 to 19999){ |
604 |
val s = rd_string_gen(alphabet_size, 3)//"abaa"//rd_string_gen(alphabet_size, 3) |
|
605 |
val r = internalise(random_struct_gen(4))//ASTAR(List(),AALTS(List(),List(ASTAR(List(Z),ACHAR(List(),'a')), ASEQ(List(S),ACHAR(List(),'a'),ACHAR(List(),'b')))))//internalise(balanced_struct_gen(3))//SEQ(ALTS(List(STAR("a"),ALTS(List("a","c")))),SEQ(ALTS(List("c","a")),ALTS(List("c","b")))) //random_struct_gen(7) |
|
4 | 606 |
for(j <- 0 to s.length - 1){ |
607 |
val ss = s.slice(0, j+ 1) |
|
107 | 608 |
val nangao = bders_simp_rf(ss.toList, r) |
609 |
val easy = bsimp_rf(bders_rf(ss.toList, r)) |
|
610 |
if(!(nangao == easy)){ |
|
4 | 611 |
println(j) |
10 | 612 |
println("not equal") |
5 | 613 |
println("string") |
4 | 614 |
println(ss) |
5 | 615 |
println("original regex") |
107 | 616 |
println(annotated_tree(r)) |
5 | 617 |
println("regex after ders simp") |
10 | 618 |
println(annotated_tree(nangao)) |
5 | 619 |
println("regex after ders") |
107 | 620 |
println(annotated_tree(bders(ss.toList, r)))//flats' fuse when opening up AALTS causes the difference |
5 | 621 |
println("regex after ders and then a single simp") |
7 | 622 |
println(annotated_tree(easy)) |
4 | 623 |
} |
624 |
} |
|
107 | 625 |
} |
4 | 626 |
} |
13 | 627 |
/* if(bts == cdbts){//test of equality code v = retrieve internalise(r) v if |- v : r |
628 |
println(bts) |
|
629 |
println(cdbts) |
|
630 |
println("code v = retrieve internalise(r) v if |- v : r") |
|
631 |
} |
|
14 | 632 |
|
633 |
||
634 |
val r_der_s = bders(st.toList, rg) |
|
635 |
println(aregx_tree(r_der_s)) |
|
636 |
val bts = retrieve(r_der_s, unsimplified_vl) |
|
637 |
val cdbts = code(unsimplified_vl) |
|
638 |
val simprg = bsimp(r_der_s) |
|
639 |
val frectv = decode(erase(simprg), cdbts) |
|
640 |
val simpbts = retrieve(simprg, frectv) |
|
641 |
if(bts == simpbts){ |
|
642 |
println("retrieve r v = retrieve (bsimp r) (decode bsimp(r) code(v))") |
|
643 |
println("bits:") |
|
644 |
//println(bts) |
|
645 |
println(simpbts) |
|
646 |
println("v = ") |
|
647 |
println(unsimplified_vl) |
|
648 |
println("frect v = ") |
|
649 |
println(frectv) |
|
650 |
} |
|
17 | 651 |
*///KH8W5BXL |
652 |
def nice_lex(r: Rexp, s: List[Char], ar: ARexp) : Val = s match { |
|
653 |
case Nil => |
|
654 |
if (nullable(r)){ |
|
655 |
val vr = mkeps(r) |
|
656 |
val bits1 = retrieve(ar, vr) |
|
657 |
val av = bsimp2(ar, vr) |
|
658 |
val bits2 = retrieve(av._1, av._2) |
|
659 |
if(bits1 != bits2) throw new Exception("bsimp2 does not work") |
|
660 |
vr |
|
661 |
} |
|
662 |
else throw new Exception("Not matched") |
|
663 |
case c::cs => { |
|
664 |
val vr = inj(r, c, nice_lex(der(c, r), cs, bder(c, ar))); |
|
665 |
val bits1 = retrieve(ar, vr); |
|
666 |
val av = bsimp2(ar, vr); |
|
667 |
val bits2 = retrieve(av._1, av._2); |
|
668 |
if(bits1 != bits2) throw new Exception("bsimp2 does not work"); |
|
669 |
vr |
|
670 |
} |
|
671 |
} |
|
672 |
def test_bsimp2(){ |
|
673 |
for(i <- 1 to 1000){ |
|
674 |
val rg = (balanced_struct_gen(9))//ASTAR(List(),AALTS(List(),List(ASTAR(List(Z),ACHAR(List(),'a')), ASEQ(List(S),ACHAR(List(),'a'),ACHAR(List(),'b')))))//internalise(balanced_struct_gen(3))//SEQ(ALTS(List(STAR("a"),ALTS(List("a","c")))),SEQ(ALTS(List("c","a")),ALTS(List("c","b")))) |
|
675 |
val st = rd_string_gen(3, 4) |
|
676 |
val a = internalise(rg) |
|
677 |
val final_derivative = ders(st.toList, rg) |
|
678 |
if(nullable(final_derivative)) |
|
679 |
nice_lex(rg, st.toList, a) |
|
680 |
} |
|
681 |
} |
|
15 | 682 |
|
17 | 683 |
def neat_retrieve(){ |
684 |
for(i <- 1 to 1000){ |
|
685 |
val rg = internalise(balanced_struct_gen(6))//ASTAR(List(),AALTS(List(),List(ASTAR(List(Z),ACHAR(List(),'a')), ASEQ(List(S),ACHAR(List(),'a'),ACHAR(List(),'b')))))//internalise(balanced_struct_gen(3))//SEQ(ALTS(List(STAR("a"),ALTS(List("a","c")))),SEQ(ALTS(List("c","a")),ALTS(List("c","b")))) |
|
686 |
val st = rd_string_gen(3, 5) |
|
687 |
val final_derivative = ders(st.toList, erase(rg)) |
|
688 |
if(nullable(final_derivative)){ |
|
689 |
val unsimplified_vl = mkeps(final_derivative) |
|
690 |
val arexp_for_retrieve = bders( st.toList, rg) |
|
691 |
val simp_ar_vl = bsimp2(arexp_for_retrieve, unsimplified_vl) |
|
692 |
val bits1 = retrieve(arexp_for_retrieve, unsimplified_vl) |
|
693 |
val bits2 = retrieve(simp_ar_vl._1, simp_ar_vl._2) |
|
694 |
if(bits1 != bits2){ |
|
695 |
println("nOOOOOOOOOO!") |
|
15 | 696 |
} |
697 |
} |
|
11
9c1ca6d6e190
The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents:
10
diff
changeset
|
698 |
} |
17 | 699 |
} |
700 |
||
7 | 701 |
def radical_correctness(){ |
702 |
enum(3, "abc").map(tests_blexer_simp(strs(3, "abc"))).toSet |
|
703 |
random_pool(1, 5).map(tests_blexer_simp(strs(5, "abc"))).toSet |
|
704 |
} |
|
15 | 705 |
def christian_def(){ |
706 |
val r = ALTS(List(SEQ(ZERO,CHAR('b')), ONE)) |
|
707 |
val v = Right(Empty) |
|
708 |
val a = internalise(r) |
|
17 | 709 |
val a_v = bsimp2(a,v) |
15 | 710 |
println(s"Testing ${r} and ${v}") |
711 |
println(s"internalise(r) = ${a}") |
|
17 | 712 |
println(s"a_v = ${a_v}") |
15 | 713 |
val bs1 = retrieve(a, v) |
714 |
println(bs1) |
|
17 | 715 |
println(s"as = ${a_v._1}") |
15 | 716 |
//val bs2 = retrieve(as, decode(erase(as), bs1)) |
17 | 717 |
val bs3 = retrieve(a_v._1, a_v._2)//decode(erase(as), bs1) does not work |
718 |
//println(decode(erase(as), bs1)) |
|
719 |
println(s"bs1=${bs1}, bs3=${bs3}") |
|
720 |
//println(decode(erase(a_v._1), bs3)) |
|
15 | 721 |
} |
72 | 722 |
def christian_def2(){ |
89 | 723 |
val a = AALTS(List(S), List(AZERO, ASEQ(List(S), AALTS(List(S), List(AONE(List(S)), ACHAR(List(S), 'c'))), ACHAR(List(S),'c') )) ) |
724 |
//AALTS(List(Z), List(AZERO, ASEQ(List(), AALTS(List(),List(AONE(List()), ACHAR(Nil, 'b'))), ACHAR(Nil, 'b')) ) ) |
|
725 |
val unsimp = bsimp(bder('c',a)) |
|
726 |
val simped = bsimp(bder('c', bsimp(a)) ) |
|
72 | 727 |
println(bsimp(a)) |
89 | 728 |
if(unsimp != simped){ |
72 | 729 |
println(s"bsimp(bder c r) = ${unsimp}, whereas bsimp(bder c bsimp r) = ${simped}") |
730 |
} |
|
731 |
} |
|
15 | 732 |
def essence_posix(){ |
16 | 733 |
//val s = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"//rd_string_gen(alphabet_size, 3)//"abaa"//rd_string_gen(alphabet_size, 3) |
734 |
val s0 = "a" |
|
735 |
val r = SEQ(STAR(ALT("a", "aa")), "b")//internalise(random_struct_gen(4))//ASTAR(List(),AALTS(List(),List(ASTAR(List(Z),ACHAR(List(),'a')), ASEQ(List(S),ACHAR(List(),'a'),ACHAR(List(),'b')))))//internalise(balanced_struct_gen(3))//SEQ(ALTS(List(STAR("a"),ALTS(List("a","c")))),SEQ(ALTS(List("c","a")),ALTS(List("c","b")))) //random_struct_gen(7) |
|
736 |
for(i <- 1 to 40){ |
|
737 |
val s = s0*i |
|
738 |
//printf("%d %d\n",i, size(ders(s.toList, r))) |
|
739 |
printf("%d %d\n",i, asize(ders_simp( internalise(r), s.toList))) |
|
740 |
//println(asize(ders_simp( internalise(r), s.toList))) |
|
741 |
} |
|
15 | 742 |
} |
16 | 743 |
|
87 | 744 |
def speed_test(){ |
745 |
val s0 = "a"*1000 |
|
746 |
val r = SEQ(STAR("a"), "b") |
|
747 |
for(i <- 1 to 30){ |
|
748 |
val s = s0*i |
|
749 |
val start = System.nanoTime() |
|
750 |
try{ |
|
751 |
blex_simp(internalise(r), s.toList) |
|
752 |
} |
|
753 |
catch{ |
|
754 |
case x: Exception => |
|
755 |
} |
|
756 |
val end = System.nanoTime() |
|
757 |
printf("%d %f\n",i, (end - start)/1.0e9) |
|
758 |
} |
|
759 |
} |
|
91 | 760 |
/* |
761 |
lemma retrieve_encode_STARS: |
|
762 |
assumes "∀v∈set vs. ⊨ v : r ∧ code v = retrieve (intern r) v" |
|
763 |
shows "code (Stars vs) = retrieve (ASTAR [] (intern r)) (Stars vs)" |
|
764 |
*/ |
|
765 |
def retrieve_encode_STARS(){ |
|
766 |
val r = ALT(CHAR('a'), SEQ(CHAR('a'),CHAR('b')) ) |
|
767 |
//val v = Stars(List(Sequ(Chr('a'), Chr('b')), Chr('a')) ) |
|
768 |
val v1 = Right(Sequ(Chr('a'), Chr('b'))) |
|
769 |
val v2 = Left(Chr('a')) |
|
770 |
val compressed1 = code(v1) |
|
771 |
val compressed2 = code(v2) |
|
772 |
val xompressed1 = retrieve(internalise(r), v1) |
|
773 |
val xompressed2 = retrieve(internalise(r), v2) |
|
774 |
println(compressed1, compressed2, xompressed1, xompressed2) |
|
775 |
val v = Stars(List(v1, v2)) |
|
776 |
val compressed = code(v) |
|
777 |
val a = ASTAR(List(), internalise(r)) |
|
778 |
val xompressed = retrieve(a, v) |
|
779 |
println(compressed, xompressed) |
|
780 |
} |
|
781 |
//what does contains7 do? |
|
782 |
//it causes exception |
|
783 |
//relation between retreive, bder and injection |
|
784 |
// a match error occurs when doing the injection |
|
785 |
def contains7(){ |
|
786 |
val r = STAR( SEQ(CHAR('a'),CHAR('b')) ) |
|
787 |
val v = Sequ(Chr('b'), Stars(List(Sequ(Chr('a'), Chr('b'))))) |
|
788 |
val a = internalise(r) |
|
789 |
val c = 'a' |
|
790 |
val v_aug = inj(r, c, v) |
|
791 |
println("bder c r:") |
|
792 |
println(bder(c, a)) |
|
793 |
println("r:") |
|
794 |
println(a) |
|
795 |
println("bits they can both produce:") |
|
796 |
println(retrieve(a, v_aug)) |
|
797 |
} |
|
798 |
def der_seq(r:ARexp, s: List[Char],acc: List[ARexp]) : List[ARexp] = s match{ |
|
799 |
case Nil => acc ::: List(r) |
|
800 |
case c::cs => der_seq(bder(c, r), cs, acc ::: List(r)) |
|
801 |
} |
|
802 |
def der_seq_rev(r:ARexp, s: List[Char], acc: List[ARexp]): List[ARexp] = s match{ |
|
803 |
case Nil => r::acc |
|
804 |
case c::cs =>der_seq_rev(r, cs, bders(s, r) :: acc ) |
|
805 |
} |
|
806 |
def re_close(l1: List[ARexp], l2: List[ARexp], re_init: ARexp): ARexp = l1 match { |
|
807 |
case Nil => re_init |
|
808 |
case c::cs => if(bnullable(c)) re_close(cs, l2.tail, AALTS(List(), List(re_init, fuse(mkepsBC(c), l2.head)) ) ) |
|
809 |
else re_close(cs, l2.tail, re_init) |
|
810 |
} |
|
92 | 811 |
//HERE |
91 | 812 |
def closed_string_der(r1: ARexp, r2: ARexp, s: String): ARexp = { |
813 |
val l1 = der_seq(r1, s.toList, Nil) |
|
814 |
val l2 = der_seq_rev(r2, s.toList, Nil) |
|
93 | 815 |
val Re = re_close((l1.reverse).tail, l2.tail, ASEQ(List(), l1.last, l2.head)) |
91 | 816 |
print(Re) |
817 |
val Comp = bders( s.toList, ASEQ(List(), r1, r2)) |
|
818 |
print(Comp ) |
|
819 |
if(Re != Comp){ |
|
820 |
println("boooooooooooooooo!joihniuguyfuyftyftyufuyids gheioueghrigdhxilj") |
|
821 |
} |
|
822 |
Re |
|
823 |
} |
|
824 |
def newxp1(){ |
|
825 |
val r1 = internalise("ab"|"") |
|
826 |
val r2 = internalise(("b")%) |
|
827 |
val s = "abbbbb" |
|
828 |
val s2= "bbbbb" |
|
829 |
val s3 = "aaaaaa" |
|
830 |
//closed_string_der(r1, r2, s) |
|
831 |
//closed_string_der(r1, r2, s2) |
|
832 |
closed_string_der(r1, r2, s3) |
|
833 |
} |
|
92 | 834 |
|
835 |
def string_der_test(){ |
|
836 |
for(i <- 0 to 10){ |
|
837 |
val s = rd_string_gen(alphabet_size, i).toList |
|
838 |
val r = random_struct_gen(2) |
|
839 |
if(ders(s, r) != ders2(s, r)){ |
|
840 |
println(i) |
|
841 |
println(s) |
|
842 |
println(r) |
|
843 |
println(ders(s, r)) |
|
844 |
println(ders2(s,r)) |
|
845 |
println("neq") |
|
846 |
} |
|
847 |
} |
|
848 |
} |
|
109 | 849 |
def have_fun(){ |
850 |
val bis = List(S,S) |
|
851 |
val bits = List(S,S,Z) |
|
852 |
val reg = ("a" | (("a")%) )~("b") |
|
853 |
val res = decode_aux(reg, bis) |
|
854 |
val result = decode_aux(reg, bis) |
|
855 |
val result1 = decode_aux(reg, List(Z)) |
|
856 |
println(res) |
|
857 |
println(result) |
|
858 |
println(bsimp(bders( "a".toList, internalise(reg)))) |
|
859 |
println(result1) |
|
860 |
} |
|
0 | 861 |
def main(args: Array[String]) { |
93 | 862 |
//println(S.toString) |
863 |
//find_re() |
|
107 | 864 |
//tellmewhy() |
865 |
//correctness_proof_convenient_path() |
|
110 | 866 |
tellmewhy() |
867 |
//have_fun() |
|
93 | 868 |
//string_der_test() |
92 | 869 |
//comp(rd_string_gen(3,6).toList, random_struct_gen(7)) |
870 |
//newxp1() |
|
91 | 871 |
//contains7() |
872 |
//retrieve_encode_STARS() |
|
4 | 873 |
//check_all() |
7 | 874 |
//radical_correctness() |
11
9c1ca6d6e190
The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents:
10
diff
changeset
|
875 |
//correctness_proof_convenient_path() |
15 | 876 |
//retrieve_experience() |
17 | 877 |
//neat_retrieve() |
72 | 878 |
//test_bsimp2() |
91 | 879 |
//christian_def2() |
16 | 880 |
//christian_def() |
17 | 881 |
//essence_posix() |
89 | 882 |
//speed_test() |
0 | 883 |
} |
884 |
} |
|
93 | 885 |
//List( ASTAR(List(Z),ACHAR(List(),a)), AALTS(List(S),List(ACHAR(List(Z),b), ACHAR(List(S),a))) ) |