27 } |
27 } |
28 } |
28 } |
29 val alphabet = ("""abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.:"=()\;-+*!<>\/%{} """+"\n\t").toSet//Set('a','b','c') |
29 val alphabet = ("""abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.:"=()\;-+*!<>\/%{} """+"\n\t").toSet//Set('a','b','c') |
30 def bregx_tree(r: BRexp): Element = regx_tree(berase(r)) |
30 def bregx_tree(r: BRexp): Element = regx_tree(berase(r)) |
31 def regx_tree(r: Rexp): Element = aregx_tree(internalise(r)) |
31 def regx_tree(r: Rexp): Element = aregx_tree(internalise(r)) |
|
32 def annotated_tree(r: ARexp): Element = { |
|
33 r match { |
|
34 case ACHAR(bs, d) => { |
|
35 //val Some(d) = alphabet.find(f) |
|
36 d match { |
|
37 case '\n' => elem("\\n") |
|
38 case '\t' => elem("\\t") |
|
39 case ' ' => elem("space") |
|
40 case d => elem(d.toString) |
|
41 } |
|
42 } |
|
43 case AONE(bs) => { |
|
44 elem("ONE") |
|
45 } |
|
46 case AZERO => { |
|
47 elem("ZERO") |
|
48 } |
|
49 case ASEQ(bs, r1, r2) => { |
|
50 binary_print("SEQ", r1, r2) |
|
51 } |
|
52 case AALTS(bs, rs) => { |
|
53 //elem("Awaiting completion") |
|
54 list_print("ALT", rs) |
|
55 } |
|
56 case ASTAR(bs, r) => { |
|
57 list_print("STA", List(r)) |
|
58 } |
|
59 } |
|
60 } |
32 def aregx_tree(r: ARexp): Element = { |
61 def aregx_tree(r: ARexp): Element = { |
33 r match { |
62 r match { |
34 case ACHAR(bs, d) => { |
63 case ACHAR(bs, d) => { |
35 //val Some(d) = alphabet.find(f) |
64 //val Some(d) = alphabet.find(f) |
36 d match { |
65 d match { |
274 //attaching a bit Z to every alts to signify that they come from the original regular expression) |
303 //attaching a bit Z to every alts to signify that they come from the original regular expression) |
275 var old = brternalise(r) |
304 var old = brternalise(r) |
276 //this is for comparison between normal simp and the weakened version of simp |
305 //this is for comparison between normal simp and the weakened version of simp |
277 //normal simp will be performed on syncold |
306 //normal simp will be performed on syncold |
278 //weakend simp will be performed on old |
307 //weakend simp will be performed on old |
279 var syncold = brternalise(r) |
308 var syncold = internalise(r) |
280 val all_chars = s.toList |
309 val all_chars = s.toList |
281 for (i <- 0 to s.length - 1){ |
310 for (i <- 0 to s.length - 1){ |
282 val syncder_res = brder(all_chars(i), syncold) |
311 val syncder_res = bder(all_chars(i), syncold) |
283 val syncsimp_res = strong_br_simp(syncder_res) |
312 val syncsimp_res = super_bsimp(syncder_res) |
284 //see brder for detailed explanation |
313 //see brder for detailed explanation |
285 //just changes bit Z to S when deriving an ALTS, |
314 //just changes bit Z to S when deriving an ALTS, |
286 //signifying that the structure has been "touched" and |
315 //signifying that the structure has been "touched" and |
287 //therefore able to be spilled in the bspill function |
316 //therefore able to be spilled in the bspill function |
288 val der_res = brder(all_chars(i), old) |
317 val der_res = brder(all_chars(i), old) |
289 val simp_res = br_simp(der_res) |
318 val simp_res = br_simp(der_res) |
290 val anatomy = bspill(simp_res) |
319 val anatomy = bspill(simp_res) |
291 //track if the number of regular expressions exceeds those in the PD set(remember PD means the pders over A*) |
320 //track if the number of regular expressions exceeds those in the PD set(remember PD means the pders over A*) |
292 if(f(anatomy, pd) == false || i == 1){ |
321 if(f(List(berase(simp_res)), pd) == false ){ |
293 println(size(berase(syncsimp_res))) |
322 println(size(erase(syncsimp_res))) |
294 println(size(berase(simp_res))) |
323 println(size(berase(simp_res))) |
295 println(bregx_tree(simp_res)) |
324 println(bregx_tree(simp_res)) |
|
325 println(s) |
|
326 println(i) |
|
327 println(r) |
|
328 println() |
296 println(anatomy.map(size).sum) |
329 println(anatomy.map(size).sum) |
297 println(pd.map(size).sum) |
330 println(pd.map(size).sum) |
298 } |
331 } |
299 old = simp_res |
332 old = simp_res |
300 syncold = syncsimp_res |
333 syncold = syncsimp_res |
309 println("inclusion not true") |
342 println("inclusion not true") |
310 false |
343 false |
311 } |
344 } |
312 } |
345 } |
313 def size_comp(anatomy: List[Rexp], pd: Set[Rexp]):Boolean = {println("size of PD and bspilled simp regx: ", pd.size, anatomy.size); true} |
346 def size_comp(anatomy: List[Rexp], pd: Set[Rexp]):Boolean = {println("size of PD and bspilled simp regx: ", pd.size, anatomy.size); true} |
314 def size_expansion_rate(anatomy: List[Rexp], pd: Set[Rexp]): Boolean = if(anatomy.size > (pd.size)*2 ) {println("size of PD and bspilled simp regx: ", pd.size, anatomy.size); inclusion_truth(anatomy, pd); false }else {true} |
347 def size_expansion_rate(r: List[Rexp], pd: Set[Rexp]): Boolean = if(size(r(0)) > (pd.map(size).sum) * 3 ) { false }else {true} |
315 def ders_simp(r: ARexp, s: List[Char]): ARexp = { |
348 def ders_simp(r: ARexp, s: List[Char]): ARexp = { |
316 s match { |
349 s match { |
317 case Nil => r |
350 case Nil => r |
318 case c::cs => ders_simp(bsimp(bder(c, r)), cs) |
351 case c::cs => ders_simp(bsimp(bder(c, r)), cs) |
319 } |
352 } |
320 } |
353 } |
|
354 val big_panda = STAR(STAR(STAR(ALTS(List(ALTS(List(CHAR('c'), CHAR('b'))), SEQ(CHAR('c'),CHAR('c'))))))) |
|
355 val str_panda = "ccccb" |
321 def check_all(){ |
356 def check_all(){ |
322 for(i <- 1 to 1) |
357 |
323 { |
358 weak_sub_check(big_panda, str_panda, 6, size_expansion_rate) |
324 val s = "bb"//rd_string_gen(alphabet_size, 5)//"ac"//rd_string_gen(alphabet_size, 5) |
359 |
325 val r = STAR(STAR(ALTS(List(SEQ(CHAR('b'),CHAR('b')), ALTS(List(CHAR('a'), CHAR('b')))))))//balanced_struct_gen(4)//SEQ(ALTS(List(STAR("a"),ALTS(List("a","c")))),SEQ(ALTS(List("c","a")),ALTS(List("c","b")))) //random_struct_gen(7) |
360 } |
326 //subset_check(r, s) |
361 //simplified regex size 291, so called pd_simp size 70 (did not do simplification to terms in the PD set) |
327 weak_sub_check(r, s, 5, size_expansion_rate) |
362 |
328 } |
|
329 } |
|
330 def correctness_proof_convenient_path(){ |
363 def correctness_proof_convenient_path(){ |
331 for(i <- 1 to 1) |
364 for(i <- 1 to 1) |
332 { |
365 { |
333 val s = "abaa"//rd_string_gen(alphabet_size, 3) |
366 val s = "abaa"//rd_string_gen(alphabet_size, 3) |
334 val r = ASTAR(List(),AALTS(List(),List(ASTAR(List(Z),ACHAR(List(),'a')), ASEQ(List(S),ACHAR(List(),'a'),ACHAR(List(),'b')))))//internalise(balanced_struct_gen(3))//SEQ(ALTS(List(STAR("a"),ALTS(List("a","c")))),SEQ(ALTS(List("c","a")),ALTS(List("c","b")))) //random_struct_gen(7) |
367 val r = ASTAR(List(),AALTS(List(),List(ASTAR(List(Z),ACHAR(List(),'a')), ASEQ(List(S),ACHAR(List(),'a'),ACHAR(List(),'b')))))//internalise(balanced_struct_gen(3))//SEQ(ALTS(List(STAR("a"),ALTS(List("a","c")))),SEQ(ALTS(List("c","a")),ALTS(List("c","b")))) //random_struct_gen(7) |
335 for(j <- 0 to s.length - 1){ |
368 for(j <- 0 to s.length - 1){ |
336 val ss = s.slice(0, j+ 1) |
369 val ss = s.slice(0, j+ 1) |
337 val nangao = ders_simp(r, ss.toList) |
370 val nangao = ders_simp(r, ss.toList) |
338 val easy = bsimp(bders(ss.toList, r)) |
371 val easy = bsimp(bders(ss.toList, r)) |
339 if(nangao != easy|| j == 2){ |
372 if(true){ |
340 println(j) |
373 println(j) |
341 if(j == 3) println("not equal") |
374 if(j == 3) println("not equal") |
|
375 println("string") |
342 println(ss) |
376 println(ss) |
|
377 println("original regex") |
343 println(r) |
378 println(r) |
|
379 println("regex after ders simp") |
344 println(nangao) |
380 println(nangao) |
|
381 println("regex after ders") |
|
382 println(bders(ss.toList, r)) |
|
383 println("regex after ders and then a single simp") |
345 println(easy) |
384 println(easy) |
346 } |
385 } |
347 } |
386 } |
348 } |
387 } |
349 } |
388 } |
350 def main(args: Array[String]) { |
389 def main(args: Array[String]) { |
351 //check_all() |
390 //check_all() |
352 correctness_proof_convenient_path |
391 enum(3, "abc").map(tests_blexer_simp(strs(3, "abc"))).toSet |
|
392 //correctness_proof_convenient_path() |
353 } |
393 } |
354 } |
394 } |
355 |
395 |