Spiral.scala
changeset 5 622ddbb1223a
parent 4 7a349fe58bf4
child 6 26b40a985622
equal deleted inserted replaced
4:7a349fe58bf4 5:622ddbb1223a
    27     }
    27     }
    28   }
    28   }
    29   val alphabet = ("""abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.:"=()\;-+*!<>\/%{} """+"\n\t").toSet//Set('a','b','c')
    29   val alphabet = ("""abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.:"=()\;-+*!<>\/%{} """+"\n\t").toSet//Set('a','b','c')
    30   def bregx_tree(r: BRexp): Element = regx_tree(berase(r))
    30   def bregx_tree(r: BRexp): Element = regx_tree(berase(r))
    31   def regx_tree(r: Rexp): Element = aregx_tree(internalise(r))
    31   def regx_tree(r: Rexp): Element = aregx_tree(internalise(r))
       
    32   def annotated_tree(r: ARexp): Element = {
       
    33     r match {
       
    34       case ACHAR(bs, d) => {
       
    35         //val Some(d) = alphabet.find(f)
       
    36         d match {
       
    37           case '\n' => elem("\\n")
       
    38           case '\t' => elem("\\t")
       
    39           case ' ' => elem("space")
       
    40           case d => elem(d.toString)
       
    41         }   
       
    42       }
       
    43       case AONE(bs) => {
       
    44         elem("ONE")
       
    45       }
       
    46       case AZERO => {
       
    47         elem("ZERO")
       
    48       }
       
    49       case ASEQ(bs, r1, r2) => {
       
    50         binary_print("SEQ", r1, r2)
       
    51       }
       
    52       case AALTS(bs, rs) => {
       
    53         //elem("Awaiting completion")
       
    54         list_print("ALT", rs)
       
    55       }
       
    56       case ASTAR(bs, r) => {
       
    57         list_print("STA", List(r))
       
    58       }
       
    59     } 
       
    60   }
    32   def aregx_tree(r: ARexp): Element = {
    61   def aregx_tree(r: ARexp): Element = {
    33     r match {
    62     r match {
    34       case ACHAR(bs, d) => {
    63       case ACHAR(bs, d) => {
    35         //val Some(d) = alphabet.find(f)
    64         //val Some(d) = alphabet.find(f)
    36         d match {
    65         d match {
   274     //attaching a bit Z to every alts to signify that they come from the original regular expression)
   303     //attaching a bit Z to every alts to signify that they come from the original regular expression)
   275     var old = brternalise(r)
   304     var old = brternalise(r)
   276     //this is for comparison between normal simp and the weakened version of simp
   305     //this is for comparison between normal simp and the weakened version of simp
   277     //normal simp will be performed on syncold
   306     //normal simp will be performed on syncold
   278     //weakend simp will be performed on old
   307     //weakend simp will be performed on old
   279     var syncold = brternalise(r)
   308     var syncold = internalise(r)
   280     val all_chars = s.toList
   309     val all_chars = s.toList
   281     for (i <- 0 to s.length - 1){
   310     for (i <- 0 to s.length - 1){
   282       val syncder_res = brder(all_chars(i), syncold)
   311       val syncder_res = bder(all_chars(i), syncold)
   283       val syncsimp_res = strong_br_simp(syncder_res)
   312       val syncsimp_res = super_bsimp(syncder_res)
   284       //see brder for detailed explanation
   313       //see brder for detailed explanation
   285       //just changes bit Z to S when deriving an ALTS, 
   314       //just changes bit Z to S when deriving an ALTS, 
   286       //signifying that the structure has been "touched" and
   315       //signifying that the structure has been "touched" and
   287       //therefore able to be spilled in the bspill function
   316       //therefore able to be spilled in the bspill function
   288       val der_res =  brder(all_chars(i), old)
   317       val der_res =  brder(all_chars(i), old)
   289       val simp_res = br_simp(der_res)
   318       val simp_res = br_simp(der_res)
   290       val anatomy = bspill(simp_res)
   319       val anatomy = bspill(simp_res)
   291       //track if the number of regular expressions exceeds those in the PD set(remember PD means the pders over A*)
   320       //track if the number of regular expressions exceeds those in the PD set(remember PD means the pders over A*)
   292       if(f(anatomy, pd)  == false || i == 1){
   321       if(f(List(berase(simp_res)), pd)  == false ){
   293         println(size(berase(syncsimp_res)))
   322         println(size(erase(syncsimp_res)))
   294         println(size(berase(simp_res)))
   323         println(size(berase(simp_res)))
   295         println(bregx_tree(simp_res))
   324         println(bregx_tree(simp_res))
       
   325         println(s)
       
   326         println(i)
       
   327         println(r)
       
   328         println()
   296         println(anatomy.map(size).sum)
   329         println(anatomy.map(size).sum)
   297         println(pd.map(size).sum)
   330         println(pd.map(size).sum)
   298       }  
   331       }  
   299       old = simp_res
   332       old = simp_res
   300       syncold = syncsimp_res
   333       syncold = syncsimp_res
   309       println("inclusion not true")
   342       println("inclusion not true")
   310       false
   343       false
   311     }
   344     }
   312   }
   345   }
   313   def size_comp(anatomy: List[Rexp], pd: Set[Rexp]):Boolean = {println("size of PD and bspilled simp regx: ", pd.size, anatomy.size); true}
   346   def size_comp(anatomy: List[Rexp], pd: Set[Rexp]):Boolean = {println("size of PD and bspilled simp regx: ", pd.size, anatomy.size); true}
   314   def size_expansion_rate(anatomy: List[Rexp], pd: Set[Rexp]): Boolean = if(anatomy.size > (pd.size)*2 ) {println("size of PD and bspilled simp regx: ", pd.size, anatomy.size); inclusion_truth(anatomy, pd); false }else {true}
   347   def size_expansion_rate(r: List[Rexp], pd: Set[Rexp]): Boolean = if(size(r(0)) > (pd.map(size).sum) * 3 ) { false }else {true}
   315   def ders_simp(r: ARexp, s: List[Char]): ARexp = {
   348   def ders_simp(r: ARexp, s: List[Char]): ARexp = {
   316     s match {
   349     s match {
   317       case Nil => r 
   350       case Nil => r 
   318       case c::cs => ders_simp(bsimp(bder(c, r)), cs)
   351       case c::cs => ders_simp(bsimp(bder(c, r)), cs)
   319     }
   352     }
   320   }
   353   }
       
   354   val big_panda = STAR(STAR(STAR(ALTS(List(ALTS(List(CHAR('c'), CHAR('b'))), SEQ(CHAR('c'),CHAR('c')))))))
       
   355   val str_panda = "ccccb"
   321   def check_all(){
   356   def check_all(){
   322     for(i <- 1 to 1)
   357 
   323     {
   358         weak_sub_check(big_panda, str_panda, 6, size_expansion_rate)
   324         val s = "bb"//rd_string_gen(alphabet_size, 5)//"ac"//rd_string_gen(alphabet_size, 5)
   359 
   325         val r = STAR(STAR(ALTS(List(SEQ(CHAR('b'),CHAR('b')), ALTS(List(CHAR('a'), CHAR('b')))))))//balanced_struct_gen(4)//SEQ(ALTS(List(STAR("a"),ALTS(List("a","c")))),SEQ(ALTS(List("c","a")),ALTS(List("c","b")))) //random_struct_gen(7)
   360   }
   326         //subset_check(r, s)
   361   //simplified regex size 291, so called pd_simp size 70 (did not do simplification to terms in the PD set)
   327         weak_sub_check(r, s, 5, size_expansion_rate)
   362 
   328     }
       
   329   }
       
   330   def correctness_proof_convenient_path(){
   363   def correctness_proof_convenient_path(){
   331     for(i <- 1 to 1)
   364     for(i <- 1 to 1)
   332     {
   365     {
   333         val s = "abaa"//rd_string_gen(alphabet_size, 3)
   366         val s = "abaa"//rd_string_gen(alphabet_size, 3)
   334         val r = ASTAR(List(),AALTS(List(),List(ASTAR(List(Z),ACHAR(List(),'a')), ASEQ(List(S),ACHAR(List(),'a'),ACHAR(List(),'b')))))//internalise(balanced_struct_gen(3))//SEQ(ALTS(List(STAR("a"),ALTS(List("a","c")))),SEQ(ALTS(List("c","a")),ALTS(List("c","b")))) //random_struct_gen(7)
   367         val r = ASTAR(List(),AALTS(List(),List(ASTAR(List(Z),ACHAR(List(),'a')), ASEQ(List(S),ACHAR(List(),'a'),ACHAR(List(),'b')))))//internalise(balanced_struct_gen(3))//SEQ(ALTS(List(STAR("a"),ALTS(List("a","c")))),SEQ(ALTS(List("c","a")),ALTS(List("c","b")))) //random_struct_gen(7)
   335         for(j <- 0 to s.length - 1){
   368         for(j <- 0 to s.length - 1){
   336           val ss = s.slice(0, j+ 1)
   369           val ss = s.slice(0, j+ 1)
   337           val nangao = ders_simp(r, ss.toList)
   370           val nangao = ders_simp(r, ss.toList)
   338           val easy = bsimp(bders(ss.toList, r))
   371           val easy = bsimp(bders(ss.toList, r))
   339           if(nangao != easy|| j == 2){
   372           if(true){
   340             println(j)
   373             println(j)
   341             if(j == 3) println("not equal")
   374             if(j == 3) println("not equal")
       
   375             println("string")
   342             println(ss)
   376             println(ss)
       
   377             println("original regex")
   343             println(r)
   378             println(r)
       
   379             println("regex after ders simp")
   344             println(nangao)
   380             println(nangao)
       
   381             println("regex after ders")
       
   382             println(bders(ss.toList, r))
       
   383             println("regex after ders and then a single simp")
   345             println(easy)
   384             println(easy)
   346           }
   385           }
   347         }
   386         }
   348     }
   387     }
   349   }
   388   }
   350   def main(args: Array[String]) {
   389   def main(args: Array[String]) {
   351     //check_all()   
   390     //check_all()   
   352     correctness_proof_convenient_path
   391     enum(3, "abc").map(tests_blexer_simp(strs(3, "abc"))).toSet
       
   392     //correctness_proof_convenient_path()
   353   } 
   393   } 
   354 }
   394 }
   355 
   395