exps/bit.scala
changeset 316 0eaa1851a5b6
parent 315 ab7fe342e004
child 317 db0ff630bbb7
equal deleted inserted replaced
315:ab7fe342e004 316:0eaa1851a5b6
    30 
    30 
    31 // usual regular expressions
    31 // usual regular expressions
    32 abstract class Rexp 
    32 abstract class Rexp 
    33 case object ZERO extends Rexp
    33 case object ZERO extends Rexp
    34 case object ONE extends Rexp
    34 case object ONE extends Rexp
    35 case class PRED(f: Char => Boolean, s: String = "_") extends Rexp 
    35 case class PRED(f: Char => Boolean, s: String = "_") extends Rexp {
       
    36   override def toString = "'" ++ s ++ "'"
       
    37 }
    36 case class ALTS(rs: List[Rexp]) extends Rexp 
    38 case class ALTS(rs: List[Rexp]) extends Rexp 
    37 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
    39 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
    38 case class STAR(r: Rexp) extends Rexp 
    40 case class STAR(r: Rexp) extends Rexp 
    39 case class RECD(x: String, r: Rexp) extends Rexp
    41 case class RECD(x: String, r: Rexp) extends Rexp
    40 
    42 
    47 
    49 
    48 // annotated regular expressions
    50 // annotated regular expressions
    49 abstract class ARexp 
    51 abstract class ARexp 
    50 case object AZERO extends ARexp
    52 case object AZERO extends ARexp
    51 case class AONE(bs: Bits) extends ARexp
    53 case class AONE(bs: Bits) extends ARexp
    52 case class APRED(bs: Bits, f: Char => Boolean, s: String = "_") extends ARexp
    54 case class APRED(bs: Bits, f: Char => Boolean, s: String = "_") extends ARexp {
       
    55   override def toString = "'" ++ s ++ "'"
       
    56 }
    53 case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp 
    57 case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp 
    54 case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp 
    58 case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp 
    55 case class ASTAR(bs: Bits, r: ARexp) extends ARexp 
    59 case class ASTAR(bs: Bits, r: ARexp) extends ARexp 
    56 
    60 
    57 // abbreviations
    61 // abbreviations
    92   def $ (r: Rexp) = RECD(s, r)
    96   def $ (r: Rexp) = RECD(s, r)
    93 }
    97 }
    94 
    98 
    95 
    99 
    96 // string of a regular expressions - for testing purposes
   100 // string of a regular expressions - for testing purposes
    97 def string(r: Rexp): String = r match {
   101 def string(r: Rexp, s: String = ""): String = r match {
    98   case ZERO => "0"
   102   case ZERO => "0"
    99   case ONE => "1"
   103   case ONE => "1"
   100   case PRED(_, s) => s
   104   case PRED(_, s1) => s1
   101   case ALTS(rs) => rs.map(string).mkString("[", "|", "]")
   105   case ALTS(rs) => rs.map(string(_, s ++ " ")).mkString("[", ",\n" ++ s ++ "|", "]")
   102   case SEQ(r1, r2) => s"(${string(r1)} ~ ${string(r2)})"
   106   case SEQ(r1, r2) => {
   103   case STAR(r) => s"{${string(r)}}*"
   107     val s1 = string(r1, s)
   104   case RECD(x, r) => s"(${x}! ${string(r)})"
   108     val i = (s1 ++ "\n").toList.indexOf('\n')
       
   109     val s2 = string(r2, (" " * i) + "   ")
       
   110     s"${s1} ~ ${s2}"
       
   111   }
       
   112   case STAR(r) => s"<${string(r, s ++ " ")}>*"
       
   113   case RECD(x, r) => s"(${x}! ${string(r, s)})"
   105 }
   114 }
   106 
   115 
   107 def strings(rs: Set[Rexp]): String =
   116 def strings(rs: Set[Rexp]): String =
   108   rs.map(string).mkString("{", "|", "}")
   117   rs.map(string(_, "")).mkString("{", ",\n|", "}")
   109 
   118 
   110 // string of an annotated regular expressions - for testing purposes
   119 // string of an annotated regular expressions - for testing purposes
   111 
   120 def astring(a: ARexp, s: String = ""): String = a match {
   112 def astring(a: ARexp): String = a match {
       
   113   case AZERO => "0"
   121   case AZERO => "0"
   114   case AONE(_) => "1"
   122   case AONE(_) => "1"
   115   case APRED(_, _, s) => s
   123   case APRED(_, _, s) => s
   116   case AALTS(_, rs) => rs.map(astring).mkString("[", "|", "]")
   124   case AALTS(_, rs) => rs.map(astring(_, s ++ " ")).mkString("[", ",\n" ++ s ++ "|", "]")
   117   case ASEQ(_, r1, r2) => s"(${astring(r1)} ~ ${astring(r2)})"
   125   case ASEQ(_, r1, r2) => {
   118   case ASTAR(_, r) => s"{${astring(r)}}*"
   126     val s1 = astring(r1, s)
   119 }
   127     val i = (s1 ++ "\n").toList.indexOf('\n')
       
   128     val s2 = astring(r2, (" " * i) + "   ")
       
   129     s"${s1} ~ ${s2}"
       
   130   }
       
   131   case ASTAR(_, r) => s"<${astring(r, s ++ " ")}>*"
       
   132 }
       
   133 
   120  
   134  
   121 
   135 
   122 //--------------------------------------------------------------
   136 //--------------------------------------------------------------
   123 // START OF NON-BITCODE PART
   137 // START OF NON-BITCODE PART
   124 //
   138 //
   300 def pders_simp(cs: List[Char], r: Rexp): Set[Rexp] = cs match {
   314 def pders_simp(cs: List[Char], r: Rexp): Set[Rexp] = cs match {
   301   case Nil => Set(r)
   315   case Nil => Set(r)
   302   case c::cs => pder(c, r).flatMap(pders_simp(cs, _)).map(simp(_)._1)
   316   case c::cs => pder(c, r).flatMap(pders_simp(cs, _)).map(simp(_)._1)
   303 }
   317 }
   304 
   318 
   305 def psize(rs: Set[Rexp])  = 
       
   306   rs.map(size).sum
       
   307 
   319 
   308 //--------------------------------------------------------------------
   320 //--------------------------------------------------------------------
   309 // BITCODED PART
   321 // BITCODED PART
   310 
   322 
   311 
   323 
   321 // translation into ARexps
   333 // translation into ARexps
   322 def internalise(r: Rexp) : ARexp = r match {
   334 def internalise(r: Rexp) : ARexp = r match {
   323   case ZERO => AZERO
   335   case ZERO => AZERO
   324   case ONE => AONE(Nil)
   336   case ONE => AONE(Nil)
   325   case PRED(f, s) => APRED(Nil, f, s)
   337   case PRED(f, s) => APRED(Nil, f, s)
       
   338   case ALTS(List(r1)) => 
       
   339     AALTS(Nil, List(fuse(List(Z), internalise(r1))))
   326   case ALTS(List(r1, r2)) => 
   340   case ALTS(List(r1, r2)) => 
   327     AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
   341     AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
   328   case ALTS(r1::rs) => {
   342   case ALTS(r1::rs) => {
   329      val AALTS(Nil, rs2) = internalise(ALTS(rs))
   343      val AALTS(Nil, rs2) = internalise(ALTS(rs))
   330      AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
   344      AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
   431     case AZERO :: rs1 => flats(rs1)
   445     case AZERO :: rs1 => flats(rs1)
   432     case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
   446     case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
   433     case r1 :: rs2 => r1 :: flats(rs2)
   447     case r1 :: rs2 => r1 :: flats(rs2)
   434 }
   448 }
   435 
   449 
       
   450 def stack(r1: ARexp, r2: ARexp) = r1 match {
       
   451   case AONE(bs2) => fuse(bs2, r2)
       
   452   case _ => ASEQ(Nil, r1, r2)
       
   453 }
       
   454 
   436 def bsimp(r: ARexp): ARexp = r match {
   455 def bsimp(r: ARexp): ARexp = r match {
   437   case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
   456   case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
   438       case (AZERO, _) => AZERO
   457       case (AZERO, _) => AZERO
   439       case (_, AZERO) => AZERO
   458       case (_, AZERO) => AZERO
   440       case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
   459       case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
   441       //case (AALTS(bs2, rs), r2s) =>  
   460       case (AALTS(bs2, rs), r2s) =>  
   442       //  AALTS(bs1 ++ bs2, rs.map(ASEQ(Nil, _, r2s)))
   461         AALTS(bs1 ++ bs2, rs.map(stack(_, r2s)))
   443       case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
   462       case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
   444   }
   463   }
   445   case AALTS(bs1, rs) => distinctBy(flats(rs.map(bsimp)), erase) match {
   464   case AALTS(bs1, rs) => distinctBy(flats(rs.map(bsimp)), erase) match {
   446     case Nil => AZERO
   465     case Nil => AZERO
   447     case r :: Nil => fuse(bs1, r)
   466     case r :: Nil => fuse(bs1, r)
   448     case rs => AALTS(bs1, rs)  
   467     case rs2 => AALTS(bs1, rs2)  
   449   }
   468   }
   450   //case ASTAR(bs1, r1) => ASTAR(bs1, bsimp(r1))
   469   //case ASTAR(bs1, r1) => ASTAR(bs1, bsimp(r1))
   451   case r => r
   470   case r => r
   452 }
   471 }
   453 
   472 
   477 def bsimp_full(r: ARexp): ARexp = r match {
   496 def bsimp_full(r: ARexp): ARexp = r match {
   478   case ASEQ(bs1, r1, r2) => (bsimp_full(r1), bsimp_full(r2)) match {
   497   case ASEQ(bs1, r1, r2) => (bsimp_full(r1), bsimp_full(r2)) match {
   479       case (AZERO, _) => AZERO
   498       case (AZERO, _) => AZERO
   480       case (_, AZERO) => AZERO
   499       case (_, AZERO) => AZERO
   481       case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
   500       case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
   482       //case (AALTS(bs2, rs), r2s) =>  
   501       case (AALTS(bs2, rs), r2s) =>  
   483       //  AALTS(bs1 ++ bs2, rs.map(ASEQ(Nil, _, r2s)))
   502         AALTS(bs1 ++ bs2, rs.map(ASEQ(Nil, _, r2s)))
   484       case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
   503       case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
   485   }
   504   }
   486   case AALTS(bs1, rs) => distinctBy(flats(rs.map(bsimp_full)), erase) match {
   505   case AALTS(bs1, rs) => distinctBy(flats(rs.map(bsimp_full)), erase) match {
   487     case Nil => AZERO
   506     case Nil => AZERO
   488     case r :: Nil => fuse(bs1, r)
   507     case r :: Nil => fuse(bs1, r)
   633   case STAR(r) => 1 + size(r)
   652   case STAR(r) => 1 + size(r)
   634   case RECD(_, r) => size(r)
   653   case RECD(_, r) => size(r)
   635 }
   654 }
   636 
   655 
   637 def asize(a: ARexp) = size(erase(a))
   656 def asize(a: ARexp) = size(erase(a))
       
   657 
       
   658 def psize(rs: Set[Rexp]) = rs.map(size).sum
   638 
   659 
   639 
   660 
   640 // Lexing Rules for a Small While Language
   661 // Lexing Rules for a Small While Language
   641 
   662 
   642 //symbols
   663 //symbols
   677 
   698 
   678 
   699 
   679 // Some Small Tests
   700 // Some Small Tests
   680 //==================
   701 //==================
   681 
   702 
       
   703 // string of a regular expressions - for testing purposes
       
   704 
       
   705 
       
   706 string(ALTS(List("a","b",ALTS(List("0","1","2")),"c"))) 
       
   707 string(ALTS(List("a","b",ALTS(List("0","1",ALTS(List("X","Y")),"2")),"c"))) 
       
   708 string(ALTS(List("aa","b",ALTS(List("0","1","2")),"c"))) 
       
   709 string(ALTS(List("aa","b",SEQ("Q", ALTS(List("0","1","2"))),"c"))) 
       
   710 string(ALTS(List("aa","b",SEQ(SEQ("Q", ALTS(List("0","1","2"))),"W"),"c"))) 
       
   711 string(ALTS(List("aa","b",SEQ("Q", STAR(ALTS(List("0","1","2")))),"c"))) 
       
   712 string(ALTS(List("aaa","bbbb",ALTS(List("000","1111","2222")),"ccccc"))) 
       
   713 
   682 println("Small tests")
   714 println("Small tests")
   683 
   715 
   684 val q = STAR(STAR("bb" | ("a" | "b")))
   716 val q = STAR(STAR(STAR(ALTS(List(ALTS(List(CHAR('c'), CHAR('b'))), SEQ(CHAR('c'),CHAR('c')))))))
   685 val qs = "bb"
   717 val qs = "cccc"
       
   718 
       
   719 //val q = STAR(STAR("bb" | ("a" | "b")))
       
   720 //val qs = "bbbbbbb"
       
   721 
       
   722 println("Size Bit  " + asize(bders_simp(qs.toList, internalise(q))))
       
   723 println("Size Pder " + psize(pders_simp(qs.toList, q)))
       
   724 println(astring(bders_simp(qs.toList, internalise(q))))
       
   725 println(strings(pders_simp(qs.toList, q)))
   686 
   726 
   687 println("Size Bit  " + asize(bders_simp(qs.toList, internalise(q))))
   727 println("Size Bit  " + asize(bders_simp(qs.toList, internalise(q))))
   688 println("Size Bitf " + asize(bders_simp_full(qs.toList, internalise(q))))
   728 println("Size Bitf " + asize(bders_simp_full(qs.toList, internalise(q))))
   689 println("Size Bit2 " + asize(bders2_simp(qs.toList, internalise(q))))
   729 println("Size Bit2 " + asize(bders2_simp(qs.toList, internalise(q))))
   690 println("Size Old  " + size(ders_simp(qs.toList, q)))
   730 println("Size Old  " + size(ders_simp(qs.toList, q)))
   691 println("Size Pder " + psize(pders(qs.toList, q)))
   731 println("Size Pder " + psize(pders(qs.toList, q)))
   692 println("Size Pder simp " + psize(pders_simp(qs.toList, q)))
   732 println("Size Pder simp " + psize(pders_simp(qs.toList, q)))
   693 
   733 
   694 println(astring(bders_simp(qs.toList, internalise(q))))
   734 println(astring(bders_simp(qs.toList, internalise(q))))
   695 println(astring(bders_simp_full(qs.toList, internalise(q))))
   735 //println(astring(bders_simp_full(qs.toList, internalise(q))))
   696 println(string(ders_simp(qs.toList, q)))
   736 //println(string(ders_simp(qs.toList, q)))
   697 println(strings(pders(qs.toList, q)))
   737 //println(strings(pders(qs.toList, q)))
   698 println(strings(pders_simp(qs.toList, q)))
   738 println(strings(pders_simp(qs.toList, q)))
       
   739 
       
   740 
   699 
   741 
   700 System.exit(0)
   742 System.exit(0)
   701 
   743 
   702 val re1 = STAR("a" | "aa")
   744 val re1 = STAR("a" | "aa")
   703 println(astring(bders_simp("".toList, internalise(re1))))
   745 println(astring(bders_simp("".toList, internalise(re1))))