exps/both.scala
changeset 305 6e2cef17a9b3
parent 300 b7987014fed8
child 306 6756b026c5fe
equal deleted inserted replaced
304:82a99eec5b73 305:6e2cef17a9b3
    29 
    29 
    30 // usual regular expressions
    30 // usual regular expressions
    31 abstract class Rexp 
    31 abstract class Rexp 
    32 case object ZERO extends Rexp
    32 case object ZERO extends Rexp
    33 case object ONE extends Rexp
    33 case object ONE extends Rexp
    34 case class PRED(f: Char => Boolean) extends Rexp
    34 case class PRED(f: Char => Boolean, s: String = "_") extends Rexp 
    35 case class ALTS(rs: List[Rexp]) extends Rexp 
    35 case class ALTS(rs: List[Rexp]) extends Rexp 
    36 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
    36 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
    37 case class STAR(r: Rexp) extends Rexp 
    37 case class STAR(r: Rexp) extends Rexp 
    38 case class RECD(x: String, r: Rexp) extends Rexp
    38 case class RECD(x: String, r: Rexp) extends Rexp
    39 
    39 
    40 
    40 
    41 // abbreviations
    41 // abbreviations
    42 def CHAR(c: Char) = PRED(_ == c)
    42 def CHAR(c: Char) = PRED(_ == c, c.toString)
    43 def ALT(r1: Rexp, r2: Rexp) = ALTS(List(r1, r2))
    43 def ALT(r1: Rexp, r2: Rexp) = ALTS(List(r1, r2))
    44 def PLUS(r: Rexp) = SEQ(r, STAR(r))
    44 def PLUS(r: Rexp) = SEQ(r, STAR(r))
    45 
    45 
    46 // annotated regular expressions
    46 // annotated regular expressions
    47 abstract class ARexp 
    47 abstract class ARexp 
    48 case object AZERO extends ARexp
    48 case object AZERO extends ARexp
    49 case class AONE(bs: Bits) extends ARexp
    49 case class AONE(bs: Bits) extends ARexp
    50 case class APRED(bs: Bits, f: Char => Boolean) extends ARexp
    50 case class APRED(bs: Bits, f: Char => Boolean, s: String = "_") extends ARexp
    51 case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp 
    51 case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp 
    52 case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp 
    52 case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp 
    53 case class ASTAR(bs: Bits, r: ARexp) extends ARexp 
    53 case class ASTAR(bs: Bits, r: ARexp) extends ARexp 
    54 
    54 
    55 // abbreviations
    55 // abbreviations
    90   def $ (r: Rexp) = RECD(s, r)
    90   def $ (r: Rexp) = RECD(s, r)
    91 }
    91 }
    92 
    92 
    93 
    93 
    94 // string of a regular expressions - for testing purposes
    94 // string of a regular expressions - for testing purposes
    95   def string(r: Rexp): String = r match {
    95 def string(r: Rexp): String = r match {
    96     case ZERO => "0"
    96   case ZERO => "0"
    97     case ONE => "1"
    97   case ONE => "1"
    98     case PRED(_) => "_"
    98   case PRED(_, s) => s
    99     case ALTS(rs) => rs.map(string).mkString("[", "|", "]")
    99   case ALTS(rs) => rs.map(string).mkString("[", "|", "]")
   100     case SEQ(r1, r2) => s"(${string(r1)} ~ ${string(r2)})"
   100   case SEQ(r1, r2) => s"(${string(r1)} ~ ${string(r2)})"
   101     case STAR(r) => s"{${string(r)}}*"
   101   case STAR(r) => s"{${string(r)}}*"
   102     case RECD(x, r) => s"(${x}! ${string(r)})"
   102   case RECD(x, r) => s"(${x}! ${string(r)})"
   103   }
   103 }
       
   104 
       
   105 // string of an annotated regular expressions - for testing purposes
       
   106 
       
   107 def astring(a: ARexp): String = a match {
       
   108   case AZERO => "0"
       
   109   case AONE(_) => "1"
       
   110   case APRED(_, _, s) => s
       
   111   case AALTS(_, rs) => rs.map(astring).mkString("[", "|", "]")
       
   112   case ASEQ(_, r1, r2) => s"(${astring(r1)} ~ ${astring(r2)})"
       
   113   case ASTAR(_, r) => s"{${astring(r)}}*"
       
   114 }
       
   115  
   104 
   116 
   105 //--------------------------------------------------------------------------------------------------------
   117 //--------------------------------------------------------------------------------------------------------
   106 // START OF NON-BITCODE PART
   118 // START OF NON-BITCODE PART
   107 //
   119 //
   108 
   120 
   109 // nullable function: tests whether the regular 
   121 // nullable function: tests whether the regular 
   110 // expression can recognise the empty string
   122 // expression can recognise the empty string
   111 def nullable (r: Rexp) : Boolean = r match {
   123 def nullable (r: Rexp) : Boolean = r match {
   112   case ZERO => false
   124   case ZERO => false
   113   case ONE => true
   125   case ONE => true
   114   case PRED(_) => false
   126   case PRED(_, _) => false
   115   case ALTS(rs) => rs.exists(nullable)
   127   case ALTS(rs) => rs.exists(nullable)
   116   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
   128   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
   117   case STAR(_) => true
   129   case STAR(_) => true
   118   case RECD(_, r) => nullable(r)
   130   case RECD(_, r) => nullable(r)
   119 }
   131 }
   120 
   132 
   121 // derivative of a regular expression w.r.t. a character
   133 // derivative of a regular expression w.r.t. a character
   122 def der (c: Char, r: Rexp) : Rexp = r match {
   134 def der (c: Char, r: Rexp) : Rexp = r match {
   123   case ZERO => ZERO
   135   case ZERO => ZERO
   124   case ONE => ZERO
   136   case ONE => ZERO
   125   case PRED(f) => if (f(c)) ONE else ZERO
   137   case PRED(f, _) => if (f(c)) ONE else ZERO
   126   case ALTS(List(r1, r2)) => ALTS(List(der(c, r1), der(c, r2)))
   138   case ALTS(List(r1, r2)) => ALTS(List(der(c, r1), der(c, r2)))
   127   case SEQ(r1, r2) => 
   139   case SEQ(r1, r2) => 
   128     if (nullable(r1)) ALTS(List(SEQ(der(c, r1), r2), der(c, r2)))
   140     if (nullable(r1)) ALTS(List(SEQ(der(c, r1), r2), der(c, r2)))
   129     else SEQ(der(c, r1), r2)
   141     else SEQ(der(c, r1), r2)
   130   case STAR(r) => SEQ(der(c, r), STAR(r))
   142   case STAR(r) => SEQ(der(c, r), STAR(r))
   169   case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
   181   case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
   170   case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
   182   case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
   171   case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
   183   case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
   172   case (ALTS(List(r1, r2)), Left(v1)) => Left(inj(r1, c, v1))
   184   case (ALTS(List(r1, r2)), Left(v1)) => Left(inj(r1, c, v1))
   173   case (ALTS(List(r1, r2)), Right(v2)) => Right(inj(r2, c, v2))
   185   case (ALTS(List(r1, r2)), Right(v2)) => Right(inj(r2, c, v2))
   174   case (PRED(_), Empty) => Chr(c) 
   186   case (PRED(_, _), Empty) => Chr(c) 
   175   case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
   187   case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
   176 }
   188 }
   177 
   189 
   178 // lexing without simplification
   190 // lexing without simplification
   179 def lex(r: Rexp, s: List[Char]) : Val = s match {
   191 def lex(r: Rexp, s: List[Char]) : Val = s match {
   262 
   274 
   263 
   275 
   264 def fuse(bs: Bits, r: ARexp) : ARexp = r match {
   276 def fuse(bs: Bits, r: ARexp) : ARexp = r match {
   265   case AZERO => AZERO
   277   case AZERO => AZERO
   266   case AONE(cs) => AONE(bs ++ cs)
   278   case AONE(cs) => AONE(bs ++ cs)
   267   case APRED(cs, f) => APRED(bs ++ cs, f)
   279   case APRED(cs, f, s) => APRED(bs ++ cs, f, s)
   268   case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
   280   case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
   269   case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
   281   case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
   270   case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
   282   case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
   271 }
   283 }
   272 
   284 
   273 // translation into ARexps
   285 // translation into ARexps
   274 def internalise(r: Rexp) : ARexp = r match {
   286 def internalise(r: Rexp) : ARexp = r match {
   275   case ZERO => AZERO
   287   case ZERO => AZERO
   276   case ONE => AONE(Nil)
   288   case ONE => AONE(Nil)
   277   case PRED(f) => APRED(Nil, f)
   289   case PRED(f, s) => APRED(Nil, f, s)
   278   case ALTS(List(r1, r2)) => 
   290   case ALTS(List(r1, r2)) => 
   279     AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
   291     AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
   280   case ALTS(r1::rs) => {
   292   case ALTS(r1::rs) => {
   281      val AALTS(Nil, rs2) = internalise(ALTS(rs))
   293      val AALTS(Nil, rs2) = internalise(ALTS(rs))
   282      AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
   294      AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
   289 internalise(("a" | "ab") ~ ("b" | ""))
   301 internalise(("a" | "ab") ~ ("b" | ""))
   290 
   302 
   291 // decoding of values from bit sequences
   303 // decoding of values from bit sequences
   292 def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
   304 def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
   293   case (ONE, bs) => (Empty, bs)
   305   case (ONE, bs) => (Empty, bs)
   294   case (PRED(f), C(c)::bs) => (Chr(c), bs)
   306   case (PRED(f, _), C(c)::bs) => (Chr(c), bs)
   295   case (ALTS(r::Nil), bs) => decode_aux(r, bs)
   307   case (ALTS(r::Nil), bs) => decode_aux(r, bs)
   296   case (ALTS(rs), bs) => bs match {
   308   case (ALTS(rs), bs) => bs match {
   297     case Z::bs1 => {
   309     case Z::bs1 => {
   298       val (v, bs2) = decode_aux(rs.head, bs1)
   310       val (v, bs2) = decode_aux(rs.head, bs1)
   299       (Left(v), bs2)
   311       (Left(v), bs2)
   328 
   340 
   329 //erase function: extracts a Rexp from Arexp
   341 //erase function: extracts a Rexp from Arexp
   330 def erase(r: ARexp) : Rexp = r match{
   342 def erase(r: ARexp) : Rexp = r match{
   331   case AZERO => ZERO
   343   case AZERO => ZERO
   332   case AONE(_) => ONE
   344   case AONE(_) => ONE
   333   case APRED(bs, f) => PRED(f)
   345   case APRED(bs, f, s) => PRED(f, s)
   334   case AALTS(bs, rs) => ALTS(rs.map(erase(_)))
   346   case AALTS(bs, rs) => ALTS(rs.map(erase(_)))
   335   case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
   347   case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
   336   case ASTAR(cs, r)=> STAR(erase(r))
   348   case ASTAR(cs, r)=> STAR(erase(r))
   337 }
   349 }
   338 
   350 
   340 // bnullable function: tests whether the aregular 
   352 // bnullable function: tests whether the aregular 
   341 // expression can recognise the empty string
   353 // expression can recognise the empty string
   342 def bnullable (r: ARexp) : Boolean = r match {
   354 def bnullable (r: ARexp) : Boolean = r match {
   343   case AZERO => false
   355   case AZERO => false
   344   case AONE(_) => true
   356   case AONE(_) => true
   345   case APRED(_,_) => false
   357   case APRED(_,_,_) => false
   346   case AALTS(_, rs) => rs.exists(bnullable)
   358   case AALTS(_, rs) => rs.exists(bnullable)
   347   case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
   359   case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
   348   case ASTAR(_, _) => true
   360   case ASTAR(_, _) => true
   349 }
   361 }
   350 
   362 
   360 
   372 
   361 // derivative of a regular expression w.r.t. a character
   373 // derivative of a regular expression w.r.t. a character
   362 def bder(c: Char, r: ARexp) : ARexp = r match {
   374 def bder(c: Char, r: ARexp) : ARexp = r match {
   363   case AZERO => AZERO
   375   case AZERO => AZERO
   364   case AONE(_) => AZERO
   376   case AONE(_) => AZERO
   365   case APRED(bs, f) => if (f(c)) AONE(bs:::List(C(c))) else AZERO
   377   case APRED(bs, f, _) => if (f(c)) AONE(bs:::List(C(c))) else AZERO
   366   case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
   378   case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
   367   case ASEQ(bs, r1, r2) => 
   379   case ASEQ(bs, r1, r2) => 
   368     if (bnullable(r1)) AALT(bs, ASEQ(Nil, bder(c, r1), r2), fuse(bmkeps(r1), bder(c, r2)))
   380     if (bnullable(r1)) AALT(bs, ASEQ(Nil, bder(c, r1), r2), fuse(bmkeps(r1), bder(c, r2)))
   369     else ASEQ(bs, bder(c, r1), r2)
   381     else ASEQ(bs, bder(c, r1), r2)
   370   case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
   382   case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
   375 @tailrec
   387 @tailrec
   376 def bders (s: List[Char], r: ARexp) : ARexp = s match {
   388 def bders (s: List[Char], r: ARexp) : ARexp = s match {
   377   case Nil => r
   389   case Nil => r
   378   case c::s => bders(s, bder(c, r))
   390   case c::s => bders(s, bder(c, r))
   379 }
   391 }
   380 
       
   381 
   392 
   382 def flats(rs: List[ARexp]): List[ARexp] = rs match {
   393 def flats(rs: List[ARexp]): List[ARexp] = rs match {
   383     case Nil => Nil
   394     case Nil => Nil
   384     case AZERO :: rs1 => flats(rs1)
   395     case AZERO :: rs1 => flats(rs1)
   385     case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
   396     case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
   402   case r => r
   413   case r => r
   403 }
   414 }
   404 
   415 
   405 
   416 
   406 
   417 
       
   418 
   407 def bders_simp (s: List[Char], r: ARexp) : ARexp = s match {
   419 def bders_simp (s: List[Char], r: ARexp) : ARexp = s match {
   408   case Nil => r
   420   case Nil => r
   409   case c::s => bders_simp(s, bsimp(bder(c, r)))
   421   case c::s => bders_simp(s, bsimp(bder(c, r)))
   410 }
   422 }
   411 
   423 
   483 
   495 
   484 //size: of a Aregx for testing purposes 
   496 //size: of a Aregx for testing purposes 
   485 def size(r: Rexp) : Int = r match {
   497 def size(r: Rexp) : Int = r match {
   486   case ZERO => 1
   498   case ZERO => 1
   487   case ONE => 1
   499   case ONE => 1
   488   case PRED(_) => 1
   500   case PRED(_,_) => 1
   489   case SEQ(r1, r2) => 1 + size(r1) + size(r2)
   501   case SEQ(r1, r2) => 1 + size(r1) + size(r2)
   490   case ALTS(rs) => 1 + rs.map(size).sum
   502   case ALTS(rs) => 1 + rs.map(size).sum
   491   case STAR(r) => 1 + size(r)
   503   case STAR(r) => 1 + size(r)
   492   case RECD(_, r) => size(r)
   504   case RECD(_, r) => size(r)
   493 }
   505 }
   496 
   508 
   497 
   509 
   498 // Lexing Rules for a Small While Language
   510 // Lexing Rules for a Small While Language
   499 
   511 
   500 //symbols
   512 //symbols
   501 val SYM = PRED("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".contains(_))
   513 val SYM = PRED("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".contains(_), "SYM")
   502 //digits
   514 //digits
   503 val DIGIT = PRED("0123456789".contains(_))
   515 val DIGIT = PRED("0123456789".contains(_), "NUM")
   504 //identifiers
   516 //identifiers
   505 val ID = SYM ~ (SYM | DIGIT).% 
   517 val ID = SYM ~ (SYM | DIGIT).% 
   506 //numbers
   518 //numbers
   507 val NUM = STAR(DIGIT)
   519 val NUM = STAR(DIGIT)
   508 //keywords
   520 //keywords
   576   print("Old: " + time(tokenise_simp(WHILE_REGS, fib_prog * i)))
   588   print("Old: " + time(tokenise_simp(WHILE_REGS, fib_prog * i)))
   577   print(" Bit: " + time(btokenise_simp(WHILE_REGS, fib_prog * i)))
   589   print(" Bit: " + time(btokenise_simp(WHILE_REGS, fib_prog * i)))
   578   println(" Bit full simp: " + time(btokenise_simp_full(WHILE_REGS, fib_prog * i)))
   590   println(" Bit full simp: " + time(btokenise_simp_full(WHILE_REGS, fib_prog * i)))
   579 }
   591 }
   580 
   592 
       
   593 
   581 println("Original " + size(WHILE_REGS))
   594 println("Original " + size(WHILE_REGS))
   582 println("Size Bit  " + asize(bders_simp((fib_prog * 10).toList, internalise(WHILE_REGS))))
   595 println("Size Bit  " + asize(bders_simp((fib_prog * 1).toList, internalise(WHILE_REGS))))
   583 println("Size Bitf " + asize(bders_simp_full((fib_prog * 10).toList, internalise(WHILE_REGS))))
   596 println("Size Bitf " + asize(bders_simp_full((fib_prog * 1).toList, internalise(WHILE_REGS))))
   584 println("Size Old  " + size(ders_simp((fib_prog * 10).toList, WHILE_REGS)))
   597 println("Size Old  " + size(ders_simp((fib_prog * 1).toList, WHILE_REGS)))
   585 
   598 
   586 
   599 
   587 //System.exit(0)
   600 System.exit(0)
   588 
   601 
   589 println("Internal sizes test OK or strange")
   602 println("Internal sizes test OK or strange")
   590 
   603 
   591 def perc(p1: Double, p2: Double) : String =
   604 def perc(p1: Double, p2: Double) : String =
   592   f"${(((p1 - p2) / p2) * 100.0) }%5.0f" + "%"
   605   f"${(((p1 - p2) / p2) * 100.0) }%5.0f" + "%"
   660 println("Partial searching: ")
   673 println("Partial searching: ")
   661 enum(2, "abc").map(tests(strs(3, "abc"))).toSet
   674 enum(2, "abc").map(tests(strs(3, "abc"))).toSet
   662 
   675 
   663 
   676 
   664 
   677 
       
   678