progs/scala/re-annotated2.sc
changeset 361 8bb064045b4e
parent 360 e752d84225ec
child 644 9f984ff20020
equal deleted inserted replaced
360:e752d84225ec 361:8bb064045b4e
    49 case class ACHARSET(bs: Bits, f: Char => Boolean) extends ARexp
    49 case class ACHARSET(bs: Bits, f: Char => Boolean) extends ARexp
    50 
    50 
    51 // an abbreviation for binary alternatives
    51 // an abbreviation for binary alternatives
    52 def AALT(bs: Bits, r1: ARexp, r2: ARexp) = AALTS(bs, List(r1, r2))
    52 def AALT(bs: Bits, r1: ARexp, r2: ARexp) = AALTS(bs, List(r1, r2))
    53 
    53 
    54 abstract class Val
    54 
    55 case object Empty extends Val
       
    56 case class Chr(c: Char) extends Val
       
    57 case class Sequ(v1: Val, v2: Val) extends Val
       
    58 case class Left(v: Val) extends Val
       
    59 case class Right(v: Val) extends Val
       
    60 case class Stars(vs: List[Val]) extends Val
       
    61 case class Recd(x: String, v: Val) extends Val
       
    62    
       
    63 // some convenience for typing in regular expressions
    55 // some convenience for typing in regular expressions
    64 def charlist2rexp(s: List[Char]): Rexp = s match {
    56 def charlist2rexp(s: List[Char]): Rexp = s match {
    65   case Nil => ONE
    57   case Nil => ONE
    66   case c::Nil => CHAR(c)
    58   case c::Nil => CHAR(c)
    67   case c::s => SEQ(CHAR(c), charlist2rexp(s))
    59   case c::s => SEQ(CHAR(c), charlist2rexp(s))
    80   def % = STAR(s)
    72   def % = STAR(s)
    81   def ~ (r: Rexp) = SEQ(s, r)
    73   def ~ (r: Rexp) = SEQ(s, r)
    82   def ~ (r: String) = SEQ(s, r)
    74   def ~ (r: String) = SEQ(s, r)
    83   def $ (r: Rexp) = RECD(s, r)
    75   def $ (r: Rexp) = RECD(s, r)
    84 }
    76 }
    85 
       
    86 def size(r: Rexp) : Int = r match {
       
    87   case ZERO => 1
       
    88   case ONE => 1
       
    89   case ALT(r1, r2) => 1 + size(r1) + size(r2)
       
    90   case SEQ(r1, r2) => 1 + size(r1) + size(r2)
       
    91   case STAR(r) => 1 + size(r)
       
    92   case RECD(_, r) => 1 + size(r)
       
    93   case CHARSET(_) => 1
       
    94 }
       
    95 
       
    96 
    77 
    97 
    78 
    98 // Bitcoded + Annotation
    79 // Bitcoded + Annotation
    99 //=======================
    80 //=======================
   100 
    81 
   132 
   113 
   133 // example
   114 // example
   134 // internalise(("a" | "ab") ~ ("b" | ""))
   115 // internalise(("a" | "ab") ~ ("b" | ""))
   135 
   116 
   136 
   117 
   137 // decoding of a value from a bitsequence
       
   138 // (this is not tail-recursive and therefore a potential bottleneck)
       
   139 def vdecode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
       
   140   case (ONE, bs) => (Empty, bs)
       
   141   case (ALT(r1, r2), Z::bs) => {
       
   142     val (v, bs1) = vdecode_aux(r1, bs)
       
   143     (Left(v), bs1)
       
   144   }
       
   145   case (ALT(r1, r2), S::bs) => {
       
   146     val (v, bs1) = vdecode_aux(r2, bs)
       
   147     (Right(v), bs1)
       
   148   }
       
   149   case (SEQ(r1, r2), bs) => {
       
   150     val (v1, bs1) = vdecode_aux(r1, bs)
       
   151     val (v2, bs2) = vdecode_aux(r2, bs1)
       
   152     (Sequ(v1, v2), bs2)
       
   153   }
       
   154   case (STAR(r1), Z::bs) => {
       
   155     val (v, bs1) = vdecode_aux(r1, bs)
       
   156     val (Stars(vs), bs2) = vdecode_aux(STAR(r1), bs1)
       
   157     (Stars(v::vs), bs2)
       
   158   }
       
   159   case (STAR(_), S::bs) => (Stars(Nil), bs)
       
   160   case (RECD(s, r1), bs) => 
       
   161     val (v, bs1) = vdecode_aux(r1, bs)
       
   162     (Recd(s, v), bs1)
       
   163   case (CHARSET(_), C(c)::bs) => (Chr(c), bs)
       
   164 }
       
   165 
       
   166 def vdecode(r: Rexp, bs: Bits) = vdecode_aux(r, bs) match {
       
   167   case (v, Nil) => v
       
   168   case _ => throw new Exception("Not decodable")
       
   169 }
       
   170 
       
   171 // decoding of sequence of string tokens from a bitsequence
   118 // decoding of sequence of string tokens from a bitsequence
   172 // tail-recursive version using an accumulator (alternative for 
   119 // tail-recursive version using an accumulator
   173 // vdecode)
       
   174 @tailrec
   120 @tailrec
   175 def sdecode_aux(rs: List[Rexp], bs: Bits, acc: List[String]) : List[String] = (rs, bs) match {
   121 def sdecode_aux(rs: List[Rexp], bs: Bits, acc: List[String]) : List[String] = (rs, bs) match {
   176   case (Nil, _) => acc
   122   case (Nil, _) => acc
   177   case (_, Nil) => acc
   123   case (_, Nil) => acc
   178   case (ONE::rest, bs) => sdecode_aux(rest, bs, acc)
   124   case (ONE::rest, bs) => sdecode_aux(rest, bs, acc)
   224   case ACHARSET(bs, f) => if(f(c)) AONE(bs :+ C(c)) else AZERO
   170   case ACHARSET(bs, f) => if(f(c)) AONE(bs :+ C(c)) else AZERO
   225 }
   171 }
   226 
   172 
   227 // derivative w.r.t. a string (iterates bder)
   173 // derivative w.r.t. a string (iterates bder)
   228 @tailrec
   174 @tailrec
   229 def bders (s: List[Char], r: ARexp) : ARexp = s match {
   175 def bders(s: List[Char], r: ARexp) : ARexp = s match {
   230   case Nil => r
   176   case Nil => r
   231   case c::s => bders(s, bder(c, r))
   177   case c::s => bders(s, bder(c, r))
   232 }
   178 }
   233 
   179 
   234 // main unsimplified lexing function (produces a bitsequence)
   180 // main unsimplified lexing function (produces a bitsequence)
   236   case Nil => if (bnullable(r)) bmkeps(r) else throw new Exception("Not matched")
   182   case Nil => if (bnullable(r)) bmkeps(r) else throw new Exception("Not matched")
   237   case c::cs => blex(bder(c, r), cs)
   183   case c::cs => blex(bder(c, r), cs)
   238 }
   184 }
   239 
   185 
   240 // calls blex and decodes the value
   186 // calls blex and decodes the value
   241 def blexing(r: Rexp, s: String) : Val = 
   187 def blexing(r: Rexp, s: String) = 
   242   vdecode(r, blex(internalise(r), s.toList))
   188   sdecode(r, blex(internalise(r), s.toList))
   243 
   189 
   244 
   190 
   245 // example by Tudor
   191 // example by Tudor
   246 //val reg = (STAR("a") ~ ("b" | "c")).%
   192 //val reg = (STAR("a") ~ ("b" | "c")).%
   247 
   193 
   281   case Nil => if (bnullable(r)) bmkeps(r) 
   227   case Nil => if (bnullable(r)) bmkeps(r) 
   282               else throw new Exception("Not matched")
   228               else throw new Exception("Not matched")
   283   case c::cs => blex_simp(bsimp(bder(c, r)), cs)
   229   case c::cs => blex_simp(bsimp(bder(c, r)), cs)
   284 }
   230 }
   285 
   231 
   286 // blexing_simp decodes a value from the bitsequence (potentially slow)
   232 // blexing_simp decodes a string-list from the bitsequence
   287 // blexing2_simp decodes a string-list from the bitsequence
   233 def blexing_simp(r: Rexp, s: String) : List[String] = 
   288 def blexing_simp(r: Rexp, s: String) : Val = 
       
   289   vdecode(r, blex_simp(internalise(r), s.toList))
       
   290 
       
   291 def blexing2_simp(r: Rexp, s: String) : List[String] = 
       
   292   sdecode(r, blex_simp(internalise(r), s.toList))
   234   sdecode(r, blex_simp(internalise(r), s.toList))
   293 
   235 
   294 
   236 
   295 //println(blexing_simp(reg, "aab"))
   237 //println(blexing_simp(reg, "aab"))
   296 
   238 
   297 
   239 
   298 // extracts a string from value
   240 
   299 def flatten(v: Val) : String = v match {
   241 def size(r: Rexp) : Int = r match {
   300   case Empty => ""
   242   case ZERO => 1
   301   case Chr(c) => c.toString
   243   case ONE => 1
   302   case Left(v) => flatten(v)
   244   case CHARSET(_) => 1
   303   case Right(v) => flatten(v)
   245   case ALT(r1, r2) => 1 + size(r1) + size(r2)
   304   case Sequ(v1, v2) => flatten(v1) + flatten(v2)
   246   case SEQ(r1, r2) => 1 + size(r1) + size(r2)
   305   case Stars(vs) => vs.map(flatten).mkString
   247   case STAR(r) => 1 + size(r)
   306 }
   248   case RECD(_, r) => 1 + size(r)
   307 
   249 }
   308 // extracts an environment from a value
   250 
   309 def env(v: Val) : List[(String, String)] = v match {
   251 def bsize(r: ARexp) = size(erase(r))
   310   case Empty => Nil
       
   311   case Chr(c) => Nil
       
   312   case Left(v) => env(v)
       
   313   case Right(v) => env(v)
       
   314   case Sequ(v1, v2) => env(v1) ::: env(v2)
       
   315   case Stars(vs) => vs.flatMap(env)
       
   316   case Recd(x, v) => (x, flatten(v))::env(v)
       
   317 }
       
   318 
       
   319 def bsize(a: ARexp) = size(erase(a))
       
   320 
   252 
   321 // Some Tests
   253 // Some Tests
   322 //============
   254 //============
   323 
       
   324 
   255 
   325 def time_needed[T](i: Int, code: => T) = {
   256 def time_needed[T](i: Int, code: => T) = {
   326   val start = System.nanoTime()
   257   val start = System.nanoTime()
   327   for (j <- 1 to i) code
   258   for (j <- 1 to i) code
   328   val end = System.nanoTime()
   259   val end = System.nanoTime()
   329   (end - start)/(i * 1.0e9)
   260   (end - start)/(i * 1.0e9)
   330 }
   261 }
   331 
   262 
       
   263 val ones = SEQ(SEQ(CHAR('/'), CHAR('*')), SEQ(STAR(CHAR('1')), SEQ(CHAR('*'), CHAR('/'))))
       
   264 println("sizes unsimplified")
       
   265 println(bsize(bders("/*".toList, internalise(ones))))     // 12
       
   266 println(bsize(bders("/*1".toList, internalise(ones))))    // 25
       
   267 println(bsize(bders("/*11".toList, internalise(ones))))   // 34
       
   268 println(bsize(bders("/*111".toList, internalise(ones))))  // 43
       
   269 println(bsize(bders("/*1111".toList, internalise(ones)))) // 52
       
   270 println("sizes simplified")
       
   271 println(bsize(bsimp(bders("/*".toList, internalise(ones)))))     // 6
       
   272 println(bsize(bsimp(bders("/*1".toList, internalise(ones)))))    // 6
       
   273 println(bsize(bsimp(bders("/*11".toList, internalise(ones)))))   // 6
       
   274 println(bsize(bsimp(bders("/*111".toList, internalise(ones)))))  // 6
       
   275 println(bsize(bsimp(bders("/*1111".toList, internalise(ones))))) // 6
       
   276 
       
   277 println("ones:")
       
   278 for(i <- 0 to 10000 by 1000) {
       
   279     println(s"$i: ${time_needed(1, blexing_simp(ones, "/*" ++ "1" * i ++ "*/"))}")
       
   280 }
       
   281 
       
   282 
       
   283 
       
   284 
       
   285 System.exit(1)
       
   286 
   332 val evil1 = STAR(STAR("a")) ~ "b"
   287 val evil1 = STAR(STAR("a")) ~ "b"
   333 val evil2 = STAR(STAR(STAR("a"))) ~ "b"
   288 val evil2 = STAR(STAR(STAR("a"))) ~ "b"
   334 val evil3 = STAR("aa" | "a")
   289 val evil3 = STAR("aa" | "a")
   335 
   290 
   336 /*
       
   337 println("evil1")
   291 println("evil1")
   338 for(i <- 0 to 10000 by 1000) {
   292 for(i <- 0 to 10000 by 1000) {
   339     println(time_needed(1, blexing2_simp(evil1, "a"*i ++ "b")))
   293     println(time_needed(1, blexing_simp(evil1, "a" * i ++ "b")))
   340 }
   294 }
   341 */
   295 
   342 
   296 
   343 /*
       
   344 println("evil2")
   297 println("evil2")
   345 for(i <- 0 to 10000 by 1000) {
   298 for(i <- 0 to 10000 by 1000) {
   346     println(time_needed(1, blexing2_simp(evil2, "a"*i ++ "b")))
   299     println(time_needed(1, blexing_simp(evil2, "a" * i ++ "b")))
   347 }
   300 }
   348 */
   301 
   349 
   302 
   350 /*
       
   351 println("evil3")
   303 println("evil3")
   352 for(i <- 0 to 10000 by 1000) {
   304 for(i <- 0 to 10000 by 1000) {
   353     println(time_needed(1, blexing2_simp(evil3, "a"*i)))
   305     println(time_needed(1, blexing_simp(evil3, "a" * i)))
   354 }
   306 }
   355 */
       
   356 
   307 
   357 // WHILE LANGUAGE
   308 // WHILE LANGUAGE
   358 //================
   309 //================
   359 def PLUS(r: Rexp) = r ~ r.%
   310 def PLUS(r: Rexp) = r ~ r.%
   360 def RANGE(s: String) = CHARSET(s.toSet)
   311 def RANGE(s: String) = CHARSET(s.toSet)
   382                   ("w" $ WHITESPACE)).%
   333                   ("w" $ WHITESPACE)).%
   383 
   334 
   384 
   335 
   385 // Some Simple While Tests
   336 // Some Simple While Tests
   386 //========================
   337 //========================
       
   338 println("WHILE TESTS")
       
   339 
       
   340 
   387 
   341 
   388 val prog0 = """read n"""
   342 val prog0 = """read n"""
   389 println(s"test: $prog0")
   343 println(s"test: $prog0")
   390 println(env(blexing_simp(WHILE_REGS, prog0)))
   344 println(blexing_simp(WHILE_REGS, prog0))
   391 println(blexing2_simp(WHILE_REGS, prog0))
       
   392 
   345 
   393 val prog1 = """read  n; write n"""  
   346 val prog1 = """read  n; write n"""  
   394 println(s"test: $prog1")
   347 println(s"test: $prog1")
   395 println(env(blexing_simp(WHILE_REGS, prog1)))
   348 println(blexing_simp(WHILE_REGS, prog1))
   396 println(blexing2_simp(WHILE_REGS, prog1))
       
   397 
   349 
   398 val prog2 = """
   350 val prog2 = """
   399 write "Fib";
   351 write "Fib";
   400 read n;
   352 read n;
   401 minus1 := 0;
   353 minus1 := 0;
   409 write "Result";
   361 write "Result";
   410 write minus2
   362 write minus2
   411 """
   363 """
   412 
   364 
   413 println("lexing fib program (once)")
   365 println("lexing fib program (once)")
   414 println(blexing2_simp(WHILE_REGS, prog2).filter(s => s == "" || !s.startsWith("w")))
   366 println(blexing_simp(WHILE_REGS, prog2).filter(s => s == "" || !s.startsWith("w")))
   415 
   367 
   416 val n = 200
   368 val n = 200
   417 println(s"lexing fib program ($n times, size ${prog2.length * n})")
   369 println(s"lexing fib program ($n times, size ${prog2.length * n})")
   418 println(time_needed(1, blexing2_simp(WHILE_REGS, prog2 * n)))
   370 println(time_needed(1, blexing_simp(WHILE_REGS, prog2 * n)))