progs/token.scala
changeset 579 1a521448d211
parent 552 40fa0f628dc4
child 580 dbb0f7d2a33d
equal deleted inserted replaced
578:6e5e3adc9eb1 579:1a521448d211
     3 
     3 
     4 abstract class Rexp 
     4 abstract class Rexp 
     5 case object ZERO extends Rexp
     5 case object ZERO extends Rexp
     6 case object ONE extends Rexp
     6 case object ONE extends Rexp
     7 case class CHAR(c: Char) extends Rexp
     7 case class CHAR(c: Char) extends Rexp
     8 case class ALTS(rs: List[Rexp]) extends Rexp 
     8 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
     9 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
     9 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
    10 case class STAR(r: Rexp) extends Rexp 
    10 case class STAR(r: Rexp) extends Rexp 
    11 
    11 case class RECD(x: String, r: Rexp) extends Rexp
    12 // ALT is now an abbreviation
    12 
    13 def ALT(r1: Rexp, r2: Rexp) = ALTS(List(r1, r2))
       
    14 
       
    15 // proj is now used instead for Left and Right
       
    16 abstract class Val
    13 abstract class Val
    17 case object Empty extends Val
    14 case object Empty extends Val
    18 case class Chr(c: Char) extends Val
    15 case class Chr(c: Char) extends Val
    19 case class Sequ(v1: Val, v2: Val) extends Val
    16 case class Sequ(v1: Val, v2: Val) extends Val
    20 case class Proj(n: Int, v: Val) extends Val
    17 case class Left(v: Val) extends Val
       
    18 case class Right(v: Val) extends Val
    21 case class Stars(vs: List[Val]) extends Val
    19 case class Stars(vs: List[Val]) extends Val
    22 case class Rec(s: String, v: Val) extends Val
    20 case class Rec(x: String, v: Val) extends Val
    23  
    21    
    24 // for manipulating projections  
       
    25 def IncProj(m: Int, v: Val) = v match {
       
    26   case Proj(n, v) => Proj(n + m, v)
       
    27 }
       
    28 
       
    29 def DecProj(m: Int, v: Val) = v match {
       
    30   case Proj(n, v) => Proj(n - m, v)
       
    31 }
       
    32 
       
    33 
       
    34 // some convenience for typing in regular expressions
    22 // some convenience for typing in regular expressions
    35 def charlist2rexp(s : List[Char]): Rexp = s match {
    23 def charlist2rexp(s : List[Char]): Rexp = s match {
    36   case Nil => ONE
    24   case Nil => ONE
    37   case c::Nil => CHAR(c)
    25   case c::Nil => CHAR(c)
    38   case c::s => SEQ(CHAR(c), charlist2rexp(s))
    26   case c::s => SEQ(CHAR(c), charlist2rexp(s))
    49   def | (r: Rexp) = ALT(s, r)
    37   def | (r: Rexp) = ALT(s, r)
    50   def | (r: String) = ALT(s, r)
    38   def | (r: String) = ALT(s, r)
    51   def % = STAR(s)
    39   def % = STAR(s)
    52   def ~ (r: Rexp) = SEQ(s, r)
    40   def ~ (r: Rexp) = SEQ(s, r)
    53   def ~ (r: String) = SEQ(s, r)
    41   def ~ (r: String) = SEQ(s, r)
    54 }
    42   def $ (r: Rexp) = RECD(s, r)
    55 
    43 }
    56 // string of a regular expression - for testing purposes 
       
    57 def string(r: Rexp) : String = r match {
       
    58   case ZERO => "0"
       
    59   case ONE => "1"
       
    60   case CHAR(c) => c.toString 
       
    61   case ALTS(rs) => rs.map(string).mkString("[", "|", "]")
       
    62   case SEQ(CHAR(c), CHAR(d)) => s"${c}${d}"
       
    63   case SEQ(ONE, CHAR(c)) => s"1${c}"
       
    64   case SEQ(r1, r2) => s"(${string(r1)} ~ ${string(r2)})"
       
    65   case STAR(r) => s"(${string(r)})*"
       
    66 }
       
    67 
       
    68 // size of a regular expression - for testing purposes 
       
    69 def size(r: Rexp) : Int = r match {
       
    70   case ZERO => 1
       
    71   case ONE => 1
       
    72   case CHAR(_) => 1
       
    73   case ALTS(rs) => 1 + rs.map(size).sum
       
    74   case SEQ(r1, r2) => 1 + size(r1) + size(r2)
       
    75   case STAR(r) => 1 + size(r)
       
    76 }
       
    77 
       
    78 
    44 
    79 // nullable function: tests whether the regular 
    45 // nullable function: tests whether the regular 
    80 // expression can recognise the empty string
    46 // expression can recognise the empty string
    81 def nullable (r: Rexp) : Boolean = r match {
    47 def nullable (r: Rexp) : Boolean = r match {
    82   case ZERO => false
    48   case ZERO => false
    83   case ONE => true
    49   case ONE => true
    84   case CHAR(_) => false
    50   case CHAR(_) => false
    85   case ALTS(rs) => rs.exists(nullable)
    51   case ALT(r1, r2) => nullable(r1) || nullable(r2)
    86   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    52   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    87   case STAR(_) => true
    53   case STAR(_) => true
       
    54   case RECD(_, r1) => nullable(r1)
    88 }
    55 }
    89 
    56 
    90 // derivative of a regular expression w.r.t. a character
    57 // derivative of a regular expression w.r.t. a character
    91 def der (c: Char, r: Rexp) : Rexp = r match {
    58 def der (c: Char, r: Rexp) : Rexp = r match {
    92   case ZERO => ZERO
    59   case ZERO => ZERO
    93   case ONE => ZERO
    60   case ONE => ZERO
    94   case CHAR(d) => if (c == d) ONE else ZERO
    61   case CHAR(d) => if (c == d) ONE else ZERO
    95   case ALTS(rs) => ALTS(rs.map(der(c, _)))
    62   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
    96   case SEQ(r1, r2) => 
    63   case SEQ(r1, r2) => 
    97     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
    64     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
    98     else SEQ(der(c, r1), r2)
    65     else SEQ(der(c, r1), r2)
    99   case STAR(r) => SEQ(der(c, r), STAR(r))
    66   case STAR(r) => SEQ(der(c, r), STAR(r))
       
    67   case RECD(_, r1) => der(c, r1)
   100 }
    68 }
   101 
    69 
   102 // derivative w.r.t. a string (iterates der)
    70 // derivative w.r.t. a string (iterates der)
   103 def ders (s: List[Char], r: Rexp) : Rexp = s match {
    71 def ders (s: List[Char], r: Rexp) : Rexp = s match {
   104   case Nil => r
    72   case Nil => r
   105   case c::s => ders(s, der(c, r))
    73   case c::s => ders(s, der(c, r))
   106 }
    74 }
   107 
    75 
   108 val test : Rexp= STAR("a" | "aa")
       
   109 size(test)
       
   110 size(der('a', test))
       
   111 size(der('a', der('a', test)))
       
   112 
       
   113 size(ders("aaaaaa".toList, test)) 
       
   114 string(ders("aaaaaa".toList, test))
       
   115 
       
   116 
       
   117 // extracts a string from value
    76 // extracts a string from value
   118 def flatten(v: Val) : String = v match {
    77 def flatten(v: Val) : String = v match {
   119   case Empty => ""
    78   case Empty => ""
   120   case Chr(c) => c.toString
    79   case Chr(c) => c.toString
   121   case Proj(_, v) => flatten(v)
    80   case Left(v) => flatten(v)
   122   case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
    81   case Right(v) => flatten(v)
       
    82   case Sequ(v1, v2) => flatten(v1) + flatten(v2)
   123   case Stars(vs) => vs.map(flatten).mkString
    83   case Stars(vs) => vs.map(flatten).mkString
   124 }
    84   case Rec(_, v) => flatten(v)
   125 
    85 }
   126 
    86 
   127 // mkeps
    87 // extracts an environment from a value
       
    88 def env(v: Val) : List[(String, String)] = v match {
       
    89   case Empty => Nil
       
    90   case Chr(c) => Nil
       
    91   case Left(v) => env(v)
       
    92   case Right(v) => env(v)
       
    93   case Sequ(v1, v2) => env(v1) ::: env(v2)
       
    94   case Stars(vs) => vs.flatMap(env)
       
    95   case Rec(x, v) => (x, flatten(v))::env(v)
       
    96 }
       
    97 
       
    98 // injection part
   128 def mkeps(r: Rexp) : Val = r match {
    99 def mkeps(r: Rexp) : Val = r match {
   129   case ONE => Empty
   100   case ONE => Empty
   130   case ALTS(r1::rs) => 
   101   case ALT(r1, r2) => 
   131     if (nullable(r1)) Proj(0, mkeps(r1)) 
   102     if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
   132     else IncProj(1, mkeps(ALTS(rs)))
       
   133   case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
   103   case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
   134   case STAR(r) => Stars(Nil)
   104   case STAR(r) => Stars(Nil)
   135 }
   105   case RECD(x, r) => Rec(x, mkeps(r))
   136 
   106 }
   137 // injection 
   107 
       
   108 
   138 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
   109 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
   139   case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
   110   case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
   140   case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
   111   case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
   141   case (SEQ(r1, r2), Proj(0, Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
   112   case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
   142   case (SEQ(r1, r2), Proj(1, v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
   113   case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
   143   case (ALTS(rs), Proj(n, v1)) => Proj(n, inj(rs(n), c, v1))
   114   case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
       
   115   case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
   144   case (CHAR(d), Empty) => Chr(c) 
   116   case (CHAR(d), Empty) => Chr(c) 
       
   117   case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
   145 }
   118 }
   146 
   119 
   147 // main lexing function (produces a value)
   120 // main lexing function (produces a value)
   148 //  - does  not simplify
       
   149 def lex(r: Rexp, s: List[Char]) : Val = s match {
   121 def lex(r: Rexp, s: List[Char]) : Val = s match {
   150   case Nil => {
   122   case Nil => if (nullable(r)) mkeps(r) 
   151     //println(s"Size of the last regex: ${size(r)}")
   123               else throw new Exception("Not matched")
   152     if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
       
   153   }
       
   154   case c::cs => inj(r, c, lex(der(c, r), cs))
   124   case c::cs => inj(r, c, lex(der(c, r), cs))
   155 }
   125 }
   156 
   126 
   157 def lexing(r: Rexp, s: String) : Val = lex(r, s.toList)
   127 def lexing(r: Rexp, s: String) : Val = lex(r, s.toList)
   158 
   128 
   159 lexing("a" | ZERO, "a")
       
   160 lexing(ZERO | "a", "a")
       
   161 
   129 
   162 lexing(("ab" | "a") ~ ("b" | ONE), "ab")
   130 lexing(("ab" | "a") ~ ("b" | ONE), "ab")
   163 
   131 
   164 
       
   165 // removing duplicate regular expressions
       
   166 val unit = (ZERO, F_ERROR(_))
       
   167 
       
   168 def dups2(xs: List[(Rexp, Val => Val)], 
       
   169 	 acc: List[(Rexp, Val => Val)] = Nil) : List[(Rexp, Val => Val)] = xs match {
       
   170   case Nil => acc
       
   171   case (x, y)::xs => 
       
   172 	     if (acc.map(_._1).contains(x)) dups2(xs, acc :+ unit)
       
   173 	     else dups2(xs, acc :+ (x, y))
       
   174 }
       
   175 
       
   176 def dups(xs: List[(Rexp, Val => Val)]) : List[(Rexp, Val => Val)] = {
       
   177   val out = dups2(xs)
       
   178   //if (out != xs) {
       
   179   //  println()
       
   180   //  println(s"Input ${string(ALTS(xs.map(_._1)))}")
       
   181   //  println(s"Ouput ${string(ALTS(out.map(_._1)))}")
       
   182   //}
       
   183   out
       
   184 }
       
   185 
   132 
   186 
   133 
   187 // some "rectification" functions for simplification
   134 // some "rectification" functions for simplification
   188 def F_ID(v: Val): Val = v
   135 def F_ID(v: Val): Val = v
       
   136 def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
       
   137 def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
       
   138 def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
       
   139   case Right(v) => Right(f2(v))
       
   140   case Left(v) => Left(f1(v))
       
   141 }
   189 def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
   142 def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
   190   case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
   143   case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
   191 }
   144 }
   192 def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = 
   145 def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = 
   193   (v:Val) => Sequ(f1(Empty), f2(v))
   146   (v:Val) => Sequ(f1(Empty), f2(v))
   194 def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = 
   147 def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = 
   195   (v:Val) => Sequ(f1(v), f2(Empty))
   148   (v:Val) => Sequ(f1(v), f2(Empty))
       
   149 def F_RECD(f: Val => Val) = (v:Val) => v match {
       
   150   case Rec(x, v) => Rec(x, f(v))
       
   151 }
   196 def F_ERROR(v: Val): Val = throw new Exception("error")
   152 def F_ERROR(v: Val): Val = throw new Exception("error")
   197 def F_PRINT(v: Val): Val = {
       
   198   println(s"value is ${v}")
       
   199   throw new Exception("caught error")
       
   200 }
       
   201 
       
   202 def flats(rs: List[Rexp], seen: Set[Rexp]) : (List[Rexp], Val => Val) = {
       
   203   //println(s"I am flats: ${string(ALTS(rs))}")
       
   204   //println(s"The size of seen is ${seen.size}, ${seen.map(string)}")
       
   205   rs match {
       
   206   case Nil => (Nil, F_ERROR)
       
   207   case r::rs1 if seen.contains(simp(r)._1) => {
       
   208     //println(s" I remove ${string(r)}")
       
   209     val (rs2, f) = flats(rs1, seen)
       
   210     (rs2, (v:Val) => IncProj(1, f(v)))
       
   211   }
       
   212   case ZERO::rs1 => {
       
   213     val (rs2, f) = flats(rs1, seen)
       
   214     (rs2, (v:Val) => IncProj(1, f(v)))
       
   215   }
       
   216   case ALTS(rs0)::rs1 => {
       
   217     val (rss, f1) = flats(rs0, seen)
       
   218     val (rs2, f2) = flats(rs1, rss.toSet ++ seen)
       
   219     (rss:::rs2, (v:Val) => v match {
       
   220       case Proj(n, vn) => 
       
   221      	if (n < rss.length) Proj(0, f1(Proj(n, vn)))
       
   222   	else IncProj(1, f2(Proj(n - rss.length, vn)))
       
   223     })
       
   224   }
       
   225   case r1::rs2 => {
       
   226     val (r1s, f1) = simp(r1)
       
   227     val (rs3, f2) = flats(rs2, seen + r1s)
       
   228     (r1s::rs3, (v:Val) => v match {
       
   229       case Proj(0, vn) => Proj(0, f1(vn))
       
   230       case Proj(n, vn) => IncProj(1, f2(Proj(n - 1, vn)))
       
   231     })
       
   232   }
       
   233 }}
       
   234 
   153 
   235 // simplification of regular expressions returning also an
   154 // simplification of regular expressions returning also an
   236 // rectification function; no simplification under STAR 
   155 // rectification function; no simplification under STAR 
   237 def simp(r: Rexp): (Rexp, Val => Val) = r match {
   156 def simp(r: Rexp): (Rexp, Val => Val) = r match {
   238   case ALTS(rs) => {
   157   case ALT(r1, r2) => {
   239     val (rs_s, fs_s) = flats(rs, Set())
   158     val (r1s, f1s) = simp(r1)
   240     rs_s match {
   159     val (r2s, f2s) = simp(r2)
   241       case Nil => (ZERO, F_ERROR) 
   160     (r1s, r2s) match {
   242       case r::Nil => (r, (v:Val) => fs_s(Proj(0, v)))
   161       case (ZERO, _) => (r2s, F_RIGHT(f2s))
   243       case rs_sd => (ALTS(rs_sd), fs_s)
   162       case (_, ZERO) => (r1s, F_LEFT(f1s))
       
   163       case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
       
   164                 else (ALT (r1s, r2s), F_ALT(f1s, f2s)) 
   244     }
   165     }
   245   }
   166   }
   246   case SEQ(r1, r2) => {
   167   case SEQ(r1, r2) => {
   247     val (r1s, f1s) = simp(r1)
   168     val (r1s, f1s) = simp(r1)
   248     val (r2s, f2s) = simp(r2)
   169     val (r2s, f2s) = simp(r2)
   249     (r1s, r2s) match {
   170     (r1s, r2s) match {
   250       case (ZERO, _) => (ZERO, F_ERROR)
   171       case (ZERO, _) => (ZERO, F_ERROR)
   251       case (_, ZERO) => (ZERO, F_ERROR)
   172       case (_, ZERO) => (ZERO, F_ERROR)
   252       case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
   173       case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
   253       case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s))
   174       case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s))
   254       case (ALTS(rs), r2s) => (ALTS(rs.map(SEQ(_, r2s))), 
       
   255 			       (v:Val) => v match {
       
   256 				 case Proj(n, Sequ(v1, v2)) => Sequ(f1s(Proj(n, v1)), f2s(v2))
       
   257 			       }
       
   258 			      )
       
   259       case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
   175       case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
   260     }
   176     }
   261   }
   177   }
       
   178   case RECD(x, r1) => {
       
   179     val (r1s, f1s) = simp(r1)
       
   180     (RECD(x, r1s), F_RECD(f1s))
       
   181   }
   262   case r => (r, F_ID)
   182   case r => (r, F_ID)
   263 }
   183 }
   264 
   184 
   265 def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
   185 def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
   266   case Nil => {
   186   case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
   267     //println(s"Size of the last regex: ${size(r)}")
       
   268     //println(s"${string(r)}")
       
   269     if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
       
   270   }
       
   271   case c::cs => {
   187   case c::cs => {
   272     val rd = der(c, r)     
   188     val (r_simp, f_simp) = simp(der(c, r))
   273     val (r_simp, f_simp) = simp(rd)
   189     inj(r, c, f_simp(lex_simp(r_simp, cs)))
   274     println(s"BEFORE ${string(rd)}")
       
   275     println(s"AFTER  ${string(r_simp)}")
       
   276     val rec = lex_simp(r_simp, cs)
       
   277     inj(r, c, f_simp(rec))
       
   278   }
   190   }
   279 }
   191 }
   280 
   192 
   281 def lexing_simp(r: Rexp, s: String) : Val = lex_simp(r, s.toList)
   193 def lexing_simp(r: Rexp, s: String) : Val = lex_simp(r, s.toList)
   282 
   194 
   283 
       
   284 lexing_simp("ab" | "aa", "ab")
       
   285 lexing_simp("ab" | "aa", "aa")
       
   286 
       
   287 lexing(STAR("a" | "aa"), "aaaaa")
       
   288 lexing_simp(STAR("a" | "aa"), "aaaaa")
       
   289 
       
   290 lexing(STAR("a" | "aa"), "aaaaaaaaaaa")
       
   291 lexing_simp(STAR("a" | "aa"), "aaaaaaaaaaa")
       
   292 
       
   293 lexing_simp(STAR("a" | "aa"), "a" * 2)
       
   294 lexing_simp(STAR("a" | "aa"), "a" * 3)
       
   295 lexing_simp(STAR("a" | "aa"), "a" * 4)
       
   296 
       
   297 
       
   298 lexing_simp(STAR("a" | "aa"), "a" * 20)
       
   299 lexing_simp(STAR("a" | "aa"), "a" * 2000)
       
   300 
       
   301 lexing(ALTS(List("aa", "aa", "aa", "ab", "ab")), "ab")
       
   302 lexing_simp(ALTS(List("aa", "aa", "aa", "ab", "ab")), "ab")
       
   303 
       
   304 lexing(ALTS(List(("aa" | "ab" | "aa"), "aa", "ab", "ab")), "ab")
       
   305 lexing_simp(ALTS(List(("aa" | "ab" | "aa"), "aa", "ab", "ab")), "ab")
       
   306 
       
   307 lexing(ALTS(List(ZERO, ZERO, ONE, "aa", ZERO, "aa", "aa")), "aa")
       
   308 lexing_simp(ALTS(List(ZERO, ZERO, ONE, "aa", ZERO, "aa", "aa")), "aa")
       
   309 
       
   310 lexing_simp(ONE | ZERO, "")
       
   311 lexing_simp(ZERO | ONE, "")
       
   312 
       
   313 lexing("a" | ZERO, "a")
       
   314 lexing_simp("a" | ZERO, "a")
       
   315 lexing(ZERO | "a", "a")
       
   316 lexing_simp(ZERO | "a", "a")
       
   317 
       
   318 lexing(ALTS(List(ZERO, ZERO, ONE, "a", ZERO, "a")), "a")
       
   319 lexing_simp(ALTS(List(ZERO, ZERO, ONE, "a", ZERO, "a")), "a")
       
   320 lexing(ALTS(List("a")), "a")
       
   321 lexing_simp(ALTS(List("a")), "a")
       
   322 
       
   323 lexing_simp(("a" | ZERO) | ZERO, "a")
       
   324 lexing_simp("a" | (ZERO | ZERO), "a")
       
   325 lexing_simp(ZERO | ("a" | ZERO), "a")
       
   326 
       
   327 
       
   328 lexing_simp("abc", "abc")
       
   329 
       
   330 lexing_simp("abc" | ONE, "abc")
       
   331 
       
   332 lexing(("a" | "ab") ~ ("b" | ""), "ab")
       
   333 lexing_simp(("a" | "ab") ~ ("b" | ""), "ab")
   195 lexing_simp(("a" | "ab") ~ ("b" | ""), "ab")
   334 lexing_simp(("ba" | "c" | "ab"), "ab")
   196 
   335 
   197 // Lexing Rules for a Small While Language
   336 lexing(ALTS(List(ALTS(Nil), "c", "ab")), "ab")
   198 
   337 lexing_simp(ALTS(List(ALTS(Nil), "c", "ab")), "ab")
   199 def PLUS(r: Rexp) = r ~ r.%
   338 
   200 
   339 lexing(ALTS(List(ALTS("ab"::Nil), "c", "ab")), "ab")
   201 val SYM = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
   340 lexing_simp(ALTS(List(ALTS("ab"::Nil), "c", "ab")), "ab")
   202 val DIGIT = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
   341 
   203 val ID = SYM ~ (SYM | DIGIT).% 
   342 lexing(ALTS(List(ALTS(List("a","ab")), "c", "ab")), "ab")
   204 val NUM = PLUS(DIGIT)
   343 lexing_simp(ALTS(List(ALTS(List("a","ab")), "c", "ab")), "ab")
   205 val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" | "true" | "false"
   344 
   206 val SEMI: Rexp = ";"
   345 lexing(ALTS(List(ALTS(List("ab","a")), "c", "ab")), "ab")
   207 val OP: Rexp = ":=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/"
   346 lexing_simp(ALTS(List(ALTS(List("ab","a")), "c", "ab")), "ab")
   208 val WHITESPACE = PLUS(" " | "\n" | "\t")
   347 
   209 val RPAREN: Rexp = ")"
   348 lexing(ALTS(List(ALTS(List("ab","a","a")), "c", "ab")), "ab")
   210 val LPAREN: Rexp = "("
   349 lexing_simp(ALTS(List(ALTS(List("ab","a","a")), "c", "ab")), "ab")
   211 val BEGIN: Rexp = "{"
   350 
   212 val END: Rexp = "}"
   351 lexing(ALTS(List(ALTS(List("a","ab","a")), "c", "ab")), "ab")
   213 val STRING: Rexp = "\"" ~ SYM.% ~ "\""
   352 lexing_simp(ALTS(List(ALTS(List("a","ab","a")), "c", "ab")), "ab")
   214 
   353 
   215 
   354 lexing(ALTS(List(ALTS(List("b","a","ab")), "c", "ab")), "ab")
   216 val WHILE_REGS = (("k" $ KEYWORD) | 
   355 lexing_simp(ALTS(List(ALTS(List("b","a","ab")), "c", "ab")), "ab")
   217                   ("i" $ ID) | 
   356 
   218                   ("o" $ OP) | 
   357 
   219                   ("n" $ NUM) | 
   358 lexing_simp(ALTS(List("ba", "c", "ab")), "ab")
   220                   ("s" $ SEMI) | 
   359 
   221                   ("str" $ STRING) |
   360 lexing(ALTS(List("a", "ab", "a")), "ab")
   222                   ("p" $ (LPAREN | RPAREN)) | 
   361 lexing_simp(ALTS(List("a", "ab", "a")), "ab")
   223                   ("b" $ (BEGIN | END)) | 
   362 
   224                   ("w" $ WHITESPACE)).%
   363 lexing(STAR("a" | "aa"), "aa")
   225 
   364 lexing_simp(STAR("a" | "aa"), "aa")
   226 //   Testing
   365 
   227 //============
   366 
   228 
   367 
   229 def time[T](code: => T) = {
   368 
   230   val start = System.nanoTime()
   369 def enum(n: Int, s: String) : Set[Rexp] = n match {
   231   val result = code
   370   case 0 => Set(ZERO, ONE) ++ s.toSet.map(CHAR)
   232   val end = System.nanoTime()
   371   case n => {
   233   println((end - start)/1.0e9)
   372     val rs = enum(n - 1, s)
   234   result
   373     rs ++
   235 }
   374     (for (r1 <- rs; r2 <- rs) yield ALT(r1, r2)) ++
   236 
   375     (for (r1 <- rs; r2 <- rs) yield SEQ(r1, r2)) ++
   237 val r1 = ("a" | "ab") ~ ("bcd" | "c")
   376     (for (r1 <- rs) yield STAR(r1))
   238 println(lexing(r1, "abcd"))
   377   }
   239 
   378 }
   240 val r2 = ("" | "a") ~ ("ab" | "b")
   379 
   241 println(lexing(r2, "ab"))
   380 def strs(n: Int, cs: String) : Set[String] = {
   242 
   381   if (n == 0) Set("")
   243 
   382   else {
   244 // Two Simple While Tests
   383     val ss = strs(n - 1, cs)
   245 //========================
   384     ss ++
   246 println("prog0 test")
   385     (for (s <- ss; c <- cs.toList) yield c + s)
   247 
   386   }
   248 val prog0 = """read n"""
   387 }
   249 println(env(lexing_simp(WHILE_REGS, prog0)))
   388 
   250 
   389 import scala.util.Try
   251 println("prog1 test")
   390 
   252 
   391 def tests(n: Int, m: Int, s: String) = {
   253 val prog1 = """read  n; write (n)"""
   392   val rs = enum(n, s)
   254 println(env(lexing_simp(WHILE_REGS, prog1)))
   393   val ss = strs(m, s)
   255 
   394   println(s"cases generated: ${rs.size} regexes and ${ss.size} strings")
   256 
   395   for (r1 <- rs.par; s1 <- ss.par) yield {
   257 // Bigger Test
   396     val res1 = Try(Some(lexing(r1, s1))).getOrElse(None)
   258 //=============
   397     val res2 = Try(Some(lexing_simp(r1, s1))).getOrElse(None)
   259 
   398     if (res1 != res2) println(s"Disagree on ${r1} and ${s1}")
   260 val prog2 = """
   399     if (res1 != res2) Some((r1, s1)) else None
   261 write "fib";
   400   }
   262 read n;
   401 }
   263 minus1 := 0;
   402 
   264 minus2 := 1;
   403 println("Testing")
   265 while n > 0 do {
   404 println(tests(2,7,"abc"))
   266   temp := minus2;
   405 
   267   minus2 := minus1 + minus2;
   406 
   268   minus1 := temp;
   407 
   269   n := n - 1
       
   270 };
       
   271 write "result";
       
   272 write minus2
       
   273 """
       
   274 
       
   275 println("Tokens")
       
   276 println(env(lexing_simp(WHILE_REGS, prog2)))
       
   277 println(env(lexing_simp(WHILE_REGS, prog2)).filterNot{_._1 == "w"}.mkString("\n"))
       
   278 
       
   279 // some more timing tests with
       
   280 // i copies of the program
       
   281 
       
   282 for (i <- 1 to 21 by 10) {
       
   283   print(i.toString + ":  ")
       
   284   time(lexing_simp(WHILE_REGS, prog2 * i))
       
   285 }
       
   286 
       
   287