progs/token.scala
changeset 549 352d15782d35
parent 542 37a3db7cd655
child 550 71fc4a7a7039
equal deleted inserted replaced
548:c6ba1d17aaf3 549:352d15782d35
     3 
     3 
     4 abstract class Rexp 
     4 abstract class Rexp 
     5 case object ZERO extends Rexp
     5 case object ZERO extends Rexp
     6 case object ONE extends Rexp
     6 case object ONE extends Rexp
     7 case class CHAR(c: Char) extends Rexp
     7 case class CHAR(c: Char) extends Rexp
     8 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
     8 case class ALTS(rs: List[Rexp]) extends Rexp 
     9 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
     9 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
    10 case class STAR(r: Rexp) extends Rexp 
    10 case class STAR(r: Rexp) extends Rexp 
    11 case class RECD[A](f: String => A, r: Rexp) extends Rexp
    11 
    12 
    12 // ALT is now an abbreviation
       
    13 def ALT(r1: Rexp, r2: Rexp) = ALTS(List(r1, r2))
       
    14 
       
    15 // proj is now used instead for Left and Right
    13 abstract class Val
    16 abstract class Val
    14 case object Empty extends Val
    17 case object Empty extends Val
    15 case class Chr(c: Char) extends Val
    18 case class Chr(c: Char) extends Val
    16 case class Sequ(v1: Val, v2: Val) extends Val
    19 case class Sequ(v1: Val, v2: Val) extends Val
    17 case class Left(v: Val) extends Val
    20 case class Proj(n: Int, v: Val) extends Val
    18 case class Right(v: Val) extends Val
       
    19 case class Stars(vs: List[Val]) extends Val
    21 case class Stars(vs: List[Val]) extends Val
    20 case class Rec[A](f: String => A, v: Val) extends Val
    22 case class Rec(s: String, v: Val) extends Val
    21    
    23  
       
    24 // for manipulating projections  
       
    25 def IncProj(m: Int, v: Val) = v match {
       
    26   case Proj(n, v) => Proj(n + m, v)
       
    27 }
       
    28 
       
    29 def DecProj(m: Int, v: Val) = v match {
       
    30   case Proj(n, v) => Proj(n - m, v)
       
    31 }
       
    32 
       
    33 
    22 // some convenience for typing in regular expressions
    34 // some convenience for typing in regular expressions
    23 def charlist2rexp(s : List[Char]): Rexp = s match {
    35 def charlist2rexp(s : List[Char]): Rexp = s match {
    24   case Nil => ONE
    36   case Nil => ONE
    25   case c::Nil => CHAR(c)
    37   case c::Nil => CHAR(c)
    26   case c::s => SEQ(CHAR(c), charlist2rexp(s))
    38   case c::s => SEQ(CHAR(c), charlist2rexp(s))
    37   def | (r: Rexp) = ALT(s, r)
    49   def | (r: Rexp) = ALT(s, r)
    38   def | (r: String) = ALT(s, r)
    50   def | (r: String) = ALT(s, r)
    39   def % = STAR(s)
    51   def % = STAR(s)
    40   def ~ (r: Rexp) = SEQ(s, r)
    52   def ~ (r: Rexp) = SEQ(s, r)
    41   def ~ (r: String) = SEQ(s, r)
    53   def ~ (r: String) = SEQ(s, r)
    42   def $ (r: Rexp) = RECD(s, r)
    54 }
    43 }
    55 
       
    56 // string of a regular expression - for testing purposes 
       
    57 def string(r: Rexp) : String = r match {
       
    58   case ZERO => "0"
       
    59   case ONE => "1"
       
    60   case CHAR(c) => c.toString 
       
    61   case ALTS(rs) => rs.map(string).mkString("[", "|", "]")
       
    62   case SEQ(CHAR(c), CHAR(d)) => s"${c}${d}"
       
    63   case SEQ(ONE, CHAR(c)) => s"1${c}"
       
    64   case SEQ(r1, r2) => s"(${string(r1)} ~ ${string(r2)})"
       
    65   case STAR(r) => s"(${string(r)})*"
       
    66 }
       
    67 
       
    68 // size of a regular expression - for testing purposes 
       
    69 def size(r: Rexp) : Int = r match {
       
    70   case ZERO => 1
       
    71   case ONE => 1
       
    72   case CHAR(_) => 1
       
    73   case ALTS(rs) => 1 + rs.map(size).sum
       
    74   case SEQ(r1, r2) => 1 + size(r1) + size(r2)
       
    75   case STAR(r) => 1 + size(r)
       
    76 }
       
    77 
    44 
    78 
    45 // nullable function: tests whether the regular 
    79 // nullable function: tests whether the regular 
    46 // expression can recognise the empty string
    80 // expression can recognise the empty string
    47 def nullable (r: Rexp) : Boolean = r match {
    81 def nullable (r: Rexp) : Boolean = r match {
    48   case ZERO => false
    82   case ZERO => false
    49   case ONE => true
    83   case ONE => true
    50   case CHAR(_) => false
    84   case CHAR(_) => false
    51   case ALT(r1, r2) => nullable(r1) || nullable(r2)
    85   case ALTS(rs) => rs.exists(nullable)
    52   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    86   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    53   case STAR(_) => true
    87   case STAR(_) => true
    54   case RECD(_, r1) => nullable(r1)
       
    55 }
    88 }
    56 
    89 
    57 // derivative of a regular expression w.r.t. a character
    90 // derivative of a regular expression w.r.t. a character
    58 def der (c: Char, r: Rexp) : Rexp = r match {
    91 def der (c: Char, r: Rexp) : Rexp = r match {
    59   case ZERO => ZERO
    92   case ZERO => ZERO
    60   case ONE => ZERO
    93   case ONE => ZERO
    61   case CHAR(d) => if (c == d) ONE else ZERO
    94   case CHAR(d) => if (c == d) ONE else ZERO
    62   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
    95   case ALTS(rs) => ALTS(rs.map(der(c, _)))
    63   case SEQ(r1, r2) => 
    96   case SEQ(r1, r2) => 
    64     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
    97     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
    65     else SEQ(der(c, r1), r2)
    98     else SEQ(der(c, r1), r2)
    66   case STAR(r) => SEQ(der(c, r), STAR(r))
    99   case STAR(r) => SEQ(der(c, r), STAR(r))
    67   case RECD(_, r1) => der(c, r1)
       
    68 }
   100 }
    69 
   101 
    70 // derivative w.r.t. a string (iterates der)
   102 // derivative w.r.t. a string (iterates der)
    71 def ders (s: List[Char], r: Rexp) : Rexp = s match {
   103 def ders (s: List[Char], r: Rexp) : Rexp = s match {
    72   case Nil => r
   104   case Nil => r
    75 
   107 
    76 // extracts a string from value
   108 // extracts a string from value
    77 def flatten(v: Val) : String = v match {
   109 def flatten(v: Val) : String = v match {
    78   case Empty => ""
   110   case Empty => ""
    79   case Chr(c) => c.toString
   111   case Chr(c) => c.toString
    80   case Left(v) => flatten(v)
   112   case Proj(_, v) => flatten(v)
    81   case Right(v) => flatten(v)
       
    82   case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
   113   case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
    83   case Stars(vs) => vs.map(flatten).mkString
   114   case Stars(vs) => vs.map(flatten).mkString
    84   case Rec(_, v) => flatten(v)
   115 }
    85 }
   116 
    86 
   117 
    87 // extracts an environment from a value
   118 // mkeps
    88 def env[A](v: Val) : List[A] = v match {
       
    89   case Empty => Nil
       
    90   case Chr(c) => Nil
       
    91   case Left(v) => env(v)
       
    92   case Right(v) => env(v)
       
    93   case Sequ(v1, v2) => env(v1) ::: env(v2)
       
    94   case Stars(vs) => vs.flatMap(env)
       
    95   case Rec(f, v) => ((f:String => A)(flatten(v)))::env(v)
       
    96 }
       
    97 
       
    98 // injection part
       
    99 def mkeps(r: Rexp) : Val = r match {
   119 def mkeps(r: Rexp) : Val = r match {
   100   case ONE => Empty
   120   case ONE => Empty
   101   case ALT(r1, r2) => 
   121   case ALTS(r1::rs) => 
   102     if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
   122     if (nullable(r1)) Proj(0, mkeps(r1)) 
       
   123     else IncProj(1, mkeps(ALTS(rs)))
   103   case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
   124   case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
   104   case STAR(r) => Stars(Nil)
   125   case STAR(r) => Stars(Nil)
   105   case RECD(x, r) => Rec(x, mkeps(r))
   126 }
   106 }
   127 
   107 
   128 // injection 
   108 
       
   109 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
   129 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
   110   case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
   130   case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
   111   case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
   131   case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
   112   case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
   132   case (SEQ(r1, r2), Proj(0, Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
   113   case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
   133   case (SEQ(r1, r2), Proj(1, v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
   114   case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
   134   case (ALTS(rs), Proj(n, v1)) => Proj(n, inj(rs(n), c, v1))
   115   case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
       
   116   case (CHAR(d), Empty) => Chr(c) 
   135   case (CHAR(d), Empty) => Chr(c) 
   117   case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
       
   118 }
   136 }
   119 
   137 
   120 // main lexing function (produces a value)
   138 // main lexing function (produces a value)
       
   139 //  - does  not simplify
   121 def lex(r: Rexp, s: List[Char]) : Val = s match {
   140 def lex(r: Rexp, s: List[Char]) : Val = s match {
   122   case Nil => if (nullable(r)) mkeps(r) 
   141   case Nil => {
   123               else throw new Exception("Not matched")
   142     //println(s"Size of the last regex: ${size(r)}")
       
   143     if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
       
   144   }
   124   case c::cs => inj(r, c, lex(der(c, r), cs))
   145   case c::cs => inj(r, c, lex(der(c, r), cs))
   125 }
   146 }
   126 
   147 
   127 def lexing(r: Rexp, s: String) : Val = lex(r, s.toList)
   148 def lexing(r: Rexp, s: String) : Val = lex(r, s.toList)
   128 
   149 
       
   150 lexing("a" | ZERO, "a")
       
   151 lexing(ZERO | "a", "a")
   129 
   152 
   130 lexing(("ab" | "a") ~ ("b" | ONE), "ab")
   153 lexing(("ab" | "a") ~ ("b" | ONE), "ab")
       
   154 
       
   155 
       
   156 // removing duplicate regular expressions
       
   157 val unit = (ZERO, F_ERROR(_))
       
   158 
       
   159 def dups2(xs: List[(Rexp, Val => Val)], 
       
   160 	 acc: List[(Rexp, Val => Val)] = Nil) : List[(Rexp, Val => Val)] = xs match {
       
   161   case Nil => acc
       
   162   case (x, y)::xs => 
       
   163 	     if (acc.map(_._1).contains(x)) dups2(xs, acc :+ unit)
       
   164 	     else dups2(xs, acc :+ (x, y))
       
   165 }
       
   166 
       
   167 def dups(xs: List[(Rexp, Val => Val)]) : List[(Rexp, Val => Val)] = {
       
   168   val out = dups2(xs)
       
   169   //if (out != xs) {
       
   170   //  println()
       
   171   //  println(s"Input ${string(ALTS(xs.map(_._1)))}")
       
   172   //  println(s"Ouput ${string(ALTS(out.map(_._1)))}")
       
   173   //}
       
   174   out
       
   175 }
       
   176 
   131 
   177 
   132 // some "rectification" functions for simplification
   178 // some "rectification" functions for simplification
   133 def F_ID(v: Val): Val = v
   179 def F_ID(v: Val): Val = v
   134 def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
       
   135 def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
       
   136 def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
       
   137   case Right(v) => Right(f2(v))
       
   138   case Left(v) => Left(f1(v))
       
   139 }
       
   140 def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
   180 def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
   141   case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
   181   case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
   142 }
   182 }
   143 def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = 
   183 def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = 
   144   (v:Val) => Sequ(f1(Empty), f2(v))
   184   (v:Val) => Sequ(f1(Empty), f2(v))
   145 def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = 
   185 def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = 
   146   (v:Val) => Sequ(f1(v), f2(Empty))
   186   (v:Val) => Sequ(f1(v), f2(Empty))
   147 def F_RECD(f: Val => Val) = (v:Val) => v match {
       
   148   case Rec(x, v) => Rec(x, f(v))
       
   149 }
       
   150 def F_ERROR(v: Val): Val = throw new Exception("error")
   187 def F_ERROR(v: Val): Val = throw new Exception("error")
       
   188 def F_PRINT(v: Val): Val = {
       
   189   println(s"value is ${v}")
       
   190   throw new Exception("caught error")
       
   191 }
       
   192 
       
   193 def flats(rs: List[Rexp], seen: Set[Rexp]) : (List[Rexp], Val => Val) = {
       
   194   //println(s"I am flats: ${string(ALTS(rs))}")
       
   195   //println(s"The size of seen is ${seen.size}, ${seen.map(string)}")
       
   196   rs match {
       
   197   case Nil => (Nil, F_ERROR)
       
   198   case r::rs1 if seen.contains(simp(r)._1) => {
       
   199     //println(s" I remove ${string(r)}")
       
   200     val (rs2, f) = flats(rs1, seen)
       
   201     (rs2, (v:Val) => IncProj(1, f(v)))
       
   202   }
       
   203   case ZERO::rs1 => {
       
   204     val (rs2, f) = flats(rs1, seen)
       
   205     (rs2, (v:Val) => IncProj(1, f(v)))
       
   206   }
       
   207   case ALTS(rs0)::rs1 => {
       
   208     val (rss, f1) = flats(rs0, seen)
       
   209     val (rs2, f2) = flats(rs1, rss.toSet ++ seen)
       
   210     (rss:::rs2, (v:Val) => v match {
       
   211       case Proj(n, vn) => 
       
   212      	if (n < rss.length) Proj(0, f1(Proj(n, vn)))
       
   213   	else IncProj(1, f2(Proj(n - rss.length, vn)))
       
   214     })
       
   215   }
       
   216   case r1::rs2 => {
       
   217     val (r1s, f1) = simp(r1)
       
   218     val (rs3, f2) = flats(rs2, seen + r1s)
       
   219     (r1s::rs3, (v:Val) => v match {
       
   220       case Proj(0, vn) => Proj(0, f1(vn))
       
   221       case Proj(n, vn) => IncProj(1, f2(Proj(n - 1, vn)))
       
   222     })
       
   223   }
       
   224 }}
   151 
   225 
   152 // simplification of regular expressions returning also an
   226 // simplification of regular expressions returning also an
   153 // rectification function; no simplification under STAR 
   227 // rectification function; no simplification under STAR 
   154 def simp(r: Rexp): (Rexp, Val => Val) = r match {
   228 def simp(r: Rexp): (Rexp, Val => Val) = r match {
   155   case ALT(r1, r2) => {
   229   case ALTS(rs) => {
   156     val (r1s, f1s) = simp(r1)
   230     val (rs_s, fs_s) = flats(rs, Set())
   157     val (r2s, f2s) = simp(r2)
   231     rs_s match {
   158     (r1s, r2s) match {
   232       case Nil => (ZERO, F_ERROR) 
   159       case (ZERO, _) => (r2s, F_RIGHT(f2s))
   233       case r::Nil => (r, (v:Val) => fs_s(Proj(0, v)))
   160       case (_, ZERO) => (r1s, F_LEFT(f1s))
   234       case rs_sd => (ALTS(rs_sd), fs_s)
   161       case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
       
   162                 else (ALT (r1s, r2s), F_ALT(f1s, f2s)) 
       
   163     }
   235     }
   164   }
   236   }
   165   case SEQ(r1, r2) => {
   237   case SEQ(r1, r2) => {
   166     val (r1s, f1s) = simp(r1)
   238     val (r1s, f1s) = simp(r1)
   167     val (r2s, f2s) = simp(r2)
   239     val (r2s, f2s) = simp(r2)
   168     (r1s, r2s) match {
   240     (r1s, r2s) match {
   169       case (ZERO, _) => (ZERO, F_ERROR)
   241       case (ZERO, _) => (ZERO, F_ERROR)
   170       case (_, ZERO) => (ZERO, F_ERROR)
   242       case (_, ZERO) => (ZERO, F_ERROR)
   171       case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
   243       case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
   172       case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s))
   244       case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s))
       
   245       case (ALTS(rs), r2s) => (ALTS(rs.map(SEQ(_, r2s))), 
       
   246 			       (v:Val) => v match {
       
   247 				 case Proj(n, Sequ(v1, v2)) => Sequ(f1s(Proj(n, v1)), f2s(v2))
       
   248 			       }
       
   249 			      )
   173       case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
   250       case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
   174     }
   251     }
   175   }
   252   }
   176   case RECD(x, r1) => {
       
   177     val (r1s, f1s) = simp(r1)
       
   178     (RECD(x, r1s), F_RECD(f1s))
       
   179   }
       
   180   case r => (r, F_ID)
   253   case r => (r, F_ID)
   181 }
   254 }
   182 
   255 
   183 def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
   256 def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
   184   case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
   257   case Nil => {
       
   258     //println(s"Size of the last regex: ${size(r)}")
       
   259     //println(s"${string(r)}")
       
   260     if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
       
   261   }
   185   case c::cs => {
   262   case c::cs => {
   186     val (r_simp, f_simp) = simp(der(c, r))
   263     val rd = der(c, r)     
   187     inj(r, c, f_simp(lex_simp(r_simp, cs)))
   264     val (r_simp, f_simp) = simp(rd)
       
   265     //println(s"BEFORE ${string(rd)}")
       
   266     //println(s"AFTER  ${string(r_simp)}")
       
   267     val rec = lex_simp(r_simp, cs)
       
   268     inj(r, c, f_simp(rec))
   188   }
   269   }
   189 }
   270 }
   190 
   271 
   191 def lexing_simp(r: Rexp, s: String) : Val = lex_simp(r, s.toList)
   272 def lexing_simp(r: Rexp, s: String) : Val = lex_simp(r, s.toList)
   192 
   273 
       
   274 
       
   275 lexing_simp("ab" | "aa", "ab")
       
   276 lexing_simp("ab" | "aa", "aa")
       
   277 
       
   278 lexing(STAR("a" | "aa"), "aaaaa")
       
   279 lexing_simp(STAR("a" | "aa"), "aaaaa")
       
   280 
       
   281 lexing(STAR("a" | "aa"), "aaaaaaaaaaa")
       
   282 lexing_simp(STAR("a" | "aa"), "aaaaaaaaaaa")
       
   283 
       
   284 lexing_simp(STAR("a" | "aa"), "a" * 2)
       
   285 lexing_simp(STAR("a" | "aa"), "a" * 3)
       
   286 lexing_simp(STAR("a" | "aa"), "a" * 4)
       
   287 
       
   288 
       
   289 lexing_simp(STAR("a" | "aa"), "a" * 20)
       
   290 lexing_simp(STAR("a" | "aa"), "a" * 2000)
       
   291 
       
   292 lexing(ALTS(List("aa", "aa", "aa", "ab", "ab")), "ab")
       
   293 lexing_simp(ALTS(List("aa", "aa", "aa", "ab", "ab")), "ab")
       
   294 
       
   295 lexing(ALTS(List(("aa" | "ab" | "aa"), "aa", "ab", "ab")), "ab")
       
   296 lexing_simp(ALTS(List(("aa" | "ab" | "aa"), "aa", "ab", "ab")), "ab")
       
   297 
       
   298 lexing(ALTS(List(ZERO, ZERO, ONE, "aa", ZERO, "aa", "aa")), "aa")
       
   299 lexing_simp(ALTS(List(ZERO, ZERO, ONE, "aa", ZERO, "aa", "aa")), "aa")
       
   300 
       
   301 lexing_simp(ONE | ZERO, "")
       
   302 lexing_simp(ZERO | ONE, "")
       
   303 
       
   304 lexing("a" | ZERO, "a")
       
   305 lexing_simp("a" | ZERO, "a")
       
   306 lexing(ZERO | "a", "a")
       
   307 lexing_simp(ZERO | "a", "a")
       
   308 
       
   309 lexing(ALTS(List(ZERO, ZERO, ONE, "a", ZERO, "a")), "a")
       
   310 lexing_simp(ALTS(List(ZERO, ZERO, ONE, "a", ZERO, "a")), "a")
       
   311 lexing(ALTS(List("a")), "a")
       
   312 lexing_simp(ALTS(List("a")), "a")
       
   313 
       
   314 lexing_simp(("a" | ZERO) | ZERO, "a")
       
   315 lexing_simp("a" | (ZERO | ZERO), "a")
       
   316 lexing_simp(ZERO | ("a" | ZERO), "a")
       
   317 
       
   318 
       
   319 lexing_simp("abc", "abc")
       
   320 
       
   321 lexing_simp("abc" | ONE, "abc")
       
   322 
       
   323 lexing(("a" | "ab") ~ ("b" | ""), "ab")
   193 lexing_simp(("a" | "ab") ~ ("b" | ""), "ab")
   324 lexing_simp(("a" | "ab") ~ ("b" | ""), "ab")
   194 
   325 lexing_simp(("ba" | "c" | "ab"), "ab")
   195 // Lexing Rules for a Small While Language
   326 
   196 
   327 lexing(ALTS(List(ALTS(Nil), "c", "ab")), "ab")
   197 def PLUS(r: Rexp) = r ~ r.%
   328 lexing_simp(ALTS(List(ALTS(Nil), "c", "ab")), "ab")
   198 
   329 
   199 val SYM = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
   330 lexing(ALTS(List(ALTS("ab"::Nil), "c", "ab")), "ab")
   200 val DIGIT = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
   331 lexing_simp(ALTS(List(ALTS("ab"::Nil), "c", "ab")), "ab")
   201 val ID = SYM ~ (SYM | DIGIT).% 
   332 
   202 val NUM = PLUS(DIGIT)
   333 lexing(ALTS(List(ALTS(List("a","ab")), "c", "ab")), "ab")
   203 val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" | "true" | "false"
   334 lexing_simp(ALTS(List(ALTS(List("a","ab")), "c", "ab")), "ab")
   204 val SEMI: Rexp = ";"
   335 
   205 val OP: Rexp = ":=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/"
   336 lexing(ALTS(List(ALTS(List("ab","a")), "c", "ab")), "ab")
   206 val WHITESPACE = PLUS(" " | "\n" | "\t")
   337 lexing_simp(ALTS(List(ALTS(List("ab","a")), "c", "ab")), "ab")
   207 val RPAREN: Rexp = ")"
   338 
   208 val LPAREN: Rexp = "("
   339 lexing(ALTS(List(ALTS(List("ab","a","a")), "c", "ab")), "ab")
   209 val BEGIN: Rexp = "{"
   340 lexing_simp(ALTS(List(ALTS(List("ab","a","a")), "c", "ab")), "ab")
   210 val END: Rexp = "}"
   341 
   211 val STRING: Rexp = "\"" ~ SYM.% ~ "\""
   342 lexing(ALTS(List(ALTS(List("a","ab","a")), "c", "ab")), "ab")
   212 
   343 lexing_simp(ALTS(List(ALTS(List("a","ab","a")), "c", "ab")), "ab")
   213 
   344 
   214 val WHILE_REGS = (((s) => Token $ KEYWORD) | 
   345 lexing(ALTS(List(ALTS(List("b","a","ab")), "c", "ab")), "ab")
   215                   ("i" $ ID) | 
   346 lexing_simp(ALTS(List(ALTS(List("b","a","ab")), "c", "ab")), "ab")
   216                   ("o" $ OP) | 
   347 
   217                   ("n" $ NUM) | 
   348 
   218                   ("s" $ SEMI) | 
   349 lexing_simp(ALTS(List("ba", "c", "ab")), "ab")
   219                   ("str" $ STRING) |
   350 
   220                   ("p" $ (LPAREN | RPAREN)) | 
   351 lexing(ALTS(List("a", "ab", "a")), "ab")
   221                   ("b" $ (BEGIN | END)) | 
   352 lexing_simp(ALTS(List("a", "ab", "a")), "ab")
   222                   ("w" $ WHITESPACE)).%
   353 
   223 
   354 lexing(STAR("a" | "aa"), "aa")
   224 
   355 lexing_simp(STAR("a" | "aa"), "aa")
   225 
   356 
   226 
   357 
   227 // extracts an environment from a value
   358 
   228 def env(v: Val) : List[Token] = v match {
   359 
   229   case Empty => Nil
   360 def enum(n: Int, s: String) : Set[Rexp] = n match {
   230   case Chr(c) => Nil
   361   case 0 => Set(ZERO, ONE) ++ s.toSet.map(CHAR)
   231   case Left(v) => env(v)
   362   case n => {
   232   case Right(v) => env(v)
   363     val rs = enum(n - 1, s)
   233   case Sequ(v1, v2) => env(v1) ::: env(v2)
   364     rs ++
   234   case Stars(vs) => vs.flatMap(env)
   365     (for (r1 <- rs; r2 <- rs) yield ALT(r1, r2)) ++
   235   case Rec(f, v) => (f(flatten(v)))::env(v)
   366     (for (r1 <- rs; r2 <- rs) yield SEQ(r1, r2)) ++
   236 }
   367     (for (r1 <- rs) yield STAR(r1))
   237 
   368   }
   238 //   Testing
   369 }
   239 //============
   370 
   240 
   371 def strs(n: Int, cs: String) : Set[String] = {
   241 def time[T](code: => T) = {
   372   if (n == 0) Set("")
   242   val start = System.nanoTime()
   373   else {
   243   val result = code
   374     val ss = strs(n - 1, cs)
   244   val end = System.nanoTime()
   375     ss ++
   245   println((end - start)/1.0e9)
   376     (for (s <- ss; c <- cs.toList) yield c + s)
   246   result
   377   }
   247 }
   378 }
   248 
   379 
   249 val r1 = ("a" | "ab") ~ ("bcd" | "c")
   380 import scala.util.Try
   250 println(lexing(r1, "abcd"))
   381 
   251 
   382 def tests(n: Int, m: Int, s: String) = {
   252 val r2 = ("" | "a") ~ ("ab" | "b")
   383   val rs = enum(n, s)
   253 println(lexing(r2, "ab"))
   384   val ss = strs(m, s)
   254 
   385   println(s"cases generated: ${rs.size} regexes and ${ss.size} strings")
   255 
   386   for (r1 <- rs.par; s1 <- ss.par) yield {
   256 // Two Simple While Tests
   387     val res1 = Try(Some(lexing(r1, s1))).getOrElse(None)
   257 //========================
   388     val res2 = Try(Some(lexing_simp(r1, s1))).getOrElse(None)
   258 println("prog0 test")
   389     if (res1 != res2) println(s"Disagree on ${r1} and ${s1}")
   259 
   390     if (res1 != res2) Some((r1, s1)) else None
   260 val prog0 = """read n"""
   391   }
   261 println(env(lexing_simp(WHILE_REGS, prog0)))
   392 }
   262 
   393 
   263 println("prog1 test")
   394 println("Testing")
   264 
   395 println(tests(2,7,"abc"))
   265 val prog1 = """read  n; write (n)"""
   396 
   266 println(env(lexing_simp(WHILE_REGS, prog1)))
   397 
   267 
   398 
   268 
       
   269 // Bigger Test
       
   270 //=============
       
   271 
       
   272 val prog2 = """
       
   273 write "fib";
       
   274 read n;
       
   275 minus1 := 0;
       
   276 minus2 := 1;
       
   277 while n > 0 do {
       
   278   temp := minus2;
       
   279   minus2 := minus1 + minus2;
       
   280   minus1 := temp;
       
   281   n := n - 1
       
   282 };
       
   283 write "result";
       
   284 write minus2
       
   285 """
       
   286 
       
   287 println("Tokens")
       
   288 println(env(lexing_simp(WHILE_REGS, prog2)))
       
   289 println(env(lexing_simp(WHILE_REGS, prog2)).filterNot{_._1 == "w"}.mkString("\n"))
       
   290 
       
   291 // some more timing tests with
       
   292 // i copies of the program
       
   293 
       
   294 for (i <- 1 to 21 by 10) {
       
   295   print(i.toString + ":  ")
       
   296   time(lexing_simp(WHILE_REGS, prog2 * i))
       
   297 }
       
   298 
       
   299 abstract class Token
       
   300 case class KeyToken(s: String) extends Token
       
   301 case class IdToken(s: String) extends Token
       
   302 
       
   303 list[(STRING, STRING)]=> List(TOKEN)