solutions/cw5/fun_tokens.sc
changeset 920 7af2eea19646
parent 903 2f86ebda3629
equal deleted inserted replaced
919:53f08d873e09 920:7af2eea19646
    31 case class Chr(c: Char) extends Val
    31 case class Chr(c: Char) extends Val
    32 case class Sequ(v1: Val, v2: Val) extends Val
    32 case class Sequ(v1: Val, v2: Val) extends Val
    33 case class Left(v: Val) extends Val
    33 case class Left(v: Val) extends Val
    34 case class Right(v: Val) extends Val
    34 case class Right(v: Val) extends Val
    35 case class Stars(vs: List[Val]) extends Val
    35 case class Stars(vs: List[Val]) extends Val
    36 case class Opt(v: Val) extends Val
       
    37 case class Pls(vs: List[Val]) extends Val
       
    38 case class Nt(vs: List[Val]) extends Val
       
    39 case class Rec(x: String, v: Val) extends Val
    36 case class Rec(x: String, v: Val) extends Val
    40 
    37 
    41 // some convenience for typing in regular expressions
    38 // some convenience for typing in regular expressions
    42 def charlist2rexp(s : List[Char]): Rexp = s match {
    39 def charlist2rexp(s : List[Char]): Rexp = s match {
    43   case Nil => ONE
    40   case Nil => ONE
    44   case c::Nil => CHAR(c)
    41   case c::Nil => CHAR(c)
    45   case c::vs => SEQ(CHAR(c), charlist2rexp(vs))
    42   case c::vs => SEQ(CHAR(c), charlist2rexp(vs))
    46 }
    43 }
    47 
    44 
    48 implicit def string2rexp(s : String) : Rexp =
    45 implicit def string2rexp(s : String) : Rexp = 
    49   charlist2rexp(s.toList)
    46   charlist2rexp(s.toList)
    50 
    47 
    51 implicit def RexpOps(r: Rexp) = new {
    48 extension (r: Rexp) {
    52   def | (s: Rexp) = ALT(r, s)
    49   def | (s: Rexp) = ALT(r, s)
    53   def % = STAR(r)
    50   def % = STAR(r)
    54   def ? = OPTIONAL(r)
       
    55   def + = PLUS(r)
    51   def + = PLUS(r)
    56   def ^ (n: Int) = NTIMES(r, n)
       
    57   def ~ (s: Rexp) = SEQ(r, s)
    52   def ~ (s: Rexp) = SEQ(r, s)
    58 }
    53 }
    59 
    54 
    60 implicit def stringOps(s: String) = new {
    55 extension (s: String) {
    61   def | (r: Rexp) = ALT(s, r)
    56   def | (r: Rexp) = ALT(s, r)
    62   def | (r: String) = ALT(s, r)
    57   def | (r: String) = ALT(s, r)
    63   def % = STAR(s)
    58   def % = STAR(s)
    64   def ? = OPTIONAL(s)
       
    65   def + = PLUS(s)
       
    66   def ^ (n: Int) = NTIMES(s, n)
       
    67   def ~ (r: Rexp) = SEQ(s, r)
    59   def ~ (r: Rexp) = SEQ(s, r)
    68   def ~ (r: String) = SEQ(s, r)
    60   def ~ (r: String) = SEQ(s, r)
    69   def $ (r: Rexp) = RECD(s, r)
    61   def $ (r: Rexp) = RECD(s, r)
    70 }
    62 }
       
    63 
    71 
    64 
    72 def nullable(r: Rexp) : Boolean = r match {
    65 def nullable(r: Rexp) : Boolean = r match {
    73   case ZERO => false
    66   case ZERO => false
    74   case ONE => true
    67   case ONE => true
    75   case CHAR(_) => false
    68   case CHAR(_) => false
   105   case Chr(c) => c.toString
    98   case Chr(c) => c.toString
   106   case Left(v) => flatten(v)
    99   case Left(v) => flatten(v)
   107   case Right(v) => flatten(v)
   100   case Right(v) => flatten(v)
   108   case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
   101   case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
   109   case Stars(vs) => vs.map(flatten).mkString
   102   case Stars(vs) => vs.map(flatten).mkString
   110   case Opt(v) => flatten(v)
       
   111   case Pls(vs) => vs.map(flatten).mkString
       
   112   case Nt(vs) => vs.map(flatten).mkString
       
   113   case Rec(_, v) => flatten(v)
   103   case Rec(_, v) => flatten(v)
   114 }
   104 }
   115 
   105 
   116 // extracts an environment from a value;
   106 // extracts an environment from a value;
   117 // used for tokenising a string
   107 // used for tokenising a string
   120   case Chr(c) => Nil
   110   case Chr(c) => Nil
   121   case Left(v) => env(v)
   111   case Left(v) => env(v)
   122   case Right(v) => env(v)
   112   case Right(v) => env(v)
   123   case Sequ(v1, v2) => env(v1) ::: env(v2)
   113   case Sequ(v1, v2) => env(v1) ::: env(v2)
   124   case Stars(vs) => vs.flatMap(env)
   114   case Stars(vs) => vs.flatMap(env)
   125   case Opt(v) => env(v)
       
   126   case Pls(vs) => vs.flatMap(env)
       
   127   case Nt(vs) => vs.flatMap(env)
       
   128   case Rec(x, v) => (x, flatten(v))::env(v)
   115   case Rec(x, v) => (x, flatten(v))::env(v)
   129 }
   116 }
   130 
   117 
   131 
   118 
   132 // The injection and mkeps part of the lexer
   119 // The injection and mkeps part of the lexer
   133 //===========================================
   120 //===========================================
   134 
   121 
       
   122 // Mkeps
   135 def mkeps(r: Rexp) : Val = r match {
   123 def mkeps(r: Rexp) : Val = r match {
   136   case ONE => Empty
   124   case ONE => Empty
   137   case RANGE(chars) => throw new Exception("lexing error")  // this will never be called but the coursework asks for it so...
   125   case ALT(r1, r2) => 
   138   case ALT(r1, r2) =>
       
   139     if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
   126     if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
   140   case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
   127   case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
   141   case STAR(r) => Stars(Nil)
   128   case STAR(r) => Stars(Nil)
   142   case OPTIONAL(r) => Opt(Empty)
       
   143   case PLUS(r) => Pls(List(mkeps(r))) // scala define a list with one element
       
   144   case NTIMES(r, n) => if (n == 0) Nt(Nil) else Nt(List.fill(n)(mkeps(r))) // wrong
       
   145   case RECD(x, r) => Rec(x, mkeps(r))
   129   case RECD(x, r) => Rec(x, mkeps(r))
   146   case _ => throw new Exception("lexing error")
   130 
   147 }
   131   case PLUS(r) => Stars(List(mkeps(r)))   // the first copy must match the empty string
   148 
   132   case OPTIONAL(r) => if (nullable(r)) Stars(List(mkeps(r))) else Stars(Nil)
       
   133   case NTIMES(r, i) => Stars(List.fill(i)(mkeps(r)))
       
   134 }
       
   135 
       
   136 // Inj
   149 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
   137 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
   150   case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
   138   case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
   151   case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
   139   case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
   152   case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
   140   case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
   153   case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
   141   case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
   154   case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
   142   case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
   155   case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
   143   case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
   156   case (CHAR(d), Empty) => Chr(c)
   144   case (CHAR(d), Empty) => Chr(c) 
   157   case (RANGE(chars), Empty) => Chr(c)
       
   158   case (OPTIONAL(r1), v) => Opt(inj(r1, c, v))
       
   159   case (PLUS(r1), Sequ(v1, Stars(vs))) => Pls(inj(r1, c, v1)::vs)
       
   160   case (NTIMES(r1, n), Sequ(v1, Nt(vs))) => Nt(inj(r1, c, v1)::vs)
       
   161   case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
   145   case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
   162 }
   146 
       
   147   case (RANGE(_), Empty) => Chr(c)
       
   148   case (PLUS(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
       
   149   case (OPTIONAL(r), v1) => Stars(List(inj(r, c, v1)))
       
   150   case (NTIMES(r, n), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
       
   151 }
       
   152 
   163 
   153 
   164 // some "rectification" functions for simplification
   154 // some "rectification" functions for simplification
   165 def F_ID(v: Val): Val = v
   155 def F_ID(v: Val): Val = v
   166 def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
   156 def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
   167 def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
   157 def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))