solutions/cw3/lexer.sc
changeset 961 c0600f8b6427
parent 959 64ec1884d860
--- a/solutions/cw3/lexer.sc	Wed May 29 13:25:30 2024 +0100
+++ b/solutions/cw3/lexer.sc	Thu Sep 19 15:47:33 2024 +0100
@@ -1,21 +1,24 @@
 // Lexer from CW2
 //================
 
+//> using toolkit 0.4.0
+//> using file project.sc
+import project.*
 
 // Rexp
 abstract class Rexp
 case object ZERO extends Rexp
 case object ONE extends Rexp
 case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
-case class STAR(r: Rexp) extends Rexp 
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp  // alternative: r1 or r2
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp  // sequence: r1 followed by r2
+case class STAR(r: Rexp) extends Rexp            // zero or more repetitions of r
 case class RECD(x: String, r: Rexp) extends Rexp
 
 case class RANGE(s: Set[Char]) extends Rexp
 case class PLUS(r: Rexp) extends Rexp
 case class OPTIONAL(r: Rexp) extends Rexp
-case class NTIMES(r: Rexp, n: Int) extends Rexp 
+case class NTIMES(r: Rexp, n: Int) extends Rexp  // exactly n repetitions of r
 
 // Values
 abstract class Val
@@ -35,7 +38,7 @@
   case c::s => SEQ(CHAR(c), charlist2rexp(s))
 }
 
-implicit def string2rexp(s : String) : Rexp = 
+implicit def string2rexp(s : String) : Rexp =
   charlist2rexp(s.toList)
 
 extension (r: Rexp) {
@@ -50,7 +53,7 @@
   def % = STAR(s)
   def ~ (r: Rexp) = SEQ(s, r)
   def ~ (r: String) = SEQ(s, r)
-  def $ (r: Rexp) = RECD(s, r)
+  infix def $ (r: Rexp) = RECD(s, r) // labels r with s; `infix` permits operator position ("k" $ KEYWORD) since $ counts as an alphanumeric name in Scala 3
 }
 
 // nullable
@@ -75,16 +78,16 @@
   case ONE => ZERO
   case CHAR(d) => if (c == d) ONE else ZERO
   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
-  case SEQ(r1, r2) => 
+  case SEQ(r1, r2) =>
     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
     else SEQ(der(c, r1), r2)
   case STAR(r) => SEQ(der(c, r), STAR(r))
 
   case RECD(_, r1) => der(c, r1)
-  case RANGE(s) => if (s.contains(c)) ONE else ZERO 
+  case RANGE(s) => if (s.contains(c)) ONE else ZERO
   case PLUS(r1) => SEQ(der(c, r1), STAR(r1))
   case OPTIONAL(r1) => der(c, r1)
-  case NTIMES(r, i) => 
+  case NTIMES(r, i) =>
     if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1))
 }
 
@@ -113,7 +116,7 @@
 // Mkeps
 def mkeps(r: Rexp) : Val = r match {
   case ONE => Empty
-  case ALT(r1, r2) => 
+  case ALT(r1, r2) =>
     if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
   case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
   case STAR(r) => Stars(Nil)
@@ -132,7 +135,7 @@
   case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
   case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
   case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
-  case (CHAR(d), Empty) => Chr(c) 
+  case (CHAR(d), Empty) => Chr(c)
   case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
 
   case (RANGE(_), Empty) => Chr(c)
@@ -152,9 +155,9 @@
 def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
   case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
 }
-def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = 
+def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) =
   (v:Val) => Sequ(f1(Empty), f2(v))
-def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = 
+def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) =
   (v:Val) => Sequ(f1(v), f2(Empty))
 def F_RECD(f: Val => Val) = (v:Val) => v match {
   case Rec(x, v) => Rec(x, f(v))
@@ -170,7 +173,7 @@
       case (ZERO, _) => (r2s, F_RIGHT(f2s))
       case (_, ZERO) => (r1s, F_LEFT(f1s))
       case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
-                else (ALT (r1s, r2s), F_ALT(f1s, f2s)) 
+                else (ALT (r1s, r2s), F_ALT(f1s, f2s))
     }
   }
   case SEQ(r1, r2) => {
@@ -189,8 +192,8 @@
 
 // Lex
 def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
-  case Nil => if (nullable(r)) mkeps(r) else 
-    { throw new Exception("lexing error") } 
+  case Nil => if (nullable(r)) mkeps(r) else
+    { throw new Exception("lexing error") }
   case c::cs => {
     val (r_simp, f_simp) = simp(der(c, r))
     inj(r, c, f_simp(lex_simp(r_simp, cs)))
@@ -200,7 +203,7 @@
 def lexing_simp(r: Rexp, s: String) = env(lex_simp(r, s.toList))
 
 // Language specific code
-val KEYWORD : Rexp = "while" | "if" | "then" | "else" | "do" | "for" | "to" | "true" | "false" | "read" | "write" | "skip" 
+val KEYWORD : Rexp = "while" | "if" | "then" | "else" | "do" | "for" | "to" | "true" | "false" | "read" | "write" | "skip"
 val OP : Rexp = "+" | "-" | "*" | "%" | "/" | "==" | "!=" | ">" | "<" | ">=" | "<=" | ":=" | "&&" | "||"
 val LET: Rexp = RANGE(('A' to 'Z').toSet ++ ('a' to 'z').toSet)
 val SYM : Rexp = RANGE(Set('.', '_', '>', '<', '=', ';', ',', ':', ')', '('))
@@ -213,31 +216,51 @@
 val ID : Rexp = LET ~ (LET | "_" | DIGIT).%
 val NUM : Rexp = "0" | (DIGIT1 ~ DIGIT.%)
 val EOL : Rexp = "\n" | "\r\n"
-val COMMENT : Rexp = "//" ~ (LET | SYM | PARENS | " " | DIGIT).% ~ EOL 
+val COMMENT : Rexp = "//" ~ (LET | SYM | PARENS | " " | DIGIT).% ~ EOL
 
-val WHILE_REGS = (("k" $ KEYWORD) | 
-                  ("o" $ OP) | 
+val WHILE_REGS = (("k" $ KEYWORD) |
+                  ("o" $ OP) |
                   ("str" $ STRING) |
                   ("p" $ PARENS) |
-                  ("s" $ SEMI) | 
-                  ("w" $ WHITESPACE) | 
-                  ("i" $ ID) | 
+                  ("s" $ SEMI) |
+                  ("w" $ WHITESPACE) |
+                  ("i" $ ID) |
                   ("n" $ NUM) |
-		  ("c" $ COMMENT)).%
+                  ("c" $ COMMENT)).%
+
+
 
 
 
+// Escape one char for a string literal; control chars get a fixed-width 3-digit octal escape.
+def escapedChar(ch: Char): String = ch match {
+  case '\b' => "\\b"
+  case '\t' => "\\t"
+  case '\n' => "\\n"
+  case '\f' => "\\f"
+  case '\r' => "\\r"
+  case '"'  => "\\\""
+  case '\'' => "\\\'"
+  case '\\' => "\\\\"
+  case _    => if (ch.isControl) "\\%03o".format(ch.toInt) else String.valueOf(ch) // padding stops a following digit joining the escape
+}
+
+def esc(s: String): String = s"\"${escapeImpl(s)}\"" // escaped text wrapped in double quotes
+def escapeImpl(s: String): String = s.map(escapedChar).mkString // escape every char, then join the pieces
+
+/*
 def esc(raw: String): String = {
   import scala.reflect.runtime.universe._
   Literal(Constant(raw)).toString
 }
+*/
 
 def escape(tks: List[(String, String)]) =
-  tks.map{ case (s1, s2) => (esc(s1), esc(s2))}
+  tks // escaping is switched off: pairs pass through unchanged, no need to rebuild the list
 
 
 // Tokens
-abstract class Token extends Serializable 
+abstract class Token extends Serializable
 case class T_KEYWORD(s: String) extends Token
 case class T_OP(s: String) extends Token
 case class T_STRING(s: String) extends Token
@@ -257,11 +280,10 @@
 }
 
 // Tokenise
-def tokenise(s: String) = //: List[Token] = 
-  escape(lexing_simp(WHILE_REGS, s)).filter{p => p._1 != "\"w\""}//.collect(token)
+def tokenise(s: String) : List[Token] = // lex s with WHILE_REGS, then turn (tag, text) pairs into Tokens
+  escape(lexing_simp(WHILE_REGS, s)).collect(token) // pairs on which `token` is not defined are dropped
 
 
 
 
 println(tokenise(os.read(os.pwd / "primes.while")))
-