--- a/solutions/cw3/lexer.sc Wed May 29 13:25:30 2024 +0100
+++ b/solutions/cw3/lexer.sc Thu Sep 19 15:47:33 2024 +0100
@@ -1,21 +1,24 @@
// Lexer from CW2
//================
+//> using toolkit 0.4.0
+//> using file project.sc
+import project.*
// Rexp
abstract class Rexp // common supertype of every regular-expression constructor below
case object ZERO extends Rexp
case object ONE extends Rexp
case class CHAR(c: Char) extends Rexp // der gives ONE on the same character, ZERO otherwise
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp // two-way choice; der is taken on both branches
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // r1 then r2; der also descends into r2 when r1 is nullable
+case class STAR(r: Rexp) extends Rexp // der unfolds one copy: SEQ(der(c, r), STAR(r))
case class RECD(x: String, r: Rexp) extends Rexp // r tagged with the label x; der ignores the label, inj rebuilds Rec(x, ...)
case class RANGE(s: Set[Char]) extends Rexp // der gives ONE when s contains the character, ZERO otherwise
case class PLUS(r: Rexp) extends Rexp // der is SEQ(der(c, r), STAR(r))
case class OPTIONAL(r: Rexp) extends Rexp // der is simply der(c, r)
-case class NTIMES(r: Rexp, n: Int) extends Rexp
+case class NTIMES(r: Rexp, n: Int) extends Rexp // der is ZERO for n == 0, else SEQ(der(c, r), NTIMES(r, n - 1))
// Values
abstract class Val
@@ -35,7 +38,7 @@
case c::s => SEQ(CHAR(c), charlist2rexp(s))
}
-implicit def string2rexp(s : String) : Rexp =
+implicit def string2rexp(s : String) : Rexp = // implicit conversion so a String can stand where a Rexp is expected
charlist2rexp(s.toList) // delegates to charlist2rexp on the string's characters
extension (r: Rexp) {
@@ -50,7 +53,7 @@
def % = STAR(s)
def ~ (r: Rexp) = SEQ(s, r)
def ~ (r: String) = SEQ(s, r)
- def $ (r: Rexp) = RECD(s, r)
+ infix def $ (r: Rexp) = RECD(s, r) // tags r with this string as a RECD label; infix allows the operator spelling used in WHILE_REGS
}
// nullable
@@ -75,16 +78,16 @@
case ONE => ZERO
case CHAR(d) => if (c == d) ONE else ZERO
case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
+ case SEQ(r1, r2) =>
if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
else SEQ(der(c, r1), r2)
case STAR(r) => SEQ(der(c, r), STAR(r))
case RECD(_, r1) => der(c, r1)
- case RANGE(s) => if (s.contains(c)) ONE else ZERO
+ case RANGE(s) => if (s.contains(c)) ONE else ZERO
case PLUS(r1) => SEQ(der(c, r1), STAR(r1))
case OPTIONAL(r1) => der(c, r1)
- case NTIMES(r, i) =>
+ case NTIMES(r, i) =>
if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1))
}
@@ -113,7 +116,7 @@
// Mkeps
def mkeps(r: Rexp) : Val = r match {
case ONE => Empty
- case ALT(r1, r2) =>
+ case ALT(r1, r2) =>
if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
case STAR(r) => Stars(Nil)
@@ -132,7 +135,7 @@
case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
- case (CHAR(d), Empty) => Chr(c)
+ case (CHAR(d), Empty) => Chr(c)
case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
case (RANGE(_), Empty) => Chr(c)
@@ -152,9 +155,9 @@
def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match { // builds a function that rewrites both halves of a Sequ
case Sequ(v1, v2) => Sequ(f1(v1), f2(v2)) // the only case: any value other than Sequ fails the match
}
-def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) =
+def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) =
(v:Val) => Sequ(f1(Empty), f2(v)) // first component comes from f1 applied to Empty, second from f2 applied to the incoming value
-def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) =
+def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) =
(v:Val) => Sequ(f1(v), f2(Empty)) // first component comes from f1 applied to the incoming value, second from f2 applied to Empty
def F_RECD(f: Val => Val) = (v:Val) => v match {
case Rec(x, v) => Rec(x, f(v))
@@ -170,7 +173,7 @@
case (ZERO, _) => (r2s, F_RIGHT(f2s))
case (_, ZERO) => (r1s, F_LEFT(f1s))
case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
- else (ALT (r1s, r2s), F_ALT(f1s, f2s))
+ else (ALT (r1s, r2s), F_ALT(f1s, f2s))
}
}
case SEQ(r1, r2) => {
@@ -189,8 +192,8 @@
// Lex
def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
- case Nil => if (nullable(r)) mkeps(r) else
- { throw new Exception("lexing error") }
+ case Nil => if (nullable(r)) mkeps(r) else
+ { throw new Exception("lexing error") }
case c::cs => {
val (r_simp, f_simp) = simp(der(c, r))
inj(r, c, f_simp(lex_simp(r_simp, cs)))
@@ -200,7 +203,7 @@
def lexing_simp(r: Rexp, s: String) = env(lex_simp(r, s.toList)) // runs lex_simp on the characters of s and hands the resulting value to env
// Language specific code
-val KEYWORD : Rexp = "while" | "if" | "then" | "else" | "do" | "for" | "to" | "true" | "false" | "read" | "write" | "skip"
+val KEYWORD : Rexp = "while" | "if" | "then" | "else" | "do" | "for" | "to" | "true" | "false" | "read" | "write" | "skip" // alternatives of the keyword strings
val OP : Rexp = "+" | "-" | "*" | "%" | "/" | "==" | "!=" | ">" | "<" | ">=" | "<=" | ":=" | "&&" | "||" // one- and two-character operator strings
val LET: Rexp = RANGE(('A' to 'Z').toSet ++ ('a' to 'z').toSet) // character class of upper- and lower-case ASCII letters
val SYM : Rexp = RANGE(Set('.', '_', '>', '<', '=', ';', ',', ':', ')', '(')) // character class of punctuation; used below inside COMMENT
@@ -213,31 +216,51 @@
val ID : Rexp = LET ~ (LET | "_" | DIGIT).% // must start with a LET; the rest is LET, underscore or DIGIT under .%
val NUM : Rexp = "0" | (DIGIT1 ~ DIGIT.%) // a lone 0, or DIGIT1 followed by DIGITs; presumably DIGIT1 excludes 0 so no leading zeros (confirm)
val EOL : Rexp = "\n" | "\r\n" // LF or CRLF line ending
-val COMMENT : Rexp = "//" ~ (LET | SYM | PARENS | " " | DIGIT).% ~ EOL
+val COMMENT : Rexp = "//" ~ (LET | SYM | PARENS | " " | DIGIT).% ~ EOL // two slashes, text limited to the listed classes, then a mandatory EOL
-val WHILE_REGS = (("k" $ KEYWORD) |
- ("o" $ OP) |
+val WHILE_REGS = (("k" $ KEYWORD) | // every alternative is tagged with a short label through $ (RECD)
+ ("o" $ OP) |
("str" $ STRING) |
("p" $ PARENS) |
- ("s" $ SEMI) |
- ("w" $ WHITESPACE) |
- ("i" $ ID) |
+ ("s" $ SEMI) |
+ ("w" $ WHITESPACE) |
+ ("i" $ ID) |
("n" $ NUM) |
- ("c" $ COMMENT)).%
+ ("c" $ COMMENT)).% // the whole tagged alternation is placed under .% ; this is the regex tokenise lexes with
+
+
+def escapedChar(ch: Char): String = ch match { // returns the backslash-escaped spelling of a single character
+ case '\b' => "\\b"
+ case '\t' => "\\t"
+ case '\n' => "\\n"
+ case '\f' => "\\f"
+ case '\r' => "\\r"
+ case '"' => "\\\""
+ case '\'' => "\\\'"
+ case '\\' => "\\\\"
+ case _ => if (ch.isControl) "\\0" + Integer.toOctalString(ch.toInt) // other control characters: backslash, 0, octal code; NOTE(review): Scala 3 literals do not accept octal escapes - confirm this is display-only
+ else String.valueOf(ch) // every remaining character is returned unchanged
+}
+
+def esc(s: String): String = "\"" + escapeImpl(s) + "\"" // escaped text of s wrapped in a pair of double quotes
+def escapeImpl(s: String): String = s.flatMap(escapedChar) // escapes s character by character and concatenates the pieces
+
+/*
def esc(raw: String): String = {
import scala.reflect.runtime.universe._
Literal(Constant(raw)).toString
}
+*/
def escape(tks: List[(String, String)]) =
- tks.map{ case (s1, s2) => (esc(s1), esc(s2))}
+ tks.map{ case (s1, s2) => (s1, s2)} // pairs now pass through unchanged; esc is no longer applied to either component
// Tokens
-abstract class Token extends Serializable
+abstract class Token extends Serializable
case class T_KEYWORD(s: String) extends Token
case class T_OP(s: String) extends Token
case class T_STRING(s: String) extends Token
@@ -257,11 +280,10 @@
}
// Tokenise
-def tokenise(s: String) = //: List[Token] =
- escape(lexing_simp(WHILE_REGS, s)).filter{p => p._1 != "\"w\""}//.collect(token)
+def tokenise(s: String) = //: List[Token] =
+ escape(lexing_simp(WHILE_REGS, s)).collect(token) // lexes s with WHILE_REGS, then keeps only the pairs on which token is defined, replaced by its result
println(tokenise(os.read(os.pwd / "primes.while"))) // script entry: reads primes.while from the working directory, tokenises it and prints the result
-