diff -r c7009356ddd8 -r c0600f8b6427 solutions/cw3/lexer.sc --- a/solutions/cw3/lexer.sc Wed May 29 13:25:30 2024 +0100 +++ b/solutions/cw3/lexer.sc Thu Sep 19 15:47:33 2024 +0100 @@ -1,21 +1,24 @@ // Lexer from CW2 //================ +//> using toolkit 0.4.0 +//> using file project.sc +import project.* // Rexp abstract class Rexp case object ZERO extends Rexp case object ONE extends Rexp case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp case class RECD(x: String, r: Rexp) extends Rexp case class RANGE(s: Set[Char]) extends Rexp case class PLUS(r: Rexp) extends Rexp case class OPTIONAL(r: Rexp) extends Rexp -case class NTIMES(r: Rexp, n: Int) extends Rexp +case class NTIMES(r: Rexp, n: Int) extends Rexp // Values abstract class Val @@ -35,7 +38,7 @@ case c::s => SEQ(CHAR(c), charlist2rexp(s)) } -implicit def string2rexp(s : String) : Rexp = +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) extension (r: Rexp) { @@ -50,7 +53,7 @@ def % = STAR(s) def ~ (r: Rexp) = SEQ(s, r) def ~ (r: String) = SEQ(s, r) - def $ (r: Rexp) = RECD(s, r) + infix def $ (r: Rexp) = RECD(s, r) } // nullable @@ -75,16 +78,16 @@ case ONE => ZERO case CHAR(d) => if (c == d) ONE else ZERO case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => + case SEQ(r1, r2) => if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) else SEQ(der(c, r1), r2) case STAR(r) => SEQ(der(c, r), STAR(r)) case RECD(_, r1) => der(c, r1) - case RANGE(s) => if (s.contains(c)) ONE else ZERO + case RANGE(s) => if (s.contains(c)) ONE else ZERO case PLUS(r1) => SEQ(der(c, r1), STAR(r1)) case OPTIONAL(r1) => der(c, r1) - case NTIMES(r, i) => + case NTIMES(r, i) => if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1)) } @@ -113,7 +116,7 @@ // Mkeps def mkeps(r: Rexp) : Val = r match { case ONE => Empty - case ALT(r1, r2) => + case ALT(r1, r2) => if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2)) case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) case STAR(r) => Stars(Nil) @@ -132,7 +135,7 @@ case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1)) case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2)) - case (CHAR(d), Empty) => Chr(c) + case (CHAR(d), Empty) => Chr(c) case (RECD(x, r1), _) => Rec(x, inj(r1, c, v)) case (RANGE(_), Empty) => Chr(c) @@ -152,9 +155,9 @@ def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match { case Sequ(v1, v2) => Sequ(f1(v1), f2(v2)) } -def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = +def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = (v:Val) => Sequ(f1(Empty), f2(v)) -def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = +def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = (v:Val) => Sequ(f1(v), f2(Empty)) def F_RECD(f: Val => Val) = (v:Val) => v match { case Rec(x, v) => Rec(x, f(v)) @@ -170,7 +173,7 @@ case (ZERO, _) => (r2s, F_RIGHT(f2s)) case (_, ZERO) => (r1s, F_LEFT(f1s)) case _ => if (r1s == r2s) (r1s, F_LEFT(f1s)) - else (ALT (r1s, r2s), F_ALT(f1s, f2s)) + else (ALT (r1s, r2s), F_ALT(f1s, f2s)) } } case SEQ(r1, r2) => { @@ -189,8 +192,8 @@ // Lex def lex_simp(r: Rexp, s: List[Char]) : Val = s match { - case Nil => if (nullable(r)) mkeps(r) else - { throw new Exception("lexing error") } + case Nil => if (nullable(r)) mkeps(r) else + { throw new Exception("lexing error") } case c::cs => { val (r_simp, f_simp) = simp(der(c, r)) inj(r, c, f_simp(lex_simp(r_simp, cs))) @@ -200,7 +203,7 @@ def lexing_simp(r: Rexp, s: String) = env(lex_simp(r, s.toList)) // Language specific code -val KEYWORD : Rexp = "while" | "if" | "then" | "else" | "do" | "for" | "to" | "true" | "false" | "read" | "write" | "skip" +val KEYWORD : Rexp = "while" | "if" | "then" | "else" | "do" | "for" | "to" | "true" | "false" | "read" | "write" | "skip" val OP : Rexp = "+" | "-" | "*" | "%" | "/" | "==" | "!=" | ">" | "<" | ">=" | "<=" | ":=" | "&&" | "||" val LET: Rexp = RANGE(('A' to 'Z').toSet ++ ('a' to 'z').toSet) val SYM : Rexp = RANGE(Set('.', '_', '>', '<', '=', ';', ',', ':', ')', '(')) @@ -213,31 +216,51 @@ val ID : Rexp = LET ~ (LET | "_" | DIGIT).% val NUM : Rexp = "0" | (DIGIT1 ~ DIGIT.%) val EOL : Rexp = "\n" | "\r\n" -val COMMENT : Rexp = "//" ~ (LET | SYM | PARENS | " " | DIGIT).% ~ EOL +val COMMENT : Rexp = "//" ~ (LET | SYM | PARENS | " " | DIGIT).% ~ EOL -val WHILE_REGS = (("k" $ KEYWORD) | - ("o" $ OP) | +val WHILE_REGS = (("k" $ KEYWORD) | + ("o" $ OP) | ("str" $ STRING) | ("p" $ PARENS) | - ("s" $ SEMI) | - ("w" $ WHITESPACE) | - ("i" $ ID) | + ("s" $ SEMI) | + ("w" $ WHITESPACE) | + ("i" $ ID) | ("n" $ NUM) | - ("c" $ COMMENT)).% + ("c" $ COMMENT)).% + + +def escapedChar(ch: Char): String = ch match { + case '\b' => "\\b" + case '\t' => "\\t" + case '\n' => "\\n" + case '\f' => "\\f" + case '\r' => "\\r" + case '"' => "\\\"" + case '\'' => "\\\'" + case '\\' => "\\\\" + case _ => if (ch.isControl) "\\0" + Integer.toOctalString(ch.toInt) + else String.valueOf(ch) +} + +def esc(s: String): String = "\"" + escapeImpl(s) + "\"" +def escapeImpl(s: String): String = s.flatMap(escapedChar) + +/* def esc(raw: String): String = { import scala.reflect.runtime.universe._ Literal(Constant(raw)).toString } +*/ def escape(tks: List[(String, String)]) = - tks.map{ case (s1, s2) => (esc(s1), esc(s2))} + tks.map{ case (s1, s2) => (s1, s2)} // Tokens -abstract class Token extends Serializable +abstract class Token extends Serializable case class T_KEYWORD(s: String) extends Token case class T_OP(s: String) extends Token case class T_STRING(s: String) extends Token @@ -257,11 +280,10 @@ } // Tokenise -def tokenise(s: String) = //: List[Token] = - escape(lexing_simp(WHILE_REGS, s)).filter{p => p._1 != "\"w\""}//.collect(token) +def tokenise(s: String) = //: List[Token] = + escape(lexing_simp(WHILE_REGS, s)).collect(token) println(tokenise(os.read(os.pwd / "primes.while"))) -