diff -r 62c38f4b1dae -r ee35eeb5831a solutions/cw2/lexer.sc --- a/solutions/cw2/lexer.sc Sun Oct 01 12:04:51 2023 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,394 +0,0 @@ -// CW 2 -//====== - - - -// Rexp -abstract class Rexp -case object ZERO extends Rexp -case object ONE extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class RECD(x: String, r: Rexp) extends Rexp - -case class RANGE(s: Set[Char]) extends Rexp -case class PLUS(r: Rexp) extends Rexp -case class OPTIONAL(r: Rexp) extends Rexp -case class NTIMES(r: Rexp, n: Int) extends Rexp - -// Values -abstract class Val -case object Empty extends Val -case class Chr(c: Char) extends Val -case class Sequ(v1: Val, v2: Val) extends Val -case class Left(v: Val) extends Val -case class Right(v: Val) extends Val -case class Stars(vs: List[Val]) extends Val -case class Rec(x: String, v: Val) extends Val - - -// Convenience for typing -def charlist2rexp(s : List[Char]): Rexp = s match { - case Nil => ONE - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} - -implicit def string2rexp(s : String) : Rexp = - charlist2rexp(s.toList) - -extension (r: Rexp) { - def ~ (s: Rexp) = SEQ(r, s) - def % = STAR(r) - def | (s: Rexp) = ALT(r, s) -} - - -extension (s: String) { - def | (r: Rexp) = ALT(s, r) - def | (r: String) = ALT(s, r) - def % = STAR(s) - def ~ (r: Rexp) = SEQ(s, r) - def ~ (r: String) = SEQ(s, r) - def $ (r: Rexp) = RECD(s, r) -} - -// nullable -def nullable(r: Rexp) : Boolean = r match { - case ZERO => false - case ONE => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - - case RECD(_, r1) => nullable(r1) - case RANGE(_) => false - case PLUS(r1) => nullable(r1) - case OPTIONAL(_) => true - case NTIMES(r1, i) => if (i == 0) true else nullable(r1) -} - -// der -def der(c: Char, r: Rexp) : Rexp = r match { - case ZERO => ZERO - case ONE => ZERO - case CHAR(d) => if (c == d) ONE else ZERO - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) - - case RECD(_, r1) => der(c, r1) - case RANGE(s) => if (s.contains(c)) ONE else ZERO - case PLUS(r1) => SEQ(der(c, r1), STAR(r1)) - case OPTIONAL(r1) => der(c, r1) - case NTIMES(r, i) => - if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1)) -} - -// Flatten -def flatten(v: Val) : String = v match { - case Empty => "" - case Chr(c) => c.toString - case Left(v) => flatten(v) - case Right(v) => flatten(v) - case Sequ(v1, v2) => flatten(v1) + flatten(v2) - case Stars(vs) => vs.map(flatten).mkString - case Rec(_, v) => flatten(v) -} - -// Env -def env(v: Val) : List[(String, String)] = v match { - case Empty => Nil - case Chr(c) => Nil - case Left(v) => env(v) - case Right(v) => env(v) - case Sequ(v1, v2) => env(v1) ::: env(v2) - case Stars(vs) => vs.flatMap(env) - case Rec(x, v) => (x, flatten(v))::env(v) -} - -// Mkeps -def mkeps(r: Rexp) : Val = r match { - case ONE => Empty - case ALT(r1, r2) => - if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2)) - case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) - case STAR(r) => Stars(Nil) - case RECD(x, r) => Rec(x, mkeps(r)) - - case PLUS(r) => Stars(List(mkeps(r))) // the first copy must match the empty string - case OPTIONAL(r) => if (nullable(r)) Stars(List(mkeps(r))) else Stars(Nil) - case NTIMES(r, i) => Stars(List.fill(i)(mkeps(r))) -} - -// Inj -def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { - case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) - case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) - case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) - case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) - case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1)) - case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2)) - case (CHAR(d), Empty) => Chr(c) - case (RECD(x, r1), _) => Rec(x, inj(r1, c, v)) - - case (RANGE(_), Empty) => Chr(c) - case (PLUS(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) - case (OPTIONAL(r), v1) => Stars(List(inj(r, c, v1))) - case (NTIMES(r, n), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) -} - -// Rectification functions -def F_ID(v: Val): Val = v -def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v)) -def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v)) -def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match { - case Right(v) => Right(f2(v)) - case Left(v) => Left(f1(v)) -} -def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match { - case Sequ(v1, v2) => Sequ(f1(v1), f2(v2)) -} -def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = - (v:Val) => Sequ(f1(Empty), f2(v)) -def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = - (v:Val) => Sequ(f1(v), f2(Empty)) -def F_RECD(f: Val => Val) = (v:Val) => v match { - case Rec(x, v) => Rec(x, f(v)) -} -def F_ERROR(v: Val): Val = throw new Exception("error") - -// Simp -def simp(r: Rexp): (Rexp, Val => Val) = r match { - case ALT(r1, r2) => { - val (r1s, f1s) = simp(r1) - val (r2s, f2s) = simp(r2) - (r1s, r2s) match { - case (ZERO, _) => (r2s, F_RIGHT(f2s)) - case (_, ZERO) => (r1s, F_LEFT(f1s)) - case _ => if (r1s == r2s) (r1s, F_LEFT(f1s)) - else (ALT (r1s, r2s), F_ALT(f1s, f2s)) - } - } - case SEQ(r1, r2) => { - val (r1s, f1s) = simp(r1) - val (r2s, f2s) = simp(r2) - (r1s, r2s) match { - case (ZERO, _) => (ZERO, F_ERROR) - case (_, ZERO) => (ZERO, F_ERROR) - case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s)) - case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s)) - case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s)) - } - } - case r => (r, F_ID) -} - -// Lex -def lex_simp(r: Rexp, s: List[Char]) : Val = s match { - case Nil => if (nullable(r)) mkeps(r) else - { throw new Exception("lexing error") } - case c::cs => { - val (r_simp, f_simp) = simp(der(c, r)) - inj(r, c, f_simp(lex_simp(r_simp, cs))) - } -} - -def ders_simp(cs: List[Char], r: Rexp) : Rexp = cs match { - case Nil => r - case c::cs => ders_simp(cs, simp(der(c, r))._1) -} - -def lexing_simp(r: Rexp, s: String) = env(lex_simp(r, s.toList)) - -// Language specific code -val KEYWORD : Rexp = "while" | "if" | "then" | "else" | "do" | "for" | "to" | "true" | "false" | "read" | "write" | "skip" -val OP : Rexp = "+" | "-" | "*" | "%" | "/" | "==" | "!=" | ">" | "<" | ">=" | "<=" | ":=" | "&&" | "||" -val LET: Rexp = RANGE(('A' to 'Z').toSet ++ ('a' to 'z').toSet) -val SYM : Rexp = RANGE(Set('.', '_', '>', '<', '=', ';', ',', ':')) -val PARENS : Rexp = "(" | "{" | ")" | "}" -val SEMI : Rexp = ";" -val WHITESPACE : Rexp = PLUS(" ") | "\n" | "\t" | "\r" -val DIGIT : Rexp = RANGE(('0' to '9').toSet) -val DIGIT1 : Rexp = RANGE(('1' to '9').toSet) -val STRING : Rexp = "\"" ~ (LET | DIGIT | SYM | PARENS | " " | "\\n").% ~ "\"" -val ID : Rexp = LET ~ (LET | "_" | DIGIT).% -val NUM : Rexp = "0" | (DIGIT1 ~ DIGIT.%) -val EOL : Rexp = "\n" | "\r\n" -val COMMENT : Rexp = "//" ~ (LET | DIGIT | SYM | " ").% ~ EOL - -val WHILE_REGS = (("k" $ KEYWORD) | - ("o" $ OP) | - ("str" $ STRING) | - ("p" $ PARENS) | - ("s" $ SEMI) | - ("w" $ WHITESPACE) | - ("i" $ ID) | - ("n" $ NUM) | - ("c" $ COMMENT)).% - -def esc(raw: String): String = { - import scala.reflect.runtime.universe._ - Literal(Constant(raw)).toString -} - -def escape(tks: List[(String, String)]) = - tks.map{ case (s1, s2) => (s1, esc(s2))} - -// Token -abstract class Token extends Serializable -case class T_KEYWORD(s: String) extends Token -case class T_OP(s: String) extends Token -case class T_STRING(s: String) extends Token -case class T_PAREN(s: String) extends Token -case object T_SEMI extends Token -case class T_ID(s: String) extends Token -case class T_NUM(n: Int) extends Token - -val token : PartialFunction[(String, String), Token] = { - case ("k", s) => T_KEYWORD(s) - case ("o", s) => T_OP(s) - case ("str", s) => T_STRING(s) - case ("p", s) => T_PAREN(s) - case ("s", _) => T_SEMI - case ("i", s) => T_ID(s) - case ("n", s) => T_NUM(s.toInt) -} - -// Tokenise -def tokenise(s: String) : List[Token] = - lexing_simp(WHILE_REGS, s).collect(token) - - -// Q2 Tests - -lex_simp(NTIMES("a", 3), "aaa".toList) -lex_simp(NTIMES(("a" | ONE), 3), "aa".toList) - -// Q3 Programs - -val prog1 = """write "Fib"; -read n; -minus1 := 0; -minus2 := 1; -while n > 0 do { -temp := minus2; -minus2 := minus1 + minus2; -minus1 := temp; -n := n - 1 -}; -write "Result"; -write minus2""" - -val prog2 = """start := 1000; -x := start; -y := start; -z := start; -while 0 < x do { -while 0 < y do { -while 0 < z do { z := z - 1 }; -z := start; -y := y - 1 -}; -y := start; -x := x - 1 -}""" - -val prog3 = """write "Input n please"; -read n; -write "The factors of n are"; -f := 2; -while n != 1 do { -while (n / f) * f == n do { -write f; -n := n / f -}; -f := f + 1 -}""" - -println(tokenise(prog1)) -println(tokenise(prog2)) -println(tokenise(prog3)) - - -// More tests - -println(lex_simp("x" $ OPTIONAL("a"), "a".toList)) -println(lex_simp("x" $ OPTIONAL("a"), "".toList)) -println(lex_simp("x" $ NTIMES(OPTIONAL("a"),4), "aa".toList)) -println(lex_simp("x" $ OPTIONAL("aa"), "aa".toList)) -println(lex_simp("x" $ OPTIONAL("aa"), "".toList)) - - - - -//=================== -println("Fib") -println(tokenise(os.read(os.pwd / "fib.while"))) - -println("Factors") -println(tokenise(os.read(os.pwd / "factors.while"))) - -println("Loops") -println(tokenise(os.read(os.pwd / "loops.while"))) - -println("Collatz") -println(tokenise(os.read(os.pwd / "collatz.while"))) - -println("Collatz 2") -println(tokenise(os.read(os.pwd / "collatz2.while"))) - -println("Primes") -println(tokenise(os.read(os.pwd / "primes.while"))) - - -// some testcases for simplification - -def size(r: Rexp): Int = r match { - case ZERO => 1 - case ONE => 1 - case CHAR(_) => 1 - case ALT(r1, r2) => 1 + size(r1) + size (r2) - case SEQ(r1, r2) => 1 + size(r1) + size (r2) - case STAR(r1) => 1 + size(r1) - case PLUS(r1) => 1 + size(r1) - case NTIMES(r1, n) => 1 + size(r1) - case RECD(_, r1) => 1 + size(r1) - case RANGE(_) => 1 -} - -val reg = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) -size(reg) // 5 -size(der('a', reg)) // 12 -size(simp(der('a', reg))._1) // 8 -size(simp(der('a', der('a', reg)))._1) // 8 - - - -// python tests -println(size(WHILE_REGS)) -println(size(ders_simp("r".toList, WHILE_REGS))) -println(size(ID)) -println(size(ders_simp("read".toList, ID))) - - - -// for automated testing - -@main -def main(file: String) = { - // empty - nothing to run -} - -@main -def test(file: String) = { - val contents = os.read(os.pwd / file) - tokenise(contents) -} - -