# HG changeset patch # User Christian Urban # Date 1599477487 -3600 # Node ID 1c9a23304b859c4bd789d04c155bfd349b53bf45 # Parent d94fdbef1a4f65d930e9b569998683db0cb04e5b ammonite diff -r d94fdbef1a4f -r 1c9a23304b85 Attic/parser2a.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Attic/parser2a.scala Mon Sep 07 12:18:07 2020 +0100 @@ -0,0 +1,105 @@ +// Parser combinators including semantic actions +// parses lists of tokens +// +// Needs +// :load matcher.scala + +// some regular expressions +val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz") +val ID = PLUS(LETTER) + +val DIGIT = RANGE("0123456789") +val NONZERODIGIT = RANGE("123456789") +val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") + +val LPAREN = CHAR('(') +val RPAREN = CHAR(')') + +val WHITESPACE = PLUS(RANGE(" \n")) +val OPS = RANGE("+-*") + +// for classifying the strings that have been recognised +abstract class Token + +case object T_WHITESPACE extends Token +case class T_NUM(s: String) extends Token +case class T_ID(s: String) extends Token +case class T_OP(s: String) extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token +case object T_IF extends Token +case object T_THEN extends Token +case object T_ELSE extends Token + +// lexing rules for arithmetic expressions +val lexing_rules: List[Rule[Token]]= + List(("if", (s) => T_IF), + ("then", (s) => T_THEN), + ("else", (s) => T_ELSE), + (NUMBER, (s) => T_NUM(s.mkString)), + (ID, (s) => T_ID(s.mkString)), + (WHITESPACE, (s) => T_WHITESPACE), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) => T_RPAREN), + (OPS, (s) => T_OP(s.mkString))) + +val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE)) + +// parser combinators with return type T +abstract class Parser[T] { + def parse(ts: List[Token]): Set[(T, List[Token])] + + def parse_all(ts: List[Token]) : Set[T] = + for ((head, tail) <- parse(ts); if (tail == Nil)) yield head + + def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right) + def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f) + def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right) + def ~>[S] (right : => Parser[S]) : Parser[S] = this ~ right ==> (x => x._2) + def <~[S] (right : => Parser[S]) : Parser[T] = this ~ right ==> (x => x._1) + +} + +class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] { + def parse(sb: List[Token]) = + for ((head1, tail1) <- p.parse(sb); + (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2) +} + +class AltParser[T](p: => Parser[T], q: => Parser[T]) extends Parser[T] { + def parse (sb: List[Token]) = p.parse(sb) ++ q.parse(sb) +} + +class FunParser[T, S](p: => Parser[T], f: T => S) extends Parser[S] { + def parse (sb: List[Token]) = + for ((head, tail) <- p.parse(sb)) yield (f(head), tail) +} + + +case class TokParser(tok: Token) extends Parser[Token] { + def parse(ts: List[Token]) = ts match { + case t::ts if (t == tok) => Set((t, ts)) + case _ => Set () + } +} + +implicit def token2tparser(t: Token) = TokParser(t) + +case object NumParser extends Parser[Int] { + def parse(ts: List[Token]) = ts match { + case T_NUM(s)::ts => Set((s.toInt, ts)) + case _ => Set () + } +} + +lazy val E: Parser[Int] = (T ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z } || T +lazy val T: Parser[Int] = (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => x * z } || F +lazy val F: Parser[Int] = (T_LPAREN ~> E <~ T_RPAREN) || NumParser + +println(E.parse_all(Tok.fromString("1 + 2 + 3"))) +println(E.parse_all(Tok.fromString("1 + 2 * 3"))) +println(E.parse_all(Tok.fromString("(1 + 2) * 3"))) + +// Excercise: implement minus +println(E.parse_all(Tok.fromString("(1 - 2) * 3"))) +println(E.parse_all(Tok.fromString("(1 + 2) * - 3"))) diff -r d94fdbef1a4f -r 1c9a23304b85 Attic/parser5.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Attic/parser5.scala Mon Sep 07 12:18:07 2020 +0100 @@ -0,0 +1,113 @@ +val DIGIT = RANGE("0123456789") +val NONZERODIGIT = RANGE("123456789") + +val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") +val LPAREN = CHAR('(') +val RPAREN = CHAR(')') +val WHITESPACE = PLUS(RANGE(" \n")) +val OPS = RANGE("+-*") + +// for classifying the strings that have been recognised +abstract class Token +case object T_WHITESPACE extends Token +case class T_NUM(s: String) extends Token +case class T_OP(s: String) extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token + +val lexing_rules: List[Rule[Token]]= + List((NUMBER, (s) => T_NUM(s.mkString)), + (WHITESPACE, (s) => T_WHITESPACE), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) => T_RPAREN), + (OPS, (s) => T_OP(s.mkString))) + +val Tk = Tokenizer(lexing_rules, List(T_WHITESPACE)) + + +// parser combinators with input type I and return type T +// and memoisation + +case class SubList[T](s: List[T], l: Int, h: Int) { + def low = l + def high = h + def length = h - l + def sublist(l: Int = l, h: Int = h) = s.slice(l, h) + def set(low: Int = l, high: Int = h) = SubList(s, low, high) +} + +type Ctxt[T] = List[(String, SubList[T])] + +abstract class Parser[I, T] { + + def parse(ts: SubList[I], ctxt: Ctxt[I]): Set[(T, SubList[I])] + + def parse_all(s: List[I]) : Set[T] = + for ((head, tail) <- parse(SubList(s, 0, s.length), Nil); if (tail.sublist() == Nil)) yield head + + def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right) + def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f) + def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right) + def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2) + def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1) +} + +class SeqParser[I, T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] { + def parse(sb: SubList[I], ctxt: Ctxt[I]) = + for ((head1, tail1) <- p.parse(sb, ctxt); + (head2, tail2) <- q.parse(tail1, ctxt)) yield ((head1, head2), tail2) +} + +class AltParser[I, T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] { + def parse(sb: SubList[I], ctxt: Ctxt[I]) = p.parse(sb, ctxt) ++ q.parse(sb, ctxt) +} + +class FunParser[I, T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] { + def parse(sb: SubList[I], ctxt: Ctxt[I]) = + for ((head, tail) <- p.parse(sb, ctxt)) yield (f(head), tail) +} + +case object NumParser extends Parser[Token, Int] { + def parse(sb: SubList[Token], ctxt: Ctxt[Token]) = { + if (0 < sb.length) sb.sublist(sb.low, sb.low + 1) match { + case T_NUM(i)::Nil => Set((i.toInt, sb.set(low = sb.low + 1))) + case _ => Set() + } + else Set() + } +} + +case class TokParser(t: Token) extends Parser[Token, Token] { + def parse(sb: SubList[Token], ctxt: Ctxt[Token]) = { + if (0 < sb.length && sb.sublist(sb.low, sb.low + 1) == List(t)) Set((t, sb.set(low = sb.low + 1))) + else Set() + } +} + +implicit def token2tparser(t: Token) = TokParser(t) + +class IgnLst[I, T](p: => Parser[I, T]) extends Parser[I, T] { + def parse(sb: SubList[I], ctxt: Ctxt[I]) = { + if (sb.length == 0) Set() + else for ((head, tail) <- p.parse(sb.set(high = sb.high - 1), ctxt)) + yield (head, tail.set(high = tail.high + 1)) + } +} + +class CHECK[I, T](nt: String, p: => Parser[I, T]) extends Parser[I, T] { + def parse(sb: SubList[I], ctxt: Ctxt[I]) = { + val should_trim = ctxt.contains (nt, sb) + if (should_trim && sb.length == 0) Set() + else if (should_trim) new IgnLst(p).parse(sb, (nt, sb)::ctxt) + else p.parse(sb, (nt, sb)::ctxt) + } +} + +lazy val E: Parser[Token, Int] = + new CHECK("E", (E ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z} || + (E ~ T_OP("*") ~ E) ==> { case ((x, y), z) => x * z} || + (T_LPAREN ~ E ~ T_RPAREN) ==> { case ((x, y), z) => y} || + NumParser) + +println(E.parse_all(Tk.fromString("1 + 2 * 3"))) +println(E.parse_all(Tk.fromString("(1 + 2) * 3"))) diff -r d94fdbef1a4f -r 1c9a23304b85 Attic/token.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Attic/token.scala Mon Sep 07 12:18:07 2020 +0100 @@ -0,0 +1,342 @@ +// A simple lexer inspired by work of Sulzmann & Lu +//================================================== + + +import scala.language.implicitConversions +import scala.language.reflectiveCalls + +// regular expressions including records +abstract class Rexp +case object ZERO extends Rexp +case object ONE extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class RECD(x: String, r: Rexp) extends Rexp + +// values +abstract class Val +case object Empty extends Val +case class Chr(c: Char) extends Val +case class Sequ(v1: Val, v2: Val) extends Val +case class Left(v: Val) extends Val +case class Right(v: Val) extends Val +case class Stars(vs: List[Val]) extends Val +case class Rec(x: String, v: Val) extends Val + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]): Rexp = s match { + case Nil => ONE + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = + charlist2rexp(s.toList) + +implicit def RexpOps(r: Rexp) = new { + def | (s: Rexp) = ALT(r, s) + def % = STAR(r) + def ~ (s: Rexp) = SEQ(r, s) +} + +implicit def stringOps(s: String) = new { + def | (r: Rexp) = ALT(s, r) + def | (r: String) = ALT(s, r) + def % = STAR(s) + def ~ (r: Rexp) = SEQ(s, r) + def ~ (r: String) = SEQ(s, r) + def $ (r: Rexp) = RECD(s, r) +} + +def nullable(r: Rexp) : Boolean = r match { + case ZERO => false + case ONE => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case RECD(_, r1) => nullable(r1) +} + +def der(c: Char, r: Rexp) : Rexp = r match { + case ZERO => ZERO + case ONE => ZERO + case CHAR(d) => if (c == d) ONE else ZERO + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case RECD(_, r1) => der(c, r1) +} + + +// extracts a string from value +def flatten(v: Val) : String = v match { + case Empty => "" + case Chr(c) => c.toString + case Left(v) => flatten(v) + case Right(v) => flatten(v) + case Sequ(v1, v2) => flatten(v1) + flatten(v2) + case Stars(vs) => vs.map(flatten).mkString + case Rec(_, v) => flatten(v) +} + + +// extracts an environment from a value; +// used for tokenise a string +def env(v: Val) : List[(String, String)] = v match { + case Empty => Nil + case Chr(c) => Nil + case Left(v) => env(v) + case Right(v) => env(v) + case Sequ(v1, v2) => env(v1) ::: env(v2) + case Stars(vs) => vs.flatMap(env) + case Rec(x, v) => (x, flatten(v))::env(v) +} + +// The Injection Part of the lexer + +def mkeps(r: Rexp) : Val = r match { + case ONE => Empty + case ALT(r1, r2) => + if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2)) + case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) + case STAR(r) => Stars(Nil) + case RECD(x, r) => Rec(x, mkeps(r)) +} + +def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { + case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) + case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) + case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) + case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) + case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1)) + case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2)) + case (CHAR(d), Empty) => Chr(c) + case (RECD(x, r1), _) => Rec(x, inj(r1, c, v)) +} + +// some "rectification" functions for simplification +def F_ID(v: Val): Val = v +def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v)) +def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v)) +def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match { + case Right(v) => Right(f2(v)) + case Left(v) => Left(f1(v)) +} +def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match { + case Sequ(v1, v2) => Sequ(f1(v1), f2(v2)) +} +def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = + (v:Val) => Sequ(f1(Empty), f2(v)) +def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = + (v:Val) => Sequ(f1(v), f2(Empty)) +def F_RECD(f: Val => Val) = (v:Val) => v match { + case Rec(x, v) => Rec(x, f(v)) +} +def F_ERROR(v: Val): Val = throw new Exception("error") + +def simp(r: Rexp): (Rexp, Val => Val) = r match { + case ALT(r1, r2) => { + val (r1s, f1s) = simp(r1) + val (r2s, f2s) = simp(r2) + (r1s, r2s) match { + case (ZERO, _) => (r2s, F_RIGHT(f2s)) + case (_, ZERO) => (r1s, F_LEFT(f1s)) + case _ => if (r1s == r2s) (r1s, F_LEFT(f1s)) + else (ALT (r1s, r2s), F_ALT(f1s, f2s)) + } + } + case SEQ(r1, r2) => { + val (r1s, f1s) = simp(r1) + val (r2s, f2s) = simp(r2) + (r1s, r2s) match { + case (ZERO, _) => (ZERO, F_ERROR) + case (_, ZERO) => (ZERO, F_ERROR) + case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s)) + case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s)) + case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s)) + } + } + case RECD(x, r1) => { + val (r1s, f1s) = simp(r1) + (RECD(x, r1s), F_RECD(f1s)) + } + case r => (r, F_ID) +} + +// lexing functions including simplification +def lex_simp(r: Rexp, s: List[Char]) : Val = s match { + case Nil => if (nullable(r)) mkeps(r) else + { throw new Exception("lexing error") } + case c::cs => { + val (r_simp, f_simp) = simp(der(c, r)) + inj(r, c, f_simp(lex_simp(r_simp, cs))) + } +} + +def lexing_simp(r: Rexp, s: String) = + env(lex_simp(r, s.toList)) + + +// The Lexing Rules for the Fun Language + +def PLUS(r: Rexp) = r ~ r.% + +def Range(s : List[Char]) : Rexp = s match { + case Nil => ZERO + case c::Nil => CHAR(c) + case c::s => ALT(CHAR(c), Range(s)) +} +def RANGE(s: String) = Range(s.toList) + +val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") +val DIGIT = RANGE("0123456789") +val ID = SYM ~ (SYM | DIGIT).% +val NUM = PLUS(DIGIT) +val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" +val SEMI: Rexp = ";" +val OP: Rexp = ":=" | "=" | "-" | "+" | "*" | "!=" | "<" | ">" +val WHITESPACE = PLUS(" " | "\n" | "\t") +val RPAREN: Rexp = "{" +val LPAREN: Rexp = "}" +val STRING: Rexp = "\"" ~ SYM.% ~ "\"" + + +val WHILE_REGS = (("k" $ KEYWORD) | + ("i" $ ID) | + ("o" $ OP) | + ("n" $ NUM) | + ("s" $ SEMI) | + ("str" $ STRING) | + ("p" $ (LPAREN | RPAREN)) | + ("w" $ WHITESPACE)).% + + +// Two Simple While Tests +//======================== + +println("test: read n") + +val prog0 = """read n""" +println(lexing_simp(WHILE_REGS, prog0)) + +println("test: read n; write n ") + +val prog1 = """read n; write n""" +println(lexing_simp(WHILE_REGS, prog1)) + + +// Bigger Tests +//============== + +// escapes strings and prints them out as "", "\n" and so on +def esc(raw: String): String = { + import scala.reflect.runtime.universe._ + Literal(Constant(raw)).toString +} + +def escape(tks: List[(String, String)]) = + tks.map{ case (s1, s2) => (s1, esc(s2))} + +val prog2 = """ +write "Fib"; +read n; +minus1 := 0; +minus2 := 1; +while n > 0 do { + temp := minus2; + minus2 := minus1 + minus2; + minus1 := temp; + n := n - 1 +}; +write "Result"; +write minus2 +""" + +println("Tokens for Fib") +println(escape(lexing_simp(WHILE_REGS, prog2)).mkString("\n")) + + + +val prog3 = """ +start := 1000; +x := start; +y := start; +z := start; +while 0 < x do { + while 0 < y do { + while 0 < z do { + z := z - 1 + }; + z := start; + y := y - 1 + }; + y := start; + x := x - 1 +} +""" + +println("Tokens for Loops") +println(escape(lexing_simp(WHILE_REGS, prog3)).mkString("\n")) + + + + + + + +// The tokens for the WHILE language +import java.io._ + +abstract class Token extends Serializable +case object T_SEMI extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token +case class T_ID(s: String) extends Token +case class T_OP(s: String) extends Token +case class T_NUM(n: Int) extends Token +case class T_KWD(s: String) extends Token +case class T_STR(s: String) extends Token + +val token : PartialFunction[(String, String), Token] = { + case ("s", _) => T_SEMI + case ("p", "{") => T_LPAREN + case ("p", "}") => T_RPAREN + case ("i", s) => T_ID(s) + case ("o", s) => T_OP(s) + case ("n", s) => T_NUM(s.toInt) + case ("k", s) => T_KWD(s) + case ("str", s) => T_STR(s) +} + +def tokenise(s: String) : List[Token] = + lexing_simp(WHILE_REGS, s).collect(token) + +tokenise(prog2) +tokenise(prog3) + + +object Lexer extends App { + + val fname = "/tmp/nflx" + + def serialise(tks: List[Token]) = { + val out = new ObjectOutputStream(new FileOutputStream(fname)) + out.writeObject(tks) + out.close + } + + def deserialise() : List[Token] = { + val in = new ObjectInputStream(new FileInputStream(fname)) + val tks = in.readObject.asInstanceOf[List[Token]] + in.close + tks + } + + serialise(tokenise(prog2)) + println(deserialise()) + +} \ No newline at end of file diff -r d94fdbef1a4f -r 1c9a23304b85 handouts/amm-ho.pdf Binary file handouts/amm-ho.pdf has changed diff -r d94fdbef1a4f -r 1c9a23304b85 handouts/amm-ho.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/handouts/amm-ho.tex Mon Sep 07 12:18:07 2020 +0100 @@ -0,0 +1,126 @@ +\documentclass{article} +\usepackage{../style} +\usepackage{../langs} +\usepackage{marvosym} + +%cheat sheet +%http://worldline.github.io/scala-cheatsheet/ + +\begin{document} + +\section*{Scala in 6CCS3CFL} + +For the coursework in this module you are free to use any programming +language you like, but I will show you all my code using Scala---I hope +you have fond memories of Scala from PEP. I will use the current +stable version of Scala, which is 2.13.3. If you need a reminder of +the Scala handouts for PEP have a look +\hr{http://talisker.nms.kcl.ac.uk/cgi-bin/repos.cgi/pep-material/raw-file/tip/handouts/pep-ho.pdf}\bigskip + +\noindent +The main difference to the Scala I showed you in PEP is that in CFL +I will use the Ammonite REPL + +\begin{quote} +\url{https://ammonite.io/#Ammonite-REPL} +\end{quote} + +\noindent +This is a drop-in replacement for the original Scala REPL and +works very similarly, for example + +\begin{lstlisting}[language={},numbers=none,basicstyle=\ttfamily\small] +$ amm +Loading... +Welcome to the Ammonite Repl 2.2.0 (Scala 2.13.3 Java 9) +scala> 1 + 2 +res0: Int = 3 +\end{lstlisting} %% $ + +\noindent +Ammonite uses the same Scala compiler, just adds some useful features +on top of it. It is quite main-stream in the Scala community and it should +therefore be very easy for you to install \texttt{amm}. The big +advantage of Ammonite is that it comes with some additional libraries +already built-in and also allows one to easily break up code into +smaller modules. For example reading and writing files in Ammonite can +be achieved with + +\begin{lstlisting}[numbers=none,language=Scala] +scala> import ammonite.ops._ + +scala> read(pwd / "file.name") +res1: String = """...""" + +scala> write.over(pwd / "file.name", "foo bar") +\end{lstlisting} + +\noindent +For loading and accessing code from another Scala file, you can import it +as follows: + +\begin{lstlisting}[numbers=none,language=Scala] +import $file.name-of-the-file +import name-of-the-file._ +\end{lstlisting} %% $ + +\noindent +This assumes the other Scala file is called +\texttt{name-of-the-file.sc} and requires the file to be in the same +directory where \texttt{amm} is working in. This will be very convenient +for the compiler we implement in CFL, because it allows us to easily +break-up the code into the lexer, parser and code generator. + +Another feature which exists in Ammonite, but not yet in the +current version of Scala (it will be in the next version called dotty) +is that you can mark functions as \texttt{@main}. For example + +\begin{lstlisting}[numbers=none,language=Scala] +@main +def foo() = ... +\end{lstlisting} + +\noindent +This means you can now call that function from the command line like + +\begin{lstlisting}[numbers=none,language=Scala] +$ amm file.sc foo +\end{lstlisting} %% $ + +\noindent +If you want to specify an argument on the commandline, say an int and +a string, then you can write + +\begin{lstlisting}[numbers=none,language=Scala] +@main +def bar(i: Int, s: String) = ... +\end{lstlisting} + +\noindent +and then call + +\begin{lstlisting}[numbers=none,language=Scala] +$ amm file.sc 42 foobar +\end{lstlisting} %% $ + +\noindent +What is also good in Ammonite that you can specify more than one +function to be ``main'' and then specify on the command line which +function do you want to run as entry-point.\bigskip + +\noindent +To sum up, Ammonite is a really useful addition to the Scala ecosystem. +You can find more information about how to use it in the first five chapters +of the ``Hands-on Scala'' book by Li Haoyi. These chapters are +free and can be used as a reference, see: + +\begin{center} +\url{https://www.handsonscala.com/part-i-introduction-to-scala.html} +\end{center} + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r d94fdbef1a4f -r 1c9a23304b85 progs/cw1.scala --- a/progs/cw1.scala Wed Sep 02 23:34:19 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,264 +0,0 @@ -// CW 1 -import scala.language.implicitConversions -import scala.language.reflectiveCalls - -abstract class Rexp -case object ZERO extends Rexp // matches nothing -case object ONE extends Rexp // matches the empty string -case class CHAR(c: Char) extends Rexp // matches a character c -case class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternative -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence -case class STAR(r: Rexp) extends Rexp // star -case class RANGE(cs: Set[Char]) extends Rexp // set of characters -case class PLUS(r: Rexp) extends Rexp // plus -case class OPT(r: Rexp) extends Rexp // optional -case class NTIMES(r: Rexp, n: Int) extends Rexp // n-times -case class UPNTIMES(r: Rexp, n: Int) extends Rexp // up n-times -case class FROMNTIMES(r: Rexp, n: Int) extends Rexp // from n-times -case class NMTIMES(r: Rexp, n: Int, m: Int) extends Rexp // between nm-times -case class NOT(r: Rexp) extends Rexp // not -case class CFUN(f: Char => Boolean) extends Rexp // subsuming CHAR and RANGE - -def CHAR(c: Char) = CFUN(d => c == d) -val ALL = CFUN(_ => true) -def RANGE(cs: Set[Char]) = CFUN(d => cs.contains(d)) - -def CHAR(c: Char) = CFUN(c == _) -val ALL = CFUN(_ => true) -def RANGE(cs: Set[Char]) = CFUN(cs.contains(_)) - - -// nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case ZERO => false - case ONE => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case RANGE(_) => false - case PLUS(r) => nullable(r) - case OPT(_) => true - case NTIMES(r, n) => if (n == 0) true else nullable(r) - case UPNTIMES(_, _) => true - case FROMNTIMES(r, n) => if (n == 0) true else nullable(r) - case NMTIMES(r, n, m) => if (n == 0) true else nullable(r) - case NOT(r) => !nullable(r) - case CFUN(_) => false -} - -// derivative of a regular expression w.r.t. a character -def der (c: Char, r: Rexp) : Rexp = r match { - case ZERO => ZERO - case ONE => ZERO - case CHAR(d) => if (c == d) ONE else ZERO - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) - case RANGE(cs) => if (cs contains c) ONE else ZERO - case PLUS(r) => SEQ(der(c, r), STAR(r)) - case OPT(r) => der(c, r) - case NTIMES(r, n) => - if (n == 0) ZERO else SEQ(der(c, r), NTIMES(r, n - 1)) - case UPNTIMES(r, n) => - if (n == 0) ZERO else SEQ(der(c, r), UPNTIMES(r, n - 1)) - case FROMNTIMES(r, n) => - if (n == 0) SEQ(der(c, r), STAR(r)) else SEQ(der(c, r), FROMNTIMES(r, n - 1)) - case NMTIMES(r, n, m) => - if (m < n) ZERO else - if (n == 0 && m == 0) ZERO else - if (n == 0) SEQ(der(c, r), UPNTIMES(r, m - 1)) - else SEQ(der(c, r), NMTIMES(r, n - 1, m - 1)) - case NOT(r) => NOT(der (c, r)) - case CFUN(f) => if (f(c)) ONE else ZERO -} - -def simp(r: Rexp) : Rexp = r match { - case ALT(r1, r2) => (simp(r1), simp(r2)) match { - case (ZERO, r2s) => r2s - case (r1s, ZERO) => r1s - case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s) - } - case SEQ(r1, r2) => (simp(r1), simp(r2)) match { - case (ZERO, _) => ZERO - case (_, ZERO) => ZERO - case (ONE, r2s) => r2s - case (r1s, ONE) => r1s - case (r1s, r2s) => SEQ(r1s, r2s) - } - case r => r -} - - -// derivative w.r.t. a string (iterates der) -def ders (s: List[Char], r: Rexp) : Rexp = s match { - case Nil => r - case c::s => ders(s, der(c, r)) -} - -def ders_simp (s: List[Char], r: Rexp) : Rexp = s match { - case Nil => r - case c::s => ders_simp(s, simp(der(c, r))) -} - -// main matcher function including simplification -def matcher(r: Rexp, s: String) : Boolean = - nullable(ders_simp(s.toList, r)) - - - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => ONE - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - -implicit def RexpOps (r: Rexp) = new { - def | (s: Rexp) = ALT(r, s) - def % = STAR(r) - def ~ (s: Rexp) = SEQ(r, s) -} - -implicit def stringOps (s: String) = new { - def | (r: Rexp) = ALT(s, r) - def | (r: String) = ALT(s, r) - def % = STAR(s) - def ~ (r: Rexp) = SEQ(s, r) - def ~ (r: String) = SEQ(s, r) -} - -val r1 = NTIMES("a", 3) -val r2 = NTIMES(OPT("a"), 3) -val r3 = UPNTIMES("a", 3) -val r4 = UPNTIMES(OPT("a"), 3) -val r5 = NMTIMES("a", 3, 5) -val r6 = NMTIMES(OPT("a"), 3, 5) - -val rs = List(r1, r2, r3, r4, r5, r6) - -rs.map(matcher(_, "")) -rs.map(matcher(_, "a")) -rs.map(matcher(_, "aa")) -rs.map(matcher(_, "aaa")) -rs.map(matcher(_, "aaaa")) -rs.map(matcher(_, "aaaaa")) -rs.map(matcher(_, "aaaaaa")) - -println("EMAIL:") -val LOWERCASE = ('a' to 'z').toSet -val DIGITS = ('0' to '9').toSet -val EMAIL = { PLUS(CFUN(LOWERCASE | DIGITS | ("_.-").toSet)) ~ "@" ~ - PLUS(CFUN(LOWERCASE | DIGITS | (".-").toSet)) ~ "." ~ - NMTIMES(CFUN(LOWERCASE | Set('.')), 2, 6) } - -val my_email = "christian.urban@kcl.ac.uk" - -println(EMAIL); -println(matcher(EMAIL, my_email)) -println(ders_simp(my_email.toList, EMAIL)) - -println("COMMENTS:") -val ALL = CFUN((c:Char) => true) -val COMMENT = "/*" ~ (NOT(ALL.% ~ "*/" ~ ALL.%)) ~ "*/" - -println(matcher(COMMENT, "/**/")) -println(matcher(COMMENT, "/*foobar*/")) -println(matcher(COMMENT, "/*test*/test*/")) -println(matcher(COMMENT, "/*test/*test*/")) - -println("EVILS:") -val EVIL1 = PLUS(PLUS("a" ~ "a" ~ "a")) -val EVIL2 = PLUS(PLUS("aaaaaaaaaaaaaaaaaaa" ~ OPT("a"))) // 19 as ~ a? - - -//40 * aaa matches -//43 * aaa + aa does not -//45 * aaa + a - -println("EVIL1:") -println(matcher(EVIL1, "aaa" * 40)) -println(matcher(EVIL1, "aaa" * 43 + "aa")) -println(matcher(EVIL1, "aaa" * 45 + "a")) -println("EVIL2:") -println(matcher(EVIL2, "aaa" * 40)) -println(matcher(EVIL2, "aaa" * 43 + "aa")) -println(matcher(EVIL2, "aaa" * 45 + "a")) - -println("TEST for bug pointed out by Filips Ivanovs") -val test = NMTIMES(CFUN(LOWERCASE | Set('.')), 2, 6) - -println(matcher(test,"a")) -println(matcher(test,"ab")) -println(matcher(test,"abcdef")) -println(matcher(test,"abc")) -println(matcher(test,"....")) -println(matcher(test,"asdfg")) -println(matcher(test,"abcdefg")) - -println(test) -println(ders_simp("a".toList, test)) -println(ders_simp("aa".toList, test)) -println(ders_simp("aaa".toList, test)) -println(ders_simp("aaaa".toList, test)) -println(ders_simp("aaaaa".toList, test)) -println(ders_simp("aaaaaa".toList, test)) -println(ders_simp("aaaaaaa".toList, test)) - -def TEST(s: String, r: Rexp) = { - println("Rexp |" + s + "|") - println("Derivative:\n" + ders_simp(s.toList, r)) - println("Is Nullable: " + nullable(ders_simp(s.toList, r))) - Console.readLine -} - - -val ALL2 = "a" | "f" -val COMMENT2 = ("/*" ~ NOT(ALL2.% ~ "*/" ~ ALL2.%) ~ "*/") - -println("1) TEST TEST") -TEST("", COMMENT2) -TEST("/", COMMENT2) -TEST("/*", COMMENT2) -TEST("/*f", COMMENT2) -TEST("/*f*", COMMENT2) -TEST("/*f*/", COMMENT2) -TEST("/*f*/ ", COMMENT2) - -val ALL3 = "a" | "f" -val COMMENT3 = ("/" ~ NOT(ALL3.% ~ "&" ~ ALL3.%) ~ "&") - -println("2) TEST TEST") -TEST("", COMMENT3) -TEST("/", COMMENT3) -TEST("/", COMMENT3) -TEST("/f", COMMENT3) -TEST("/f&", COMMENT3) -TEST("/f& ", COMMENT3) - -//test: ("a" | "aa") ~ ("a" | "aa")* -val auxEVIL3 = ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a'))) -val EVIL3 = SEQ(auxEVIL3, STAR(auxEVIL3)) -val EVIL3p = FROMNTIMES(auxEVIL3, 1) - - -for (i <- 1 to 100 by 1) { - println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL3, "a" * i))) + " size: " + - size(ders(("a" * i).toList, EVIL3))) -} - - - -val t1 = EVIL3 -val t2 = simp(der('a', t1)) -val t3 = simp(der('a', t2)) -val t4 = simp(der('a', t3)) - -val t1 = EVIL3p -val t2 = simp(der('a', t1)) -val t3 = simp(der('a', t2)) -val t4 = simp(der('a', t3)) diff -r d94fdbef1a4f -r 1c9a23304b85 progs/matcher.scala --- a/progs/matcher.scala Wed Sep 02 23:34:19 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,130 +0,0 @@ -package object matcher { - -// regular expressions -// including constructors for NOT and ALLC -sealed abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case object ALLC extends Rexp // recognises any character -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class NOT(r: Rexp) extends Rexp // negation of a regular expression - - -// nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case ALLC => false - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case NOT(r) => !(nullable(r)) -} - -// tests whether a regular expression -// cannot recognise more -def no_more (r: Rexp) : Boolean = r match { - case NULL => true - case EMPTY => false - case ALLC => false - case CHAR(_) => false - case ALT(r1, r2) => no_more(r1) && no_more(r2) - case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1) - case STAR(_) => false - case NOT(r) => !(no_more(r)) -} - - -// derivative of a regular expression w.r.t. a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case ALLC => EMPTY - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) - case NOT(r) => NOT(der (c, r)) -} - -// main class for the tokenizer -case class Tokenizer[T](rules: List[(Rexp, List[Char] => T)], excl: List[T] = Nil) { - -def munch(r: Rexp, action: List[Char] => T, s: List[Char], t: List[Char]) : Option[(List[Char], T)] = - s match { - case Nil if (nullable(r)) => Some(Nil, action(t)) - case Nil => None - case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, action(t)) - case c::s if (no_more(der (c, r))) => None - case c::s => munch(der (c, r), action, s, t ::: List(c)) - } - -def one_token(s: List[Char]) : Either[(List[Char], T), String] = { - val somes = rules.map { (r) => munch(r._1, r._2, s, Nil) }.flatten - if (somes == Nil) Right(s.mkString) - else Left(somes sortBy (_._1.length) head) -} - -def tokenize(cs: List[Char]) : List[T] = cs match { - case Nil => Nil - case _ => one_token(cs) match { - case Left((rest, token)) => token :: tokenize(rest) - case Right(s) => { println("Cannot tokenize: \"" + s + "\""); Nil } - } -} - -def fromString(s: String) : List[T] = - tokenize(s.toList).filterNot(excl.contains(_)) - -def fromFile(name: String) : List[T] = - fromString(io.Source.fromFile(name).mkString) - -} - - -// regular expression for specifying -// ranges of characters -def Range(s : List[Char]) : Rexp = s match { - case Nil => NULL - case c::Nil => CHAR(c) - case c::s => ALT(CHAR(c), Range(s)) -} -def RANGE(s: String) = Range(s.toList) - - -// one or more -def PLUS(r: Rexp) = SEQ(r, STAR(r)) - -// many alternatives -def Alts(rs: List[Rexp]) : Rexp = rs match { - case Nil => NULL - case r::Nil => r - case r::rs => ALT(r, Alts(rs)) -} -def ALTS(rs: Rexp*) = Alts(rs.toList) - -// repetitions -def Seqs(rs: List[Rexp]) : Rexp = rs match { - case Nil => NULL - case r::Nil => r - case r::rs => SEQ(r, Seqs(rs)) -} -def SEQS(rs: Rexp*) = Seqs(rs.toList) - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - -} diff -r d94fdbef1a4f -r 1c9a23304b85 progs/parser2.scala --- a/progs/parser2.scala Wed Sep 02 23:34:19 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,139 +0,0 @@ -// A naive version of parser combinators producing parse trees -// -// Needs -// :load matcher.scala - -// some regular expressions -val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz") -val ID = PLUS(LETTER) - -val DIGIT = RANGE("0123456789") -val NONZERODIGIT = RANGE("123456789") -val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") - -val LPAREN = CHAR('(') -val RPAREN = CHAR(')') - -val WHITESPACE = PLUS(RANGE(" \n")) -val OPS = RANGE("+-*") - -// for classifying the strings that have been recognised -abstract class Token - -case object T_WHITESPACE extends Token -case class T_NUM(s: String) extends Token -case class T_ID(s: String) extends Token -case class T_OP(s: String) extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token -case object T_IF extends Token -case object T_THEN extends Token -case object T_ELSE extends Token - -// lexing rules for arithmetic expressions -val lexing_rules: List[Rule[Token]]= - List(("if", (s) => T_IF), - ("then", (s) => T_THEN), - ("else", (s) => T_ELSE), - (NUMBER, (s) => T_NUM(s.mkString)), - (ID, (s) => T_ID(s.mkString)), - (WHITESPACE, (s) => T_WHITESPACE), - (LPAREN, (s) => T_LPAREN), - (RPAREN, (s) => T_RPAREN), - (OPS, (s) => T_OP(s.mkString))) - -val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE)) - - -// parse trees -abstract class ParseTree -case class Leaf(t: Token) extends ParseTree -case class Branch(pts: List[ParseTree]) extends ParseTree - -def combine(pt1: ParseTree, pt2: ParseTree) = pt1 match { - case Leaf(t) => Branch(List(Leaf(t), pt2)) - case Branch(pts) => Branch(pts ++ List(pt2)) -} - -// parser combinators -abstract class Parser { - def parse(ts: List[Token]): Set[(ParseTree, List[Token])] - - def parse_all(ts: List[Token]) : Set[ParseTree] = - for ((head, tail) <- parse(ts); if (tail == Nil)) yield head - - def || (right : => Parser) : Parser = new AltParser(this, right) - def ~ (right : => Parser) : Parser = new SeqParser(this, right) -} - -class AltParser(p: => Parser, q: => Parser) extends Parser { - def parse (ts: List[Token]) = p.parse(ts) ++ q.parse(ts) -} - -class SeqParser(p: => Parser, q: => Parser) extends Parser { - def parse(ts: List[Token]) = - for ((head1, tail1) <- p.parse(ts); - (head2, tail2) <- q.parse(tail1)) yield (combine(head1, head2), tail2) -} - -class ListParser(ps: => List[Parser]) extends Parser { - def parse(ts: List[Token]) = ps match { - case Nil => Set() - case p::Nil => p.parse(ts) - case p::ps => - for ((head1, tail1) <- p.parse(ts); - (head2, tail2) <- new ListParser(ps).parse(tail1)) yield (Branch(List(head1, head2)), tail2) - } -} - -case class TokParser(tok: Token) extends Parser { - def parse(ts: List[Token]) = ts match { - case t::ts if (t == tok) => Set((Leaf(t), ts)) - case _ => Set () - } -} - -implicit def token2tparser(t: Token) = TokParser(t) - -case object IdParser extends Parser { - def parse(ts: List[Token]) = ts match { - case T_ID(s)::ts => Set((Leaf(T_ID(s)), ts)) - case _ => Set () - } -} - -case object NumParser extends Parser { - def parse(ts: List[Token]) = ts match { - case T_NUM(s)::ts => Set((Leaf(T_NUM(s)), ts)) - case _ => Set () - } -} - -lazy val E: Parser = (T ~ T_OP("+") ~ E) || T // start symbol -lazy val T: Parser = (F ~ T_OP("*") ~ T) || F -lazy val F: Parser = (T_LPAREN ~ E ~ T_RPAREN) || NumParser - -println(Tok.fromString("1 + 2 + 3")) -println(E.parse_all(Tok.fromString("1 + 2 + 3"))) - -def eval(t: ParseTree) : Int = t match { - case Leaf(T_NUM(n)) => n.toInt - case Branch(List(t1, Leaf(T_OP("+")), t2)) => eval(t1) + eval(t2) - case Branch(List(t1, Leaf(T_OP("*")), t2)) => eval(t1) * eval(t2) - case Branch(List(Leaf(T_LPAREN), t, Leaf(T_RPAREN))) => eval(t) -} - -(E.parse_all(Tok.fromString("1 + 2 + 3"))).map(eval(_)) -(E.parse_all(Tok.fromString("1 + 2 * 3"))).map(eval(_)) - -lazy val EXPR: Parser = - new ListParser(List(T_IF, EXPR, T_THEN, EXPR)) || - new ListParser(List(T_IF, EXPR, T_THEN, EXPR, T_ELSE, EXPR)) || - IdParser - -println(EXPR.parse_all(Tok.fromString("if a then b else c"))) -println(EXPR.parse_all(Tok.fromString("if a then if x then y else c"))) - - - - diff -r d94fdbef1a4f -r 1c9a23304b85 progs/parser2a.scala --- a/progs/parser2a.scala Wed Sep 02 23:34:19 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,105 +0,0 @@ -// Parser combinators including semantic actions -// parses lists of tokens -// -// Needs -// :load matcher.scala - -// some regular expressions -val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz") -val ID = PLUS(LETTER) - -val DIGIT = RANGE("0123456789") -val NONZERODIGIT = RANGE("123456789") -val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") - -val LPAREN = CHAR('(') -val RPAREN = CHAR(')') - -val WHITESPACE = PLUS(RANGE(" \n")) -val OPS = RANGE("+-*") - -// for classifying the strings that have been recognised -abstract class Token - -case object T_WHITESPACE extends Token -case class T_NUM(s: String) extends Token -case class T_ID(s: String) extends Token -case class T_OP(s: String) extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token -case object T_IF extends Token -case object T_THEN extends Token -case object T_ELSE extends Token - -// lexing rules for arithmetic expressions -val lexing_rules: List[Rule[Token]]= - List(("if", (s) => T_IF), - ("then", (s) => T_THEN), - ("else", (s) => T_ELSE), - (NUMBER, (s) => T_NUM(s.mkString)), - (ID, (s) => T_ID(s.mkString)), - (WHITESPACE, (s) => T_WHITESPACE), - (LPAREN, (s) => T_LPAREN), - (RPAREN, (s) => T_RPAREN), - (OPS, (s) => T_OP(s.mkString))) - -val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE)) - -// parser combinators with return type T -abstract class Parser[T] { - def parse(ts: List[Token]): Set[(T, List[Token])] - - def parse_all(ts: List[Token]) : Set[T] = - for ((head, tail) <- parse(ts); if (tail == Nil)) yield head - - def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right) - def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f) - def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right) - def ~>[S] (right : => Parser[S]) : Parser[S] = this ~ right ==> (x => x._2) - def <~[S] (right : => Parser[S]) : Parser[T] = this ~ right ==> (x => x._1) - -} - -class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] { - def parse(sb: List[Token]) = - for ((head1, tail1) <- p.parse(sb); - (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2) -} - -class AltParser[T](p: => Parser[T], q: => Parser[T]) extends Parser[T] { - def parse (sb: List[Token]) = p.parse(sb) ++ q.parse(sb) -} - -class FunParser[T, S](p: => Parser[T], f: T => S) extends Parser[S] { - def parse (sb: List[Token]) = - for ((head, tail) <- p.parse(sb)) yield (f(head), tail) -} - - -case class TokParser(tok: Token) extends Parser[Token] { - def parse(ts: List[Token]) = ts match { - case t::ts if (t == tok) => Set((t, ts)) - case _ => Set () - } -} - -implicit def token2tparser(t: Token) = TokParser(t) - -case object NumParser extends Parser[Int] { - def parse(ts: List[Token]) = ts match { - case T_NUM(s)::ts => Set((s.toInt, ts)) - case _ => Set () - } -} - -lazy val E: Parser[Int] = (T ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z } || T -lazy val T: Parser[Int] = (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => x * z } || F -lazy val F: Parser[Int] = (T_LPAREN ~> E <~ T_RPAREN) || NumParser - -println(E.parse_all(Tok.fromString("1 + 2 + 3"))) -println(E.parse_all(Tok.fromString("1 + 2 * 3"))) -println(E.parse_all(Tok.fromString("(1 + 2) * 3"))) - -// Excercise: implement minus -println(E.parse_all(Tok.fromString("(1 - 2) * 3"))) -println(E.parse_all(Tok.fromString("(1 + 2) * - 3"))) diff -r d94fdbef1a4f -r 1c9a23304b85 progs/parser3.scala --- a/progs/parser3.scala Wed Sep 02 23:34:19 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ -package object parser { - -// parser combinators -// with input type I and return type T -// -// needs to be compiled with scalac parser3.scala - -abstract class Parser[I <% Seq[_], T] { - def parse(ts: I): Set[(T, I)] - - def parse_all(ts: I) : Set[T] = - for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head - - def parse_single(ts: I) : T = parse_all(ts).toList match { - case t::Nil => t - case _ => { println ("Parse Error") ; sys.exit(-1) } - } - - def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right) - def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f) - def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right) - def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2) - def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1) -} - -class SeqParser[I <% Seq[_], T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] { - def parse(sb: I) = - for ((head1, tail1) <- p.parse(sb); - (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2) -} - -class AltParser[I <% Seq[_], T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] { - def parse(sb: I) = p.parse(sb) ++ q.parse(sb) -} - -class FunParser[I <% Seq[_], T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] { - def parse(sb: I) = - for ((head, tail) <- p.parse(sb)) yield (f(head), tail) -} - -} diff -r d94fdbef1a4f -r 1c9a23304b85 progs/parser5.scala --- a/progs/parser5.scala Wed Sep 02 23:34:19 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,113 +0,0 @@ -val DIGIT = RANGE("0123456789") -val NONZERODIGIT = RANGE("123456789") - -val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") -val LPAREN = CHAR('(') -val RPAREN = CHAR(')') -val WHITESPACE = PLUS(RANGE(" \n")) -val OPS = RANGE("+-*") - -// for classifying the strings that have been recognised -abstract class Token -case object T_WHITESPACE extends Token -case class T_NUM(s: String) extends Token -case class T_OP(s: String) extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token - -val lexing_rules: List[Rule[Token]]= - List((NUMBER, (s) => T_NUM(s.mkString)), - (WHITESPACE, (s) => T_WHITESPACE), - (LPAREN, (s) => T_LPAREN), - (RPAREN, (s) => T_RPAREN), - (OPS, (s) => T_OP(s.mkString))) - -val Tk = Tokenizer(lexing_rules, List(T_WHITESPACE)) - - -// parser combinators with input type I and return type T -// and memoisation - -case class SubList[T](s: List[T], l: Int, h: Int) { - def low = l - def high = h - def length = h - l - def sublist(l: Int = l, h: Int = h) = s.slice(l, h) - def set(low: Int = l, high: Int = h) = SubList(s, low, high) -} - -type Ctxt[T] = List[(String, SubList[T])] - -abstract class Parser[I, T] { - - def parse(ts: SubList[I], ctxt: Ctxt[I]): Set[(T, SubList[I])] - - def parse_all(s: List[I]) : Set[T] = - for ((head, tail) <- parse(SubList(s, 0, s.length), Nil); if (tail.sublist() == Nil)) yield head - - def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right) - def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f) - def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right) - def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2) - def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1) -} - -class SeqParser[I, T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] { - def parse(sb: SubList[I], ctxt: Ctxt[I]) = - for ((head1, tail1) <- p.parse(sb, ctxt); - (head2, tail2) <- q.parse(tail1, ctxt)) yield ((head1, head2), tail2) -} - -class AltParser[I, T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] { - def parse(sb: SubList[I], ctxt: Ctxt[I]) = p.parse(sb, ctxt) ++ q.parse(sb, ctxt) -} - -class FunParser[I, T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] { - def parse(sb: SubList[I], ctxt: Ctxt[I]) = - for ((head, tail) <- p.parse(sb, ctxt)) yield (f(head), tail) -} - -case object NumParser extends Parser[Token, Int] { - def parse(sb: SubList[Token], ctxt: Ctxt[Token]) = { - if (0 < sb.length) sb.sublist(sb.low, sb.low + 1) match { - case T_NUM(i)::Nil => Set((i.toInt, sb.set(low = sb.low + 1))) - case _ => Set() - } - else Set() - } -} - -case class TokParser(t: Token) extends Parser[Token, Token] { - def parse(sb: SubList[Token], ctxt: Ctxt[Token]) = { - if (0 < sb.length && sb.sublist(sb.low, sb.low + 1) == List(t)) Set((t, sb.set(low = sb.low + 1))) - else Set() - } -} - -implicit def token2tparser(t: Token) = TokParser(t) - -class IgnLst[I, T](p: => Parser[I, T]) extends Parser[I, T] { - def parse(sb: SubList[I], ctxt: Ctxt[I]) = { - if (sb.length == 0) Set() - else for ((head, tail) <- p.parse(sb.set(high = sb.high - 1), ctxt)) - yield (head, tail.set(high = tail.high + 1)) - } -} - -class CHECK[I, T](nt: String, p: => Parser[I, T]) extends Parser[I, T] { - def parse(sb: SubList[I], ctxt: Ctxt[I]) = { - val should_trim = ctxt.contains (nt, sb) - if (should_trim && sb.length == 0) Set() - else if (should_trim) new IgnLst(p).parse(sb, (nt, sb)::ctxt) - else p.parse(sb, (nt, sb)::ctxt) - } -} - -lazy val E: Parser[Token, Int] = - new CHECK("E", (E ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z} || - (E ~ T_OP("*") ~ E) ==> { case ((x, y), z) => x * z} || - (T_LPAREN ~ E ~ T_RPAREN) ==> { case ((x, y), z) => y} || - NumParser) - -println(E.parse_all(Tk.fromString("1 + 2 * 3"))) -println(E.parse_all(Tk.fromString("(1 + 2) * 3"))) diff -r d94fdbef1a4f -r 1c9a23304b85 progs/token.scala --- a/progs/token.scala Wed Sep 02 23:34:19 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,342 +0,0 @@ -// A simple lexer inspired by work of Sulzmann & Lu -//================================================== - - -import scala.language.implicitConversions -import scala.language.reflectiveCalls - -// regular expressions including records -abstract class Rexp -case object ZERO extends Rexp -case object ONE extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class RECD(x: String, r: Rexp) extends Rexp - -// values -abstract class Val -case object Empty extends Val -case class Chr(c: Char) extends Val -case class Sequ(v1: Val, v2: Val) extends Val -case class Left(v: Val) extends Val -case class Right(v: Val) extends Val -case class Stars(vs: List[Val]) extends Val -case class Rec(x: String, v: Val) extends Val - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]): Rexp = s match { - case Nil => ONE - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = - charlist2rexp(s.toList) - -implicit def RexpOps(r: Rexp) = new { - def | (s: Rexp) = ALT(r, s) - def % = STAR(r) - def ~ (s: Rexp) = SEQ(r, s) -} - -implicit def stringOps(s: String) = new { - def | (r: Rexp) = ALT(s, r) - def | (r: String) = ALT(s, r) - def % = STAR(s) - def ~ (r: Rexp) = SEQ(s, r) - def ~ (r: String) = SEQ(s, r) - def $ (r: Rexp) = RECD(s, r) -} - -def nullable(r: Rexp) : Boolean = r match { - case ZERO => false - case ONE => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case RECD(_, r1) => nullable(r1) -} - -def der(c: Char, r: Rexp) : Rexp = r match { - case ZERO => ZERO - case ONE => ZERO - case CHAR(d) => if (c == d) ONE else ZERO - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) - case RECD(_, r1) => der(c, r1) -} - - -// extracts a string from value -def flatten(v: Val) : String = v match { - case Empty => "" - case Chr(c) => c.toString - case Left(v) => flatten(v) - case Right(v) => flatten(v) - case Sequ(v1, v2) => flatten(v1) + flatten(v2) - case Stars(vs) => vs.map(flatten).mkString - case Rec(_, v) => flatten(v) -} - - -// extracts an environment from a value; -// used for tokenise a string -def env(v: Val) : List[(String, String)] = v match { - case Empty => Nil - case Chr(c) => Nil - case Left(v) => env(v) - case Right(v) => env(v) - case Sequ(v1, v2) => env(v1) ::: env(v2) - case Stars(vs) => vs.flatMap(env) - case Rec(x, v) => (x, flatten(v))::env(v) -} - -// The Injection Part of the lexer - -def mkeps(r: Rexp) : Val = r match { - case ONE => Empty - case ALT(r1, r2) => - if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2)) - case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) - case STAR(r) => Stars(Nil) - case RECD(x, r) => Rec(x, mkeps(r)) -} - -def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { - case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) - case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) - case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) - case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) - case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1)) - case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2)) - case (CHAR(d), Empty) => Chr(c) - case (RECD(x, r1), _) => Rec(x, inj(r1, c, v)) -} - -// some "rectification" functions for simplification -def F_ID(v: Val): Val = v -def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v)) -def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v)) -def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match { - case Right(v) => Right(f2(v)) - case Left(v) => Left(f1(v)) -} -def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match { - case Sequ(v1, v2) => Sequ(f1(v1), f2(v2)) -} -def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = - (v:Val) => Sequ(f1(Empty), f2(v)) -def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = - (v:Val) => Sequ(f1(v), f2(Empty)) -def F_RECD(f: Val => Val) = (v:Val) => v match { - case Rec(x, v) => Rec(x, f(v)) -} -def F_ERROR(v: Val): Val = throw new Exception("error") - -def simp(r: Rexp): (Rexp, Val => Val) = r match { - case ALT(r1, r2) => { - val (r1s, f1s) = simp(r1) - val (r2s, f2s) = simp(r2) - (r1s, r2s) match { - case (ZERO, _) => (r2s, F_RIGHT(f2s)) - case (_, ZERO) => (r1s, F_LEFT(f1s)) - case _ => if (r1s == r2s) (r1s, F_LEFT(f1s)) - else (ALT (r1s, r2s), F_ALT(f1s, f2s)) - } - } - case SEQ(r1, r2) => { - val (r1s, f1s) = simp(r1) - val (r2s, f2s) = simp(r2) - (r1s, r2s) match { - case (ZERO, _) => (ZERO, F_ERROR) - case (_, ZERO) => (ZERO, F_ERROR) - case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s)) - case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s)) - case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s)) - } - } - case RECD(x, r1) => { - val (r1s, f1s) = simp(r1) - (RECD(x, r1s), F_RECD(f1s)) - } - case r => (r, F_ID) -} - -// lexing functions including simplification -def lex_simp(r: Rexp, s: List[Char]) : Val = s match { - case Nil => if (nullable(r)) mkeps(r) else - { throw new Exception("lexing error") } - case c::cs => { - val (r_simp, f_simp) = simp(der(c, r)) - inj(r, c, f_simp(lex_simp(r_simp, cs))) - } -} - -def lexing_simp(r: Rexp, s: String) = - env(lex_simp(r, s.toList)) - - -// The Lexing Rules for the Fun Language - -def PLUS(r: Rexp) = r ~ r.% - -def Range(s : List[Char]) : Rexp = s match { - case Nil => ZERO - case c::Nil => CHAR(c) - case c::s => ALT(CHAR(c), Range(s)) -} -def RANGE(s: String) = Range(s.toList) - -val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") -val DIGIT = RANGE("0123456789") -val ID = SYM ~ (SYM | DIGIT).% -val NUM = PLUS(DIGIT) -val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" -val SEMI: Rexp = ";" -val OP: Rexp = ":=" | "=" | "-" | "+" | "*" | "!=" | "<" | ">" -val WHITESPACE = PLUS(" " | "\n" | "\t") -val RPAREN: Rexp = "{" -val LPAREN: Rexp = "}" -val STRING: Rexp = "\"" ~ SYM.% ~ "\"" - - -val WHILE_REGS = (("k" $ KEYWORD) | - ("i" $ ID) | - ("o" $ OP) | - ("n" $ NUM) | - ("s" $ SEMI) | - ("str" $ STRING) | - ("p" $ (LPAREN | RPAREN)) | - ("w" $ WHITESPACE)).% - - -// Two Simple While Tests -//======================== - -println("test: read n") - -val prog0 = """read n""" -println(lexing_simp(WHILE_REGS, prog0)) - -println("test: read n; write n ") - -val prog1 = """read n; write n""" -println(lexing_simp(WHILE_REGS, prog1)) - - -// Bigger Tests -//============== - -// escapes strings and prints them out as "", "\n" and so on -def esc(raw: String): String = { - import scala.reflect.runtime.universe._ - Literal(Constant(raw)).toString -} - -def escape(tks: List[(String, String)]) = - tks.map{ case (s1, s2) => (s1, esc(s2))} - -val prog2 = """ -write "Fib"; -read n; -minus1 := 0; -minus2 := 1; -while n > 0 do { - temp := minus2; - minus2 := minus1 + minus2; - minus1 := temp; - n := n - 1 -}; -write "Result"; -write minus2 -""" - -println("Tokens for Fib") -println(escape(lexing_simp(WHILE_REGS, prog2)).mkString("\n")) - - - -val prog3 = """ -start := 1000; -x := start; -y := start; -z := start; -while 0 < x do { - while 0 < y do { - while 0 < z do { - z := z - 1 - }; - z := start; - y := y - 1 - }; - y := start; - x := x - 1 -} -""" - -println("Tokens for Loops") -println(escape(lexing_simp(WHILE_REGS, prog3)).mkString("\n")) - - - - - - - -// The tokens for the WHILE language -import java.io._ - -abstract class Token extends Serializable -case object T_SEMI extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token -case class T_ID(s: String) extends Token -case class T_OP(s: String) extends Token -case class T_NUM(n: Int) extends Token -case class T_KWD(s: String) extends Token -case class T_STR(s: String) extends Token - -val token : PartialFunction[(String, String), Token] = { - case ("s", _) => T_SEMI - case ("p", "{") => T_LPAREN - case ("p", "}") => T_RPAREN - case ("i", s) => T_ID(s) - case ("o", s) => T_OP(s) - case ("n", s) => T_NUM(s.toInt) - case ("k", s) => T_KWD(s) - case ("str", s) => T_STR(s) -} - -def tokenise(s: String) : List[Token] = - lexing_simp(WHILE_REGS, s).collect(token) - -tokenise(prog2) -tokenise(prog3) - - -object Lexer extends App { - - val fname = "/tmp/nflx" - - def serialise(tks: List[Token]) = { - val out = new ObjectOutputStream(new FileOutputStream(fname)) - out.writeObject(tks) - out.close - } - - def deserialise() : List[Token] = { - val in = new ObjectInputStream(new FileInputStream(fname)) - val tks = in.readObject.asInstanceOf[List[Token]] - in.close - tks - } - - serialise(tokenise(prog2)) - println(deserialise()) - -} \ No newline at end of file diff -r d94fdbef1a4f -r 1c9a23304b85 progs/token2.scala --- a/progs/token2.scala Wed Sep 02 23:34:19 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,463 +0,0 @@ -import scala.language.implicitConversions -import scala.language.reflectiveCalls -import scala.annotation.tailrec - -abstract class Rexp -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class RECD(x: String, r: Rexp) extends Rexp -case class CRANGE(cs: String) extends Rexp -case class PLUS(r: Rexp) extends Rexp -case class OPT(r: Rexp) extends Rexp -case class NTIMES(r: Rexp, n: Int) extends Rexp - -abstract class Val -case object Empty extends Val -case class Chr(c: Char) extends Val -case class Seq(v1: Val, v2: Val) extends Val -case class Left(v: Val) extends Val -case class Right(v: Val) extends Val -case class Stars(vs: List[Val]) extends Val -case class Rec(x: String, v: Val) extends Val - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]): Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - -implicit def RexpOps(r: Rexp) = new { - def | (s: Rexp) = ALT(r, s) - def % = STAR(r) - def ~ (s: Rexp) = SEQ(r, s) -} - -implicit def stringOps(s: String) = new { - def | (r: Rexp) = ALT(s, r) - def | (r: String) = ALT(s, r) - def % = STAR(s) - def ~ (r: Rexp) = SEQ(s, r) - def ~ (r: String) = SEQ(s, r) - def $ (r: Rexp) = RECD(s, r) -} - -// nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case RECD(_, r) => nullable(r) - case CRANGE(_) => false - case PLUS(r) => nullable(r) - case OPT(_) => true - case NTIMES(r, n) => if (n == 0) true else nullable(r) -} - -// derivative of a regular expression w.r.t. a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) - case RECD(_, r1) => der(c, r1) - case CRANGE(cs) => if (cs.contains(c)) EMPTY else NULL - case PLUS(r) => SEQ(der(c, r), STAR(r)) - case OPT(r) => ALT(der(c, r), NULL) - case NTIMES(r, n) => if (n == 0) NULL else der(c, SEQ(r, NTIMES(r, n - 1))) -} - -// derivative w.r.t. a string (iterates der) -def ders (s: List[Char], r: Rexp) : Rexp = s match { - case Nil => r - case c::s => ders(s, der(c, r)) -} - -// extracts a string from value -def flatten(v: Val) : String = v match { - case Empty => "" - case Chr(c) => c.toString - case Left(v) => flatten(v) - case Right(v) => flatten(v) - case Seq(v1, v2) => flatten(v1) + flatten(v2) - case Stars(vs) => vs.map(flatten).mkString - case Rec(_, v) => flatten(v) -} - -// extracts an environment from a value -def env(v: Val) : List[(String, String)] = v match { - case Empty => Nil - case Chr(c) => Nil - case Left(v) => env(v) - case Right(v) => env(v) - case Seq(v1, v2) => env(v1) ::: env(v2) - case Stars(vs) => vs.flatMap(env) - case Rec(x, v) => (x, flatten(v))::env(v) -} - -// injection part -def mkeps(r: Rexp) : Val = r match { - case EMPTY => Empty - case ALT(r1, r2) => - if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2)) - case SEQ(r1, r2) => Seq(mkeps(r1), mkeps(r2)) - case STAR(r) => Stars(Nil) - case RECD(x, r) => Rec(x, mkeps(r)) - case PLUS(r) => Stars(List(mkeps(r))) - case OPT(_) => Right(Empty) - case NTIMES(r, n) => if (n == 0) Stars(Nil) else Stars(Nil.padTo(n, mkeps(r))) - case _ => { println ("r : " + r.toString); - throw new Exception("mkeps error")} -} - - -def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { - case (STAR(r), Seq(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) - case (SEQ(r1, r2), Seq(v1, v2)) => Seq(inj(r1, c, v1), v2) - case (SEQ(r1, r2), Left(Seq(v1, v2))) => Seq(inj(r1, c, v1), v2) - case (SEQ(r1, r2), Right(v2)) => Seq(mkeps(r1), inj(r2, c, v2)) - case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1)) - case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2)) - case (CHAR(_), Empty) => Chr(c) - case (CRANGE(_), Empty) => Chr(c) - case (RECD(x, r1), _) => Rec(x, inj(r1, c, v)) - case (PLUS(r), Seq(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) - case (OPT(r), Left(v1)) => Left(inj(r, c, v1)) - case (NTIMES(r, n), Seq(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) - case (NTIMES(r, n), Left(Seq(v1, Stars(vs)))) => Stars(inj(r, c, v1)::vs) - case (NTIMES(r, n), Right(Stars(v::vs))) => Stars(mkeps(r)::inj(r, c, v)::vs) - case _ => { println ("r : " + r.toString + " v: " + v.toString); - throw new Exception("inj error")} -} - -// main lexing function (produces a value) -def lex(r: Rexp, s: List[Char]) : Val = s match { - case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched") - case c::cs => inj(r, c, lex(der(c, r), cs)) -} - -def lexing(r: Rexp, s: String) : Val = lex(r, s.toList) - -lexing(("ab" | "ab") ~ ("b" | EMPTY), "ab") - -lexing(OPT("ab"), "ab") - -lexing(NTIMES("1", 3), "111") -lexing(NTIMES("1" | EMPTY, 3), "11") - -// some "rectification" functions for simplification -def F_ID(v: Val): Val = v -def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v)) -def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v)) -def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match { - case Right(v) => Right(f2(v)) - case Left(v) => Left(f1(v)) -} -def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match { - case Seq(v1, v2) => Seq(f1(v1), f2(v2)) -} -def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = - (v:Val) => Seq(f1(Empty), f2(v)) -def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = - (v:Val) => Seq(f1(v), f2(Empty)) -def F_RECD(f: Val => Val) = (v:Val) => v match { - case Rec(x, v) => Rec(x, f(v)) -} -def F_ERROR(v: Val): Val = throw new Exception("error") - -// simplification of regular expressions returning also an -// rectification function; no simplification under STAR -def simp(r: Rexp): (Rexp, Val => Val) = r match { - case ALT(r1, r2) => { - val (r1s, f1s) = simp(r1) - val (r2s, f2s) = simp(r2) - (r1s, r2s) match { - case (NULL, _) => (r2s, F_RIGHT(f2s)) - case (_, NULL) => (r1s, F_LEFT(f1s)) - case _ => if (r1s == r2s) (r1s, F_LEFT(f1s)) - else (ALT (r1s, r2s), F_ALT(f1s, f2s)) - } - } - case SEQ(r1, r2) => { - val (r1s, f1s) = simp(r1) - val (r2s, f2s) = simp(r2) - (r1s, r2s) match { - case (NULL, _) => (NULL, F_ERROR) - case (_, NULL) => (NULL, F_ERROR) - case (EMPTY, _) => (r2s, F_SEQ_Empty1(f1s, f2s)) - case (_, EMPTY) => (r1s, F_SEQ_Empty2(f1s, f2s)) - case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s)) - } - } - case RECD(x, r1) => { - val (r1s, f1s) = simp(r1) - (RECD(x, r1s), F_RECD(f1s)) - } - case r => (r, F_ID) -} - -def lex_simp(r: Rexp, s: List[Char]) : Val = s match { - case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched") - case c::cs => { - val (r_simp, f_simp) = simp(der(c, r)) - inj(r, c, f_simp(lex_simp(r_simp, cs))) - } -} - -def lexing_simp(r: Rexp, s: String) : Val = lex_simp(r, s.toList) - -lexing_simp(("a" | "ab") ~ ("b" | ""), "ab") - -lexing_simp(OPT("ab"), "ab") - -// Lexing Rules for a Small While Language - -val SYM = CRANGE("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") -val DIGIT = CRANGE("0123456789") -val ID = SYM ~ (SYM | DIGIT).% -val NUM = PLUS(DIGIT) -val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" | "true" | "false" -val SEMI: Rexp = ";" -val OP: Rexp = ":=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/" -val WHITESPACE = PLUS(" " | "\n" | "\t") -val RPAREN: Rexp = ")" -val LPAREN: Rexp = "(" -val BEGIN: Rexp = "{" -val END: Rexp = "}" -val STRING: Rexp = "\"" ~ SYM.% ~ "\"" - - -val WHILE_REGS = (("k" $ KEYWORD) | - ("i" $ ID) | - ("o" $ OP) | - ("n" $ NUM) | - ("s" $ SEMI) | - ("str" $ STRING) | - ("p" $ (LPAREN | RPAREN)) | - ("b" $ (BEGIN | END)) | - ("w" $ WHITESPACE)).% - -// filters out all white spaces -def tokenise(r: Rexp, s: String) = - env(lexing_simp(r, s)).filterNot { (s) => s._1 == "w"} - -// Testing -//============ - -def time[T](code: => T) = { - val start = System.nanoTime() - val result = code - val end = System.nanoTime() - println((end - start)/1.0e9) - result -} - -println(lexing_simp(OPT("1"), "1")) -println(lexing_simp(OPT("1"), "")) -println(ders("111".toList, NTIMES("1",3))) -println(lexing_simp(NTIMES("1",3), "111")) - -val r1 = ("a" | "ab") ~ ("bcd" | "c") -println(lexing(r1, "abcd")) - -val r2 = ("" | "a") ~ ("ab" | "b") -println(lexing(r2, "ab")) - - -// Two Simple While Tests -//======================== -println("prog0 test") - -val prog0 = """read n""" -println(env(lexing_simp(WHILE_REGS, prog0))) - -println("prog1 test") - -val prog1 = """read n; write (n)""" -println(env(lexing_simp(WHILE_REGS, prog1))) - - -// Big Test -//========== - -def escape(raw: String): String = { - import scala.reflect.runtime.universe._ - Literal(Constant(raw)).toString -} - -val prog2 = """ -write "Fib"; -read n; -minus1 := 0; -minus2 := 1; -while n > 0 do { - temp := minus2; - minus2 := minus1 + minus2; - minus1 := temp; - n := n - 1 -}; -write "Result"; -write minus2 -""" - -val prog3 = """ -start := 1000; -x := start; -y := start; -z := start; -while 0 < x do { - while 0 < y do { - while 0 < z do { - z := z - 1 - }; - z := start; - y := y - 1 - }; - y := start; - x := x - 1 -} -""" - - -println("Tokens") -println(tokenise(WHILE_REGS, prog2).mkString("\n")) - -val fib_tokens = tokenise(WHILE_REGS, prog2) -fib_tokens.map{case (s1, s2) => (escape(s1), escape(s2))}.mkString(",\n") - - -val test_tokens = tokenise(WHILE_REGS, prog3) -test_tokens.map{case (s1, s2) => (escape(s1), escape(s2))}.mkString(",\n") - - -/* -for (i <- 1 to 120 by 10) { - print(i.toString + ": ") - time(lexing_simp(WHILE_REGS, prog2 * i)) -} -*/ - -val toks_fib = - List(("k","write"), - ("str","Fib"), - ("s",";"), - ("k","read"), - ("i","n"), - ("s",";"), - ("i","minus1"), - ("o",":="), - ("n","0"), - ("s",";"), - ("i","minus2"), - ("o",":="), - ("n","1"), - ("s",";"), - ("k","while"), - ("i","n"), - ("o",">"), - ("n","0"), - ("k","do"), - ("b","{"), - ("i","temp"), - ("o",":="), - ("i","minus2"), - ("s",";"), - ("i","minus2"), - ("o",":="), - ("i","minus1"), - ("o","+"), - ("i","minus2"), - ("s",";"), - ("i","minus1"), - ("o",":="), - ("i","temp"), - ("s",";"), - ("i","n"), - ("o",":="), - ("i","n"), - ("o","-"), - ("n","1"), - ("b","}"), - ("s",";"), - ("k","write"), - ("str","Result"), - ("s",";"), - ("k","write"), - ("i","minus2")) - -val toks_test = - List(("i","start"), - ("o",":="), - ("n","1000"), - ("s",";"), - ("i","x"), - ("o",":="), - ("i","start"), - ("s",";"), - ("i","y"), - ("o",":="), - ("i","start"), - ("s",";"), - ("i","z"), - ("o",":="), - ("i","start"), - ("s",";"), - ("k","while"), - ("n","0"), - ("o","<"), - ("i","x"), - ("k","do"), - ("b","{"), - ("k","while"), - ("n","0"), - ("o","<"), - ("i","y"), - ("k","do"), - ("b","{"), - ("k","while"), - ("n","0"), - ("o","<"), - ("i","z"), - ("k","do"), - ("b","{"), - ("i","z"), - ("o",":="), - ("i","z"), - ("o","-"), - ("n","1"), - ("b","}"), - ("s",";"), - ("i","z"), - ("o",":="), - ("i","start"), - ("s",";"), - ("i","y"), - ("o",":="), - ("i","y"), - ("o","-"), - ("n","1"), - ("b","}"), - ("s",";"), - ("i","y"), - ("o",":="), - ("i","start"), - ("s",";"), - ("i","x"), - ("o",":="), - ("i","x"), - ("o","-"), - ("n","1"), - ("b","}")) diff -r d94fdbef1a4f -r 1c9a23304b85 progs/tokenise.scala --- a/progs/tokenise.scala Wed Sep 02 23:34:19 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,276 +0,0 @@ -// A simple tokeniser based on the Sulzmann & Lu algorithm -//========================================================= -// -// call with -// -// scala tokenise.scala fib.while -// -// scala tokenise.scala loops.while -// -// this will generate a .tks file that can be deserialised back -// into a list of tokens -// you can add -Xno-patmat-analysis in order to get rid of the -// match-not-exhaustive warning - -object Tokenise { - -import scala.language.implicitConversions -import scala.language.reflectiveCalls - -// regular expressions including records -abstract class Rexp -case object ZERO extends Rexp -case object ONE extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class RECD(x: String, r: Rexp) extends Rexp - -// values -abstract class Val -case object Empty extends Val -case class Chr(c: Char) extends Val -case class Sequ(v1: Val, v2: Val) extends Val -case class Left(v: Val) extends Val -case class Right(v: Val) extends Val -case class Stars(vs: List[Val]) extends Val -case class Rec(x: String, v: Val) extends Val - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]): Rexp = s match { - case Nil => ONE - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = - charlist2rexp(s.toList) - -implicit def RexpOps(r: Rexp) = new { - def | (s: Rexp) = ALT(r, s) - def % = STAR(r) - def ~ (s: Rexp) = SEQ(r, s) -} - -implicit def stringOps(s: String) = new { - def | (r: Rexp) = ALT(s, r) - def | (r: String) = ALT(s, r) - def % = STAR(s) - def ~ (r: Rexp) = SEQ(s, r) - def ~ (r: String) = SEQ(s, r) - def $ (r: Rexp) = RECD(s, r) -} - -def nullable(r: Rexp) : Boolean = r match { - case ZERO => false - case ONE => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case RECD(_, r1) => nullable(r1) -} - -def der(c: Char, r: Rexp) : Rexp = r match { - case ZERO => ZERO - case ONE => ZERO - case CHAR(d) => if (c == d) ONE else ZERO - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) - case RECD(_, r1) => der(c, r1) -} - - -// extracts a string from value -def flatten(v: Val) : String = v match { - case Empty => "" - case Chr(c) => c.toString - case Left(v) => flatten(v) - case Right(v) => flatten(v) - case Sequ(v1, v2) => flatten(v1) + flatten(v2) - case Stars(vs) => vs.map(flatten).mkString - case Rec(_, v) => flatten(v) -} - - -// extracts an environment from a value; -// used for tokenise a string -def env(v: Val) : List[(String, String)] = v match { - case Empty => Nil - case Chr(c) => Nil - case Left(v) => env(v) - case Right(v) => env(v) - case Sequ(v1, v2) => env(v1) ::: env(v2) - case Stars(vs) => vs.flatMap(env) - case Rec(x, v) => (x, flatten(v))::env(v) -} - -// The Injection Part of the lexer - -def mkeps(r: Rexp) : Val = r match { - case ONE => Empty - case ALT(r1, r2) => - if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2)) - case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) - case STAR(r) => Stars(Nil) - case RECD(x, r) => Rec(x, mkeps(r)) -} - -def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { - case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) - case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) - case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) - case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) - case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1)) - case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2)) - case (CHAR(d), Empty) => Chr(c) - case (RECD(x, r1), _) => Rec(x, inj(r1, c, v)) -} - -// some "rectification" functions for simplification -def F_ID(v: Val): Val = v -def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v)) -def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v)) -def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match { - case Right(v) => Right(f2(v)) - case Left(v) => Left(f1(v)) -} -def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match { - case Sequ(v1, v2) => Sequ(f1(v1), f2(v2)) -} -def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = - (v:Val) => Sequ(f1(Empty), f2(v)) -def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = - (v:Val) => Sequ(f1(v), f2(Empty)) -def F_RECD(f: Val => Val) = (v:Val) => v match { - case Rec(x, v) => Rec(x, f(v)) -} -def F_ERROR(v: Val): Val = throw new Exception("error") - -def simp(r: Rexp): (Rexp, Val => Val) = r match { - case ALT(r1, r2) => { - val (r1s, f1s) = simp(r1) - val (r2s, f2s) = simp(r2) - (r1s, r2s) match { - case (ZERO, _) => (r2s, F_RIGHT(f2s)) - case (_, ZERO) => (r1s, F_LEFT(f1s)) - case _ => if (r1s == r2s) (r1s, F_LEFT(f1s)) - else (ALT (r1s, r2s), F_ALT(f1s, f2s)) - } - } - case SEQ(r1, r2) => { - val (r1s, f1s) = simp(r1) - val (r2s, f2s) = simp(r2) - (r1s, r2s) match { - case (ZERO, _) => (ZERO, F_ERROR) - case (_, ZERO) => (ZERO, F_ERROR) - case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s)) - case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s)) - case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s)) - } - } - case r => (r, F_ID) -} - -// lexing functions including simplification -def lex_simp(r: Rexp, s: List[Char]) : Val = s match { - case Nil => if (nullable(r)) mkeps(r) else - { throw new Exception("lexing error") } - case c::cs => { - val (r_simp, f_simp) = simp(der(c, r)) - inj(r, c, f_simp(lex_simp(r_simp, cs))) - } -} - -def lexing_simp(r: Rexp, s: String) = - env(lex_simp(r, s.toList)) - - -// The Lexing Rules for the WHILE Language - -// inefficient representations for some extended regular -// expressions -def PLUS(r: Rexp) = r ~ r.% - -def Range(s : List[Char]) : Rexp = s match { - case Nil => ZERO - case c::Nil => CHAR(c) - case c::s => ALT(CHAR(c), Range(s)) -} -def RANGE(s: String) = Range(s.toList) - -val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") -val DIGIT = RANGE("0123456789") -val ID = SYM ~ (SYM | DIGIT).% -val NUM = PLUS(DIGIT) -val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" -val SEMI: Rexp = ";" -val OP: Rexp = ":=" | "=" | "-" | "+" | "*" | "!=" | "<" | ">" -val WHITESPACE = PLUS(" " | "\n" | "\t") -val RPAREN: Rexp = "{" -val LPAREN: Rexp = "}" -val STRING: Rexp = "\"" ~ SYM.% ~ "\"" - - -val WHILE_REGS = (("k" $ KEYWORD) | - ("i" $ ID) | - ("o" $ OP) | - ("n" $ NUM) | - ("s" $ SEMI) | - ("str" $ STRING) | - ("p" $ (LPAREN | RPAREN)) | - ("w" $ WHITESPACE)).% - - - -// Generate tokens for the WHILE language -// and serialise them into a .tks file - -import java.io._ - -abstract class Token extends Serializable -case object T_SEMI extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token -case class T_ID(s: String) extends Token -case class T_OP(s: String) extends Token -case class T_NUM(n: Int) extends Token -case class T_KWD(s: String) extends Token -case class T_STR(s: String) extends Token - -// transforms pairs into tokens -val token : PartialFunction[(String, String), Token] = { - case ("s", _) => T_SEMI - case ("p", "{") => T_LPAREN - case ("p", "}") => T_RPAREN - case ("i", s) => T_ID(s) - case ("o", s) => T_OP(s) - case ("n", s) => T_NUM(s.toInt) - case ("k", s) => T_KWD(s) - case ("str", s) => T_STR(s) -} - -// filters out all un-interesting tokens -def tokenise(s: String) : List[Token] = - lexing_simp(WHILE_REGS, s).collect(token) - - -def serialise[T](fname: String, data: T) = { - import scala.util.Using - Using(new ObjectOutputStream(new FileOutputStream(fname))) { - out => out.writeObject(data) - } -} - -def main(args: Array[String]) : Unit = { - val fname = args(0) - val tname = fname.stripSuffix(".while") ++ ".tks" - val file = io.Source.fromFile(fname).mkString - serialise(tname, tokenise(file)) -} - - -} diff -r d94fdbef1a4f -r 1c9a23304b85 progs/toks.scala --- a/progs/toks.scala Wed Sep 02 23:34:19 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,136 +0,0 @@ -abstract class Token -case object T_SEMI extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token -case class T_ID(s: String) extends Token -case class T_OP(s: String) extends Token -case class T_NUM(n: Int) extends Token -case class T_KWD(s: String) extends Token -case class T_STR(s: String) extends Token - - -// Loops program -//=============== - -/* -start := 1000; -x := start; -y := start; -z := start; -while 0 < x do { - while 0 < y do { - while 0 < z do { z := z - 1 }; - z := start; - y := y - 1 - }; - y := start; - x := x - 1 -} -*/ - -val loops = - List(T_ID("start"), T_OP(":="), T_NUM(1000), T_SEMI, T_ID("x"), T_OP(":="), - T_ID("start"), T_SEMI, T_ID("y"), T_OP(":="), T_ID("start"), T_SEMI, - T_ID("z"), T_OP(":="), T_ID("start"), T_SEMI, T_KWD("while"), T_NUM(0), - T_OP("<"), T_ID("x"), T_KWD("do"), T_LPAREN, T_KWD("while"), T_NUM(0), - T_OP("<"), T_ID("y"), T_KWD("do"), T_LPAREN, T_KWD("while"), T_NUM(0), - T_OP("<"), T_ID("z"), T_KWD("do"), T_LPAREN, T_ID("z"), T_OP(":="), - T_ID("z"), T_OP("-"), T_NUM(1), T_RPAREN, T_SEMI, T_ID("z"), T_OP(":="), - T_ID("start"), T_SEMI, T_ID("y"), T_OP(":="), T_ID("y"), T_OP("-"), - T_NUM(1), T_RPAREN, T_SEMI, T_ID("y"), T_OP(":="), T_ID("start"), - T_SEMI, T_ID("x"), T_OP(":="), T_ID("x"), T_OP("-"), T_NUM(1), T_RPAREN) - - -// Fib program -//============= - -/* -write "Fib"; -read n; -minus1 := 0; -minus2 := 1; -while n > 0 do { - temp := minus2; - minus2 := minus1 + minus2; - minus1 := temp; - n := n - 1 -}; -write "Result"; -write minus2 -*/ - -val fib = - List(T_KWD("write"), T_STR("Fib"), T_SEMI, T_KWD("read"), T_ID("n"), - T_SEMI, T_ID("minus1"), T_OP(":="), T_NUM(0), T_SEMI, T_ID("minus2"), - T_OP(":="), T_NUM(1), T_SEMI, T_KWD("while"), T_ID("n"), T_OP(">"), - T_NUM(0), T_KWD("do"), T_LPAREN, T_ID("temp"), T_OP(":="), - T_ID("minus2"), T_SEMI, T_ID("minus2"), T_OP(":="), T_ID("minus1"), - T_OP("+"), T_ID("minus2"), T_SEMI, T_ID("minus1"), T_OP(":="), - T_ID("temp"), T_SEMI, T_ID("n"), T_OP(":="), T_ID("n"), T_OP("-"), - T_NUM(1), T_RPAREN, T_SEMI, T_KWD("write"), T_STR("Result"), T_SEMI, - T_KWD("write"), T_ID("minus2")) - - - -// Factors program -//================= - -/* -write "Input n please"; -read n; -write "The factors of n are"; -f := 2; -while n != 1 do { - while (n / f) * f == n do { - write f; - n := n / f - }; - f := f + 1 -} -*/ - -val factors = - List(T_KWD("write"), T_STR("Input n please"), T_SEMI, T_KWD("read"), - T_ID("n"), T_SEMI, T_KWD("write"), T_STR("The factors of n are"), - T_SEMI, T_ID("f"), T_OP(":="), T_NUM(2), T_SEMI, T_KWD("while"), - T_ID("n"), T_OP("!="), T_NUM(1), T_KWD("do"), T_LPAREN, - T_KWD("while"), T_ID("n"), T_OP("/"), T_ID("f"), T_OP("*"), - T_ID("f"), T_OP("=="), T_ID("n"), T_KWD("do"), T_LPAREN, - T_KWD("write"), T_ID("f"), T_SEMI, T_ID("n"), T_OP(":="), - T_ID("n"), T_OP("/"), T_ID("f"), T_RPAREN, T_SEMI, T_ID("f"), - T_OP(":="), T_ID("f"), T_OP("+"), T_NUM(1), T_RPAREN) - - -// Primes program -//================ - -/* -end := 100; -n := 2; -while (n < end) do { - f := 2; - tmp := 0; - while ((f < n / 2 + 1) && (tmp == 0)) do { - if ((n / f) * f == n) then { tmp := 1 } else { skip }; - f := f + 1 - }; - if (tmp == 0) then { write(n) } else { skip }; - n := n + 1 -} -*/ - -val primes = - List(T_ID("end"), T_OP(":="), T_NUM(100), T_SEMI, T_ID("n"), T_OP(":="), - T_NUM(2), T_SEMI, T_KWD("while"), T_ID("n"), T_OP("<"), T_ID("end"), - T_KWD("do"), T_LPAREN, T_ID("f"), T_OP(":="), T_NUM(2), T_SEMI, - T_ID("tmp"), T_OP(":="), T_NUM(0), T_SEMI, T_KWD("while"), T_ID("f"), - T_OP("<"), T_ID("n"), T_OP("/"), T_NUM(2), T_OP("+"), T_NUM(1), - T_OP("&&"), T_ID("tmp"), T_OP("=="), T_NUM(0), T_KWD("do"), T_LPAREN, - T_KWD("if"), T_ID("n"), T_OP("/"), T_ID("f"), T_OP("*"), T_ID("f"), - T_OP("=="), T_ID("n"), T_KWD("then"), T_LPAREN, T_ID("tmp"), T_OP(":="), - T_NUM(1), T_RPAREN, T_KWD("else"), T_LPAREN, T_KWD("skip"), T_RPAREN, - T_SEMI, T_ID("f"), T_OP(":="), T_ID("f"), T_OP("+"), T_NUM(1), - T_RPAREN, T_SEMI, T_KWD("if"), T_ID("tmp"), T_OP("=="), T_NUM(0), - T_KWD("then"), T_LPAREN, T_KWD("write"), T_ID("n"), T_RPAREN, - T_KWD("else"), T_LPAREN, T_KWD("skip"), T_RPAREN, T_SEMI, T_ID("n"), - T_OP(":="), T_ID("n"), T_OP("+"), T_NUM(1), T_RPAREN) diff -r d94fdbef1a4f -r 1c9a23304b85 slides/slides01.pdf Binary file slides/slides01.pdf has changed diff -r d94fdbef1a4f -r 1c9a23304b85 slides/slides01.tex --- a/slides/slides01.tex Wed Sep 02 23:34:19 2020 +0100 +++ b/slides/slides01.tex Mon Sep 07 12:18:07 2020 +0100 @@ -5,6 +5,11 @@ \usepackage{../langs} \usepackage{../data} +\usepackage{tcolorbox} +\newtcolorbox{mybox}{colback=red!5!white,colframe=red!75!black} +\newtcolorbox{mybox2}[1]{colback=red!5!white,colframe=red!75!black,fonttitle=\bfseries,title=#1} +\newtcolorbox{mybox3}[1]{colback=Cyan!5!white,colframe=Cyan!75!black,fonttitle=\bfseries,title=#1} + \hfuzz=220pt @@ -26,8 +31,24 @@ \begin{document} +\begin{frame}[t] +\begin{mybox} +A physical explanation the \emph{dynamic matrix}\\ +lots of text +\end{mybox} +\begin{mybox2}{Test} +A physical explanation the \emph{dynamic matrix}\\ +lots of text +\end{mybox2} + +\begin{mybox3}{Test} +A physical explanation the \emph{dynamic matrix}\\ +lots of text +\end{mybox3} +\end{frame} + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[t] \frametitle{% diff -r d94fdbef1a4f -r 1c9a23304b85 solution/cw1/cw1.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/solution/cw1/cw1.scala Mon Sep 07 12:18:07 2020 +0100 @@ -0,0 +1,264 @@ +// CW 1 +import scala.language.implicitConversions +import scala.language.reflectiveCalls + +abstract class Rexp +case object ZERO extends Rexp // matches nothing +case object ONE extends Rexp // matches the empty string +case class CHAR(c: Char) extends Rexp // matches a character c +case class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternative +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence +case class STAR(r: Rexp) extends Rexp // star +case class RANGE(cs: Set[Char]) extends Rexp // set of characters +case class PLUS(r: Rexp) extends Rexp // plus +case class OPT(r: Rexp) extends Rexp // optional +case class NTIMES(r: Rexp, n: Int) extends Rexp // n-times +case class UPNTIMES(r: Rexp, n: Int) extends Rexp // up n-times +case class FROMNTIMES(r: Rexp, n: Int) extends Rexp // from n-times +case class NMTIMES(r: Rexp, n: Int, m: Int) extends Rexp // between nm-times +case class NOT(r: Rexp) extends Rexp // not +case class CFUN(f: Char => Boolean) extends Rexp // subsuming CHAR and RANGE + +def CHAR(c: Char) = CFUN(d => c == d) +val ALL = CFUN(_ => true) +def RANGE(cs: Set[Char]) = CFUN(d => cs.contains(d)) + +def CHAR(c: Char) = CFUN(c == _) +val ALL = CFUN(_ => true) +def RANGE(cs: Set[Char]) = CFUN(cs.contains(_)) + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case ZERO => false + case ONE => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case RANGE(_) => false + case PLUS(r) => nullable(r) + case OPT(_) => true + case NTIMES(r, n) => if (n == 0) true else nullable(r) + case UPNTIMES(_, _) => true + case FROMNTIMES(r, n) => if (n == 0) true else nullable(r) + case NMTIMES(r, n, m) => if (n == 0) true else nullable(r) + case NOT(r) => !nullable(r) + case CFUN(_) => false +} + +// derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case ZERO => ZERO + case ONE => ZERO + case CHAR(d) => if (c == d) ONE else ZERO + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case RANGE(cs) => if (cs contains c) ONE else ZERO + case PLUS(r) => SEQ(der(c, r), STAR(r)) + case OPT(r) => der(c, r) + case NTIMES(r, n) => + if (n == 0) ZERO else SEQ(der(c, r), NTIMES(r, n - 1)) + case UPNTIMES(r, n) => + if (n == 0) ZERO else SEQ(der(c, r), UPNTIMES(r, n - 1)) + case FROMNTIMES(r, n) => + if (n == 0) SEQ(der(c, r), STAR(r)) else SEQ(der(c, r), FROMNTIMES(r, n - 1)) + case NMTIMES(r, n, m) => + if (m < n) ZERO else + if (n == 0 && m == 0) ZERO else + if (n == 0) SEQ(der(c, r), UPNTIMES(r, m - 1)) + else SEQ(der(c, r), NMTIMES(r, n - 1, m - 1)) + case NOT(r) => NOT(der (c, r)) + case CFUN(f) => if (f(c)) ONE else ZERO +} + +def simp(r: Rexp) : Rexp = r match { + case ALT(r1, r2) => (simp(r1), simp(r2)) match { + case (ZERO, r2s) => r2s + case (r1s, ZERO) => r1s + case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s) + } + case SEQ(r1, r2) => (simp(r1), simp(r2)) match { + case (ZERO, _) => ZERO + case (_, ZERO) => ZERO + case (ONE, r2s) => r2s + case (r1s, ONE) => r1s + case (r1s, r2s) => SEQ(r1s, r2s) + } + case r => r +} + + +// derivative w.r.t. a string (iterates der) +def ders (s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders(s, der(c, r)) +} + +def ders_simp (s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders_simp(s, simp(der(c, r))) +} + +// main matcher function including simplification +def matcher(r: Rexp, s: String) : Boolean = + nullable(ders_simp(s.toList, r)) + + + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => ONE + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + +implicit def RexpOps (r: Rexp) = new { + def | (s: Rexp) = ALT(r, s) + def % = STAR(r) + def ~ (s: Rexp) = SEQ(r, s) +} + +implicit def stringOps (s: String) = new { + def | (r: Rexp) = ALT(s, r) + def | (r: String) = ALT(s, r) + def % = STAR(s) + def ~ (r: Rexp) = SEQ(s, r) + def ~ (r: String) = SEQ(s, r) +} + +val r1 = NTIMES("a", 3) +val r2 = NTIMES(OPT("a"), 3) +val r3 = UPNTIMES("a", 3) +val r4 = UPNTIMES(OPT("a"), 3) +val r5 = NMTIMES("a", 3, 5) +val r6 = NMTIMES(OPT("a"), 3, 5) + +val rs = List(r1, r2, r3, r4, r5, r6) + +rs.map(matcher(_, "")) +rs.map(matcher(_, "a")) +rs.map(matcher(_, "aa")) +rs.map(matcher(_, "aaa")) +rs.map(matcher(_, "aaaa")) +rs.map(matcher(_, "aaaaa")) +rs.map(matcher(_, "aaaaaa")) + +println("EMAIL:") +val LOWERCASE = ('a' to 'z').toSet +val DIGITS = ('0' to '9').toSet +val EMAIL = { PLUS(CFUN(LOWERCASE | DIGITS | ("_.-").toSet)) ~ "@" ~ + PLUS(CFUN(LOWERCASE | DIGITS | (".-").toSet)) ~ "." ~ + NMTIMES(CFUN(LOWERCASE | Set('.')), 2, 6) } + +val my_email = "christian.urban@kcl.ac.uk" + +println(EMAIL); +println(matcher(EMAIL, my_email)) +println(ders_simp(my_email.toList, EMAIL)) + +println("COMMENTS:") +val ALL = CFUN((c:Char) => true) +val COMMENT = "/*" ~ (NOT(ALL.% ~ "*/" ~ ALL.%)) ~ "*/" + +println(matcher(COMMENT, "/**/")) +println(matcher(COMMENT, "/*foobar*/")) +println(matcher(COMMENT, "/*test*/test*/")) +println(matcher(COMMENT, "/*test/*test*/")) + +println("EVILS:") +val EVIL1 = PLUS(PLUS("a" ~ "a" ~ "a")) +val EVIL2 = PLUS(PLUS("aaaaaaaaaaaaaaaaaaa" ~ OPT("a"))) // 19 as ~ a? + + +//40 * aaa matches +//43 * aaa + aa does not +//45 * aaa + a + +println("EVIL1:") +println(matcher(EVIL1, "aaa" * 40)) +println(matcher(EVIL1, "aaa" * 43 + "aa")) +println(matcher(EVIL1, "aaa" * 45 + "a")) +println("EVIL2:") +println(matcher(EVIL2, "aaa" * 40)) +println(matcher(EVIL2, "aaa" * 43 + "aa")) +println(matcher(EVIL2, "aaa" * 45 + "a")) + +println("TEST for bug pointed out by Filips Ivanovs") +val test = NMTIMES(CFUN(LOWERCASE | Set('.')), 2, 6) + +println(matcher(test,"a")) +println(matcher(test,"ab")) +println(matcher(test,"abcdef")) +println(matcher(test,"abc")) +println(matcher(test,"....")) +println(matcher(test,"asdfg")) +println(matcher(test,"abcdefg")) + +println(test) +println(ders_simp("a".toList, test)) +println(ders_simp("aa".toList, test)) +println(ders_simp("aaa".toList, test)) +println(ders_simp("aaaa".toList, test)) +println(ders_simp("aaaaa".toList, test)) +println(ders_simp("aaaaaa".toList, test)) +println(ders_simp("aaaaaaa".toList, test)) + +def TEST(s: String, r: Rexp) = { + println("Rexp |" + s + "|") + println("Derivative:\n" + ders_simp(s.toList, r)) + println("Is Nullable: " + nullable(ders_simp(s.toList, r))) + Console.readLine +} + + +val ALL2 = "a" | "f" +val COMMENT2 = ("/*" ~ NOT(ALL2.% ~ "*/" ~ ALL2.%) ~ "*/") + +println("1) TEST TEST") +TEST("", COMMENT2) +TEST("/", COMMENT2) +TEST("/*", COMMENT2) +TEST("/*f", COMMENT2) +TEST("/*f*", COMMENT2) +TEST("/*f*/", COMMENT2) +TEST("/*f*/ ", COMMENT2) + +val ALL3 = "a" | "f" +val COMMENT3 = ("/" ~ NOT(ALL3.% ~ "&" ~ ALL3.%) ~ "&") + +println("2) TEST TEST") +TEST("", COMMENT3) +TEST("/", COMMENT3) +TEST("/", COMMENT3) +TEST("/f", COMMENT3) +TEST("/f&", COMMENT3) +TEST("/f& ", COMMENT3) + +//test: ("a" | "aa") ~ ("a" | "aa")* +val auxEVIL3 = ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a'))) +val EVIL3 = SEQ(auxEVIL3, STAR(auxEVIL3)) +val EVIL3p = FROMNTIMES(auxEVIL3, 1) + + +for (i <- 1 to 100 by 1) { + println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL3, "a" * i))) + " size: " + + size(ders(("a" * i).toList, EVIL3))) +} + + + +val t1 = EVIL3 +val t2 = simp(der('a', t1)) +val t3 = simp(der('a', t2)) +val t4 = simp(der('a', t3)) + +val t1 = EVIL3p +val t2 = simp(der('a', t1)) +val t3 = simp(der('a', t2)) +val t4 = simp(der('a', t3)) diff -r d94fdbef1a4f -r 1c9a23304b85 style.sty --- a/style.sty Wed Sep 02 23:34:19 2020 +0100 +++ b/style.sty Mon Sep 07 12:18:07 2020 +0100 @@ -52,6 +52,7 @@ %%% url pointers +\newcommand{\hr}[1]{\href{#1}{\faHandPointRight[regular]}} \newcommand{\here}[1]{\marginnote{\href{#1}{\faHandPointRight[regular]}}} \newcommand{\video}[1]{\marginnote{\href{#1}{\faFilm}}} \newcommand{\alert}{\reversemarginpar\marginpar{\mbox{}\hfill\textcolor{red}{\faExclamationTriangle}}}