--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Attic/parser2a.scala Mon Sep 07 12:18:07 2020 +0100
@@ -0,0 +1,105 @@
+// Parser combinators including semantic actions
+// parses lists of tokens
+//
+// Needs
+// :load matcher.scala
+
+// some regular expressions
+val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz")
+val ID = PLUS(LETTER)
+
+val DIGIT = RANGE("0123456789")
+val NONZERODIGIT = RANGE("123456789")
+val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
+
+val LPAREN = CHAR('(')
+val RPAREN = CHAR(')')
+
+val WHITESPACE = PLUS(RANGE(" \n"))
+val OPS = RANGE("+-*")
+
+// for classifying the strings that have been recognised
+abstract class Token
+
+case object T_WHITESPACE extends Token
+case class T_NUM(s: String) extends Token
+case class T_ID(s: String) extends Token
+case class T_OP(s: String) extends Token
+case object T_LPAREN extends Token
+case object T_RPAREN extends Token
+case object T_IF extends Token
+case object T_THEN extends Token
+case object T_ELSE extends Token
+
+// lexing rules for arithmetic expressions
+val lexing_rules: List[Rule[Token]] =
+ List(("if", (s) => T_IF),
+ ("then", (s) => T_THEN),
+ ("else", (s) => T_ELSE),
+ (NUMBER, (s) => T_NUM(s.mkString)),
+ (ID, (s) => T_ID(s.mkString)),
+ (WHITESPACE, (s) => T_WHITESPACE),
+ (LPAREN, (s) => T_LPAREN),
+ (RPAREN, (s) => T_RPAREN),
+ (OPS, (s) => T_OP(s.mkString)))
+
+val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE))
+
+// parser combinators with return type T
+abstract class Parser[T] {
+ def parse(ts: List[Token]): Set[(T, List[Token])]
+
+ def parse_all(ts: List[Token]) : Set[T] =
+ for ((head, tail) <- parse(ts); if (tail == Nil)) yield head
+
+ def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right)
+ def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f)
+ def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right)
+ def ~>[S] (right : => Parser[S]) : Parser[S] = this ~ right ==> (x => x._2)
+ def <~[S] (right : => Parser[S]) : Parser[T] = this ~ right ==> (x => x._1)
+
+}
+
+class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] {
+ def parse(sb: List[Token]) =
+ for ((head1, tail1) <- p.parse(sb);
+ (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2)
+}
+
+class AltParser[T](p: => Parser[T], q: => Parser[T]) extends Parser[T] {
+ def parse (sb: List[Token]) = p.parse(sb) ++ q.parse(sb)
+}
+
+class FunParser[T, S](p: => Parser[T], f: T => S) extends Parser[S] {
+ def parse (sb: List[Token]) =
+ for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
+}
+
+
+case class TokParser(tok: Token) extends Parser[Token] {
+ def parse(ts: List[Token]) = ts match {
+ case t::ts if (t == tok) => Set((t, ts))
+ case _ => Set ()
+ }
+}
+
+implicit def token2tparser(t: Token) = TokParser(t)
+
+case object NumParser extends Parser[Int] {
+ def parse(ts: List[Token]) = ts match {
+ case T_NUM(s)::ts => Set((s.toInt, ts))
+ case _ => Set ()
+ }
+}
+
+lazy val E: Parser[Int] = (T ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z } || T
+lazy val T: Parser[Int] = (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => x * z } || F
+lazy val F: Parser[Int] = (T_LPAREN ~> E <~ T_RPAREN) || NumParser
+
+println(E.parse_all(Tok.fromString("1 + 2 + 3")))
+println(E.parse_all(Tok.fromString("1 + 2 * 3")))
+println(E.parse_all(Tok.fromString("(1 + 2) * 3")))
+
+// Exercise: implement minus
+println(E.parse_all(Tok.fromString("(1 - 2) * 3")))
+println(E.parse_all(Tok.fromString("(1 + 2) * - 3")))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Attic/parser5.scala Mon Sep 07 12:18:07 2020 +0100
@@ -0,0 +1,113 @@
+val DIGIT = RANGE("0123456789")
+val NONZERODIGIT = RANGE("123456789")
+
+val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
+val LPAREN = CHAR('(')
+val RPAREN = CHAR(')')
+val WHITESPACE = PLUS(RANGE(" \n"))
+val OPS = RANGE("+-*")
+
+// for classifying the strings that have been recognised
+abstract class Token
+case object T_WHITESPACE extends Token
+case class T_NUM(s: String) extends Token
+case class T_OP(s: String) extends Token
+case object T_LPAREN extends Token
+case object T_RPAREN extends Token
+
+val lexing_rules: List[Rule[Token]] =
+ List((NUMBER, (s) => T_NUM(s.mkString)),
+ (WHITESPACE, (s) => T_WHITESPACE),
+ (LPAREN, (s) => T_LPAREN),
+ (RPAREN, (s) => T_RPAREN),
+ (OPS, (s) => T_OP(s.mkString)))
+
+val Tk = Tokenizer(lexing_rules, List(T_WHITESPACE))
+
+
+// parser combinators with input type I and return type T,
+// with a context that guards against left-recursive loops
+
+case class SubList[T](s: List[T], l: Int, h: Int) {
+ def low = l
+ def high = h
+ def length = h - l
+ def sublist(l: Int = l, h: Int = h) = s.slice(l, h)
+ def set(low: Int = l, high: Int = h) = SubList(s, low, high)
+}
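+
+// for illustration: SubList(List('a','b','c','d'), 1, 3) stands for
+// the slice List('b', 'c'); its length is 2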
+
+type Ctxt[T] = List[(String, SubList[T])]
+
+abstract class Parser[I, T] {
+
+ def parse(ts: SubList[I], ctxt: Ctxt[I]): Set[(T, SubList[I])]
+
+ def parse_all(s: List[I]) : Set[T] =
+ for ((head, tail) <- parse(SubList(s, 0, s.length), Nil); if (tail.sublist() == Nil)) yield head
+
+ def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right)
+ def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f)
+ def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right)
+ def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2)
+ def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1)
+}
+
+class SeqParser[I, T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] {
+ def parse(sb: SubList[I], ctxt: Ctxt[I]) =
+ for ((head1, tail1) <- p.parse(sb, ctxt);
+ (head2, tail2) <- q.parse(tail1, ctxt)) yield ((head1, head2), tail2)
+}
+
+class AltParser[I, T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] {
+ def parse(sb: SubList[I], ctxt: Ctxt[I]) = p.parse(sb, ctxt) ++ q.parse(sb, ctxt)
+}
+
+class FunParser[I, T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] {
+ def parse(sb: SubList[I], ctxt: Ctxt[I]) =
+ for ((head, tail) <- p.parse(sb, ctxt)) yield (f(head), tail)
+}
+
+case object NumParser extends Parser[Token, Int] {
+ def parse(sb: SubList[Token], ctxt: Ctxt[Token]) = {
+ if (0 < sb.length) sb.sublist(sb.low, sb.low + 1) match {
+ case T_NUM(i)::Nil => Set((i.toInt, sb.set(low = sb.low + 1)))
+ case _ => Set()
+ }
+ else Set()
+ }
+}
+
+case class TokParser(t: Token) extends Parser[Token, Token] {
+ def parse(sb: SubList[Token], ctxt: Ctxt[Token]) = {
+ if (0 < sb.length && sb.sublist(sb.low, sb.low + 1) == List(t)) Set((t, sb.set(low = sb.low + 1)))
+ else Set()
+ }
+}
+
+implicit def token2tparser(t: Token) = TokParser(t)
+
+class IgnLst[I, T](p: => Parser[I, T]) extends Parser[I, T] {
+ def parse(sb: SubList[I], ctxt: Ctxt[I]) = {
+ if (sb.length == 0) Set()
+ else for ((head, tail) <- p.parse(sb.set(high = sb.high - 1), ctxt))
+ yield (head, tail.set(high = tail.high + 1))
+ }
+}
+
+class CHECK[I, T](nt: String, p: => Parser[I, T]) extends Parser[I, T] {
+ def parse(sb: SubList[I], ctxt: Ctxt[I]) = {
+    val should_trim = ctxt.contains((nt, sb))
+ if (should_trim && sb.length == 0) Set()
+ else if (should_trim) new IgnLst(p).parse(sb, (nt, sb)::ctxt)
+ else p.parse(sb, (nt, sb)::ctxt)
+ }
+}
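+
+// Note: CHECK records the pair (nt, sb) in the context; when the same
+// non-terminal is re-tried on the same sublist (as with the
+// left-recursive rules for E below), IgnLst forces progress by
+// excluding the last token, which makes the parsing terminate.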
+
+lazy val E: Parser[Token, Int] =
+ new CHECK("E", (E ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z} ||
+ (E ~ T_OP("*") ~ E) ==> { case ((x, y), z) => x * z} ||
+ (T_LPAREN ~ E ~ T_RPAREN) ==> { case ((x, y), z) => y} ||
+ NumParser)
+
+println(E.parse_all(Tk.fromString("1 + 2 * 3")))
+println(E.parse_all(Tk.fromString("(1 + 2) * 3")))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Attic/token.scala Mon Sep 07 12:18:07 2020 +0100
@@ -0,0 +1,342 @@
+// A simple lexer inspired by work of Sulzmann & Lu
+//==================================================
+
+
+import scala.language.implicitConversions
+import scala.language.reflectiveCalls
+
+// regular expressions including records
+abstract class Rexp
+case object ZERO extends Rexp
+case object ONE extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+case class RECD(x: String, r: Rexp) extends Rexp
+
+// values
+abstract class Val
+case object Empty extends Val
+case class Chr(c: Char) extends Val
+case class Sequ(v1: Val, v2: Val) extends Val
+case class Left(v: Val) extends Val
+case class Right(v: Val) extends Val
+case class Stars(vs: List[Val]) extends Val
+case class Rec(x: String, v: Val) extends Val
+
+// some convenience for typing in regular expressions
+def charlist2rexp(s : List[Char]): Rexp = s match {
+ case Nil => ONE
+ case c::Nil => CHAR(c)
+ case c::s => SEQ(CHAR(c), charlist2rexp(s))
+}
+implicit def string2rexp(s : String) : Rexp =
+ charlist2rexp(s.toList)
+
+implicit def RexpOps(r: Rexp) = new {
+ def | (s: Rexp) = ALT(r, s)
+ def % = STAR(r)
+ def ~ (s: Rexp) = SEQ(r, s)
+}
+
+implicit def stringOps(s: String) = new {
+ def | (r: Rexp) = ALT(s, r)
+ def | (r: String) = ALT(s, r)
+ def % = STAR(s)
+ def ~ (r: Rexp) = SEQ(s, r)
+ def ~ (r: String) = SEQ(s, r)
+ def $ (r: Rexp) = RECD(s, r)
+}
+
+def nullable(r: Rexp) : Boolean = r match {
+ case ZERO => false
+ case ONE => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+ case RECD(_, r1) => nullable(r1)
+}
+
+def der(c: Char, r: Rexp) : Rexp = r match {
+ case ZERO => ZERO
+ case ONE => ZERO
+ case CHAR(d) => if (c == d) ONE else ZERO
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r) => SEQ(der(c, r), STAR(r))
+ case RECD(_, r1) => der(c, r1)
+}
+
+
+// extracts a string from value
+def flatten(v: Val) : String = v match {
+ case Empty => ""
+ case Chr(c) => c.toString
+ case Left(v) => flatten(v)
+ case Right(v) => flatten(v)
+ case Sequ(v1, v2) => flatten(v1) + flatten(v2)
+ case Stars(vs) => vs.map(flatten).mkString
+ case Rec(_, v) => flatten(v)
+}
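+
+// a small example (for illustration only):
+// flatten(Sequ(Chr('a'), Rec("x", Chr('b')))) == "ab"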
+
+
+// extracts an environment from a value;
+// used for tokenising a string
+def env(v: Val) : List[(String, String)] = v match {
+ case Empty => Nil
+ case Chr(c) => Nil
+ case Left(v) => env(v)
+ case Right(v) => env(v)
+ case Sequ(v1, v2) => env(v1) ::: env(v2)
+ case Stars(vs) => vs.flatMap(env)
+ case Rec(x, v) => (x, flatten(v))::env(v)
+}
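+
+// a small example (for illustration only):
+// env(Rec("n", Sequ(Chr('1'), Chr('2')))) == List(("n", "12"))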
+
+// The Injection Part of the lexer
+
+def mkeps(r: Rexp) : Val = r match {
+ case ONE => Empty
+ case ALT(r1, r2) =>
+ if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
+ case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
+ case STAR(r) => Stars(Nil)
+ case RECD(x, r) => Rec(x, mkeps(r))
+}
+
+def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
+ case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
+ case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
+ case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
+ case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
+ case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
+ case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
+ case (CHAR(d), Empty) => Chr(c)
+ case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
+}
+
+// some "rectification" functions for simplification
+def F_ID(v: Val): Val = v
+def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
+def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
+def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
+ case Right(v) => Right(f2(v))
+ case Left(v) => Left(f1(v))
+}
+def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
+ case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
+}
+def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) =
+ (v:Val) => Sequ(f1(Empty), f2(v))
+def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) =
+ (v:Val) => Sequ(f1(v), f2(Empty))
+def F_RECD(f: Val => Val) = (v:Val) => v match {
+ case Rec(x, v) => Rec(x, f(v))
+}
+def F_ERROR(v: Val): Val = throw new Exception("error")
+
+def simp(r: Rexp): (Rexp, Val => Val) = r match {
+ case ALT(r1, r2) => {
+ val (r1s, f1s) = simp(r1)
+ val (r2s, f2s) = simp(r2)
+ (r1s, r2s) match {
+ case (ZERO, _) => (r2s, F_RIGHT(f2s))
+ case (_, ZERO) => (r1s, F_LEFT(f1s))
+ case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
+ else (ALT (r1s, r2s), F_ALT(f1s, f2s))
+ }
+ }
+ case SEQ(r1, r2) => {
+ val (r1s, f1s) = simp(r1)
+ val (r2s, f2s) = simp(r2)
+ (r1s, r2s) match {
+ case (ZERO, _) => (ZERO, F_ERROR)
+ case (_, ZERO) => (ZERO, F_ERROR)
+ case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
+ case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s))
+ case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
+ }
+ }
+ case RECD(x, r1) => {
+ val (r1s, f1s) = simp(r1)
+ (RECD(x, r1s), F_RECD(f1s))
+ }
+ case r => (r, F_ID)
+}
+
+// lexing functions including simplification
+def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
+ case Nil => if (nullable(r)) mkeps(r) else
+ { throw new Exception("lexing error") }
+ case c::cs => {
+ val (r_simp, f_simp) = simp(der(c, r))
+ inj(r, c, f_simp(lex_simp(r_simp, cs)))
+ }
+}
+
+def lexing_simp(r: Rexp, s: String) =
+ env(lex_simp(r, s.toList))
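+
+// a tiny illustration (not in the original file): lexing the string
+// "ab" against the record regex ("x" $ "ab") yields List(("x", "ab"))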
+
+
+// The Lexing Rules for the Fun Language
+
+def PLUS(r: Rexp) = r ~ r.%
+
+def Range(s : List[Char]) : Rexp = s match {
+ case Nil => ZERO
+ case c::Nil => CHAR(c)
+ case c::s => ALT(CHAR(c), Range(s))
+}
+def RANGE(s: String) = Range(s.toList)
+
+val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_")
+val DIGIT = RANGE("0123456789")
+val ID = SYM ~ (SYM | DIGIT).%
+val NUM = PLUS(DIGIT)
+val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write"
+val SEMI: Rexp = ";"
+val OP: Rexp = ":=" | "=" | "-" | "+" | "*" | "!=" | "<" | ">"
+val WHITESPACE = PLUS(" " | "\n" | "\t")
+val LPAREN: Rexp = "{"
+val RPAREN: Rexp = "}"
+val STRING: Rexp = "\"" ~ SYM.% ~ "\""
+
+
+val WHILE_REGS = (("k" $ KEYWORD) |
+ ("i" $ ID) |
+ ("o" $ OP) |
+ ("n" $ NUM) |
+ ("s" $ SEMI) |
+ ("str" $ STRING) |
+ ("p" $ (LPAREN | RPAREN)) |
+ ("w" $ WHITESPACE)).%
+
+
+// Two Simple While Tests
+//========================
+
+println("test: read n")
+
+val prog0 = """read n"""
+println(lexing_simp(WHILE_REGS, prog0))
+
+println("test: read n; write n ")
+
+val prog1 = """read n; write n"""
+println(lexing_simp(WHILE_REGS, prog1))
+
+
+// Bigger Tests
+//==============
+
+// escapes strings and prints them out as "", "\n" and so on
+def esc(raw: String): String = {
+ import scala.reflect.runtime.universe._
+ Literal(Constant(raw)).toString
+}
+
+def escape(tks: List[(String, String)]) =
+ tks.map{ case (s1, s2) => (s1, esc(s2))}
+
+val prog2 = """
+write "Fib";
+read n;
+minus1 := 0;
+minus2 := 1;
+while n > 0 do {
+ temp := minus2;
+ minus2 := minus1 + minus2;
+ minus1 := temp;
+ n := n - 1
+};
+write "Result";
+write minus2
+"""
+
+println("Tokens for Fib")
+println(escape(lexing_simp(WHILE_REGS, prog2)).mkString("\n"))
+
+
+
+val prog3 = """
+start := 1000;
+x := start;
+y := start;
+z := start;
+while 0 < x do {
+ while 0 < y do {
+ while 0 < z do {
+ z := z - 1
+ };
+ z := start;
+ y := y - 1
+ };
+ y := start;
+ x := x - 1
+}
+"""
+
+println("Tokens for Loops")
+println(escape(lexing_simp(WHILE_REGS, prog3)).mkString("\n"))
+
+
+
+
+
+
+
+// The tokens for the WHILE language
+import java.io._
+
+abstract class Token extends Serializable
+case object T_SEMI extends Token
+case object T_LPAREN extends Token
+case object T_RPAREN extends Token
+case class T_ID(s: String) extends Token
+case class T_OP(s: String) extends Token
+case class T_NUM(n: Int) extends Token
+case class T_KWD(s: String) extends Token
+case class T_STR(s: String) extends Token
+
+val token : PartialFunction[(String, String), Token] = {
+ case ("s", _) => T_SEMI
+ case ("p", "{") => T_LPAREN
+ case ("p", "}") => T_RPAREN
+ case ("i", s) => T_ID(s)
+ case ("o", s) => T_OP(s)
+ case ("n", s) => T_NUM(s.toInt)
+ case ("k", s) => T_KWD(s)
+ case ("str", s) => T_STR(s)
+}
+
+def tokenise(s: String) : List[Token] =
+ lexing_simp(WHILE_REGS, s).collect(token)
+
+tokenise(prog2)
+tokenise(prog3)
+
+
+object Lexer extends App {
+
+ val fname = "/tmp/nflx"
+
+ def serialise(tks: List[Token]) = {
+ val out = new ObjectOutputStream(new FileOutputStream(fname))
+ out.writeObject(tks)
+ out.close
+ }
+
+ def deserialise() : List[Token] = {
+ val in = new ObjectInputStream(new FileInputStream(fname))
+ val tks = in.readObject.asInstanceOf[List[Token]]
+ in.close
+ tks
+ }
+
+ serialise(tokenise(prog2))
+ println(deserialise())
+
+}
\ No newline at end of file
Binary file handouts/amm-ho.pdf has changed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/handouts/amm-ho.tex Mon Sep 07 12:18:07 2020 +0100
@@ -0,0 +1,126 @@
+\documentclass{article}
+\usepackage{../style}
+\usepackage{../langs}
+\usepackage{marvosym}
+
+%cheat sheet
+%http://worldline.github.io/scala-cheatsheet/
+
+\begin{document}
+
+\section*{Scala in 6CCS3CFL}
+
+For the coursework in this module you are free to use any programming
+language you like, but I will show you all my code using Scala---I hope
+you have fond memories of Scala from PEP. I will use the current
+stable version of Scala, which is 2.13.3. If you need a reminder,
+have a look at the Scala handouts for PEP:
+\hr{http://talisker.nms.kcl.ac.uk/cgi-bin/repos.cgi/pep-material/raw-file/tip/handouts/pep-ho.pdf}\bigskip
+
+\noindent
+The main difference from the Scala setup I showed you in PEP is that
+in CFL I will use the Ammonite REPL
+
+\begin{quote}
+\url{https://ammonite.io/#Ammonite-REPL}
+\end{quote}
+
+\noindent
+This is a drop-in replacement for the original Scala REPL and
+works very similarly. For example:
+
+\begin{lstlisting}[language={},numbers=none,basicstyle=\ttfamily\small]
+$ amm
+Loading...
+Welcome to the Ammonite Repl 2.2.0 (Scala 2.13.3 Java 9)
+scala> 1 + 2
+res0: Int = 3
+\end{lstlisting} %% $
+
+\noindent
+Ammonite uses the same Scala compiler; it just adds some useful
+features on top. It is quite mainstream in the Scala community, so it
+should be very easy for you to install \texttt{amm}. The big
+advantage of Ammonite is that it comes with some additional libraries
+already built in and also allows one to easily break up code into
+smaller modules. For example, reading and writing files in Ammonite
+can be achieved with
+
+\begin{lstlisting}[numbers=none,language=Scala]
+scala> import ammonite.ops._
+
+scala> read(pwd / "file.name")
+res1: String = """..."""
+
+scala> write.over(pwd / "file.name", "foo bar")
+\end{lstlisting}
+
+\noindent
+For loading and accessing code from another Scala file, you can import it
+as follows:
+
+\begin{lstlisting}[numbers=none,language=Scala]
+import $file.name-of-the-file
+import name-of-the-file._
+\end{lstlisting} %% $
+
+\noindent
+This assumes the other Scala file is called
+\texttt{name-of-the-file.sc} and that it is in the directory in which
+\texttt{amm} is run. This will be very convenient for the compiler we
+implement in CFL, because it allows us to easily break up the code
+into the lexer, the parser and the code generator, as sketched below.
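+
+For example, a main script could glue together three such files (the
+file names here are made up for illustration):
+
+\begin{lstlisting}[numbers=none,language=Scala]
+import $file.lexer, lexer._
+import $file.parser, parser._
+import $file.codegen, codegen._
+\end{lstlisting} %% $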
+
+Another feature that exists in Ammonite, but not yet in the
+current version of Scala (it will be in the next version, called
+Dotty), is that you can mark functions as \texttt{@main}. For example
+
+\begin{lstlisting}[numbers=none,language=Scala]
+@main
+def foo() = ...
+\end{lstlisting}
+
+\noindent
+This means you can now call that function from the command line like this:
+
+\begin{lstlisting}[numbers=none,language=Scala]
+$ amm file.sc foo
+\end{lstlisting} %% $
+
+\noindent
+If you want to specify arguments on the command line, say an int and
+a string, then you can write
+
+\begin{lstlisting}[numbers=none,language=Scala]
+@main
+def bar(i: Int, s: String) = ...
+\end{lstlisting}
+
+\noindent
+and then call
+
+\begin{lstlisting}[numbers=none,language=Scala]
+$ amm file.sc 42 foobar
+\end{lstlisting} %% $
+
+\noindent
+What is also good in Ammonite is that you can mark more than one
+function as ``main'' and then choose on the command line which
+function you want to run as the entry point.\bigskip
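+
+\noindent
+For instance, a script with two entry points (the function names are
+made up for illustration)
+
+\begin{lstlisting}[numbers=none,language=Scala]
+@main
+def lex() = ...
+
+@main
+def parse() = ...
+\end{lstlisting}
+
+\noindent
+can be run with \texttt{amm file.sc lex} or with \texttt{amm file.sc
+parse}.\bigskip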
+
+\noindent
+To sum up, Ammonite is a really useful addition to the Scala ecosystem.
+You can find more information about how to use it in the first five chapters
+of the ``Hands-on Scala'' book by Li Haoyi. These chapters are
+free and can be used as a reference, see:
+
+\begin{center}
+\url{https://www.handsonscala.com/part-i-introduction-to-scala.html}
+\end{center}
+
+\end{document}
+
+%%% Local Variables:
+%%% mode: latex
+%%% TeX-master: t
+%%% End:
--- a/progs/cw1.scala Wed Sep 02 23:34:19 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,264 +0,0 @@
-// CW 1
-import scala.language.implicitConversions
-import scala.language.reflectiveCalls
-
-abstract class Rexp
-case object ZERO extends Rexp // matches nothing
-case object ONE extends Rexp // matches the empty string
-case class CHAR(c: Char) extends Rexp // matches a character c
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternative
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence
-case class STAR(r: Rexp) extends Rexp // star
-case class RANGE(cs: Set[Char]) extends Rexp // set of characters
-case class PLUS(r: Rexp) extends Rexp // plus
-case class OPT(r: Rexp) extends Rexp // optional
-case class NTIMES(r: Rexp, n: Int) extends Rexp // n-times
-case class UPNTIMES(r: Rexp, n: Int) extends Rexp // up n-times
-case class FROMNTIMES(r: Rexp, n: Int) extends Rexp // from n-times
-case class NMTIMES(r: Rexp, n: Int, m: Int) extends Rexp // between nm-times
-case class NOT(r: Rexp) extends Rexp // not
-case class CFUN(f: Char => Boolean) extends Rexp // subsuming CHAR and RANGE
-
-def CHAR(c: Char) = CFUN(d => c == d)
-val ALL = CFUN(_ => true)
-def RANGE(cs: Set[Char]) = CFUN(d => cs.contains(d))
-
-def CHAR(c: Char) = CFUN(c == _)
-val ALL = CFUN(_ => true)
-def RANGE(cs: Set[Char]) = CFUN(cs.contains(_))
-
-
-// nullable function: tests whether the regular
-// expression can recognise the empty string
-def nullable (r: Rexp) : Boolean = r match {
- case ZERO => false
- case ONE => true
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
- case RANGE(_) => false
- case PLUS(r) => nullable(r)
- case OPT(_) => true
- case NTIMES(r, n) => if (n == 0) true else nullable(r)
- case UPNTIMES(_, _) => true
- case FROMNTIMES(r, n) => if (n == 0) true else nullable(r)
- case NMTIMES(r, n, m) => if (n == 0) true else nullable(r)
- case NOT(r) => !nullable(r)
- case CFUN(_) => false
-}
-
-// derivative of a regular expression w.r.t. a character
-def der (c: Char, r: Rexp) : Rexp = r match {
- case ZERO => ZERO
- case ONE => ZERO
- case CHAR(d) => if (c == d) ONE else ZERO
- case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
- case STAR(r) => SEQ(der(c, r), STAR(r))
- case RANGE(cs) => if (cs contains c) ONE else ZERO
- case PLUS(r) => SEQ(der(c, r), STAR(r))
- case OPT(r) => der(c, r)
- case NTIMES(r, n) =>
- if (n == 0) ZERO else SEQ(der(c, r), NTIMES(r, n - 1))
- case UPNTIMES(r, n) =>
- if (n == 0) ZERO else SEQ(der(c, r), UPNTIMES(r, n - 1))
- case FROMNTIMES(r, n) =>
- if (n == 0) SEQ(der(c, r), STAR(r)) else SEQ(der(c, r), FROMNTIMES(r, n - 1))
- case NMTIMES(r, n, m) =>
- if (m < n) ZERO else
- if (n == 0 && m == 0) ZERO else
- if (n == 0) SEQ(der(c, r), UPNTIMES(r, m - 1))
- else SEQ(der(c, r), NMTIMES(r, n - 1, m - 1))
- case NOT(r) => NOT(der (c, r))
- case CFUN(f) => if (f(c)) ONE else ZERO
-}
-
-def simp(r: Rexp) : Rexp = r match {
- case ALT(r1, r2) => (simp(r1), simp(r2)) match {
- case (ZERO, r2s) => r2s
- case (r1s, ZERO) => r1s
- case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s)
- }
- case SEQ(r1, r2) => (simp(r1), simp(r2)) match {
- case (ZERO, _) => ZERO
- case (_, ZERO) => ZERO
- case (ONE, r2s) => r2s
- case (r1s, ONE) => r1s
- case (r1s, r2s) => SEQ(r1s, r2s)
- }
- case r => r
-}
-
-
-// derivative w.r.t. a string (iterates der)
-def ders (s: List[Char], r: Rexp) : Rexp = s match {
- case Nil => r
- case c::s => ders(s, der(c, r))
-}
-
-def ders_simp (s: List[Char], r: Rexp) : Rexp = s match {
- case Nil => r
- case c::s => ders_simp(s, simp(der(c, r)))
-}
-
-// main matcher function including simplification
-def matcher(r: Rexp, s: String) : Boolean =
- nullable(ders_simp(s.toList, r))
-
-
-
-// some convenience for typing in regular expressions
-def charlist2rexp(s : List[Char]) : Rexp = s match {
- case Nil => ONE
- case c::Nil => CHAR(c)
- case c::s => SEQ(CHAR(c), charlist2rexp(s))
-}
-implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
-
-implicit def RexpOps (r: Rexp) = new {
- def | (s: Rexp) = ALT(r, s)
- def % = STAR(r)
- def ~ (s: Rexp) = SEQ(r, s)
-}
-
-implicit def stringOps (s: String) = new {
- def | (r: Rexp) = ALT(s, r)
- def | (r: String) = ALT(s, r)
- def % = STAR(s)
- def ~ (r: Rexp) = SEQ(s, r)
- def ~ (r: String) = SEQ(s, r)
-}
-
-val r1 = NTIMES("a", 3)
-val r2 = NTIMES(OPT("a"), 3)
-val r3 = UPNTIMES("a", 3)
-val r4 = UPNTIMES(OPT("a"), 3)
-val r5 = NMTIMES("a", 3, 5)
-val r6 = NMTIMES(OPT("a"), 3, 5)
-
-val rs = List(r1, r2, r3, r4, r5, r6)
-
-rs.map(matcher(_, ""))
-rs.map(matcher(_, "a"))
-rs.map(matcher(_, "aa"))
-rs.map(matcher(_, "aaa"))
-rs.map(matcher(_, "aaaa"))
-rs.map(matcher(_, "aaaaa"))
-rs.map(matcher(_, "aaaaaa"))
-
-println("EMAIL:")
-val LOWERCASE = ('a' to 'z').toSet
-val DIGITS = ('0' to '9').toSet
-val EMAIL = { PLUS(CFUN(LOWERCASE | DIGITS | ("_.-").toSet)) ~ "@" ~
- PLUS(CFUN(LOWERCASE | DIGITS | (".-").toSet)) ~ "." ~
- NMTIMES(CFUN(LOWERCASE | Set('.')), 2, 6) }
-
-val my_email = "christian.urban@kcl.ac.uk"
-
-println(EMAIL);
-println(matcher(EMAIL, my_email))
-println(ders_simp(my_email.toList, EMAIL))
-
-println("COMMENTS:")
-val ALL = CFUN((c:Char) => true)
-val COMMENT = "/*" ~ (NOT(ALL.% ~ "*/" ~ ALL.%)) ~ "*/"
-
-println(matcher(COMMENT, "/**/"))
-println(matcher(COMMENT, "/*foobar*/"))
-println(matcher(COMMENT, "/*test*/test*/"))
-println(matcher(COMMENT, "/*test/*test*/"))
-
-println("EVILS:")
-val EVIL1 = PLUS(PLUS("a" ~ "a" ~ "a"))
-val EVIL2 = PLUS(PLUS("aaaaaaaaaaaaaaaaaaa" ~ OPT("a"))) // 19 as ~ a?
-
-
-//40 * aaa matches
-//43 * aaa + aa does not
-//45 * aaa + a
-
-println("EVIL1:")
-println(matcher(EVIL1, "aaa" * 40))
-println(matcher(EVIL1, "aaa" * 43 + "aa"))
-println(matcher(EVIL1, "aaa" * 45 + "a"))
-println("EVIL2:")
-println(matcher(EVIL2, "aaa" * 40))
-println(matcher(EVIL2, "aaa" * 43 + "aa"))
-println(matcher(EVIL2, "aaa" * 45 + "a"))
-
-println("TEST for bug pointed out by Filips Ivanovs")
-val test = NMTIMES(CFUN(LOWERCASE | Set('.')), 2, 6)
-
-println(matcher(test,"a"))
-println(matcher(test,"ab"))
-println(matcher(test,"abcdef"))
-println(matcher(test,"abc"))
-println(matcher(test,"...."))
-println(matcher(test,"asdfg"))
-println(matcher(test,"abcdefg"))
-
-println(test)
-println(ders_simp("a".toList, test))
-println(ders_simp("aa".toList, test))
-println(ders_simp("aaa".toList, test))
-println(ders_simp("aaaa".toList, test))
-println(ders_simp("aaaaa".toList, test))
-println(ders_simp("aaaaaa".toList, test))
-println(ders_simp("aaaaaaa".toList, test))
-
-def TEST(s: String, r: Rexp) = {
- println("Rexp |" + s + "|")
- println("Derivative:\n" + ders_simp(s.toList, r))
- println("Is Nullable: " + nullable(ders_simp(s.toList, r)))
- Console.readLine
-}
-
-
-val ALL2 = "a" | "f"
-val COMMENT2 = ("/*" ~ NOT(ALL2.% ~ "*/" ~ ALL2.%) ~ "*/")
-
-println("1) TEST TEST")
-TEST("", COMMENT2)
-TEST("/", COMMENT2)
-TEST("/*", COMMENT2)
-TEST("/*f", COMMENT2)
-TEST("/*f*", COMMENT2)
-TEST("/*f*/", COMMENT2)
-TEST("/*f*/ ", COMMENT2)
-
-val ALL3 = "a" | "f"
-val COMMENT3 = ("/" ~ NOT(ALL3.% ~ "&" ~ ALL3.%) ~ "&")
-
-println("2) TEST TEST")
-TEST("", COMMENT3)
-TEST("/", COMMENT3)
-TEST("/", COMMENT3)
-TEST("/f", COMMENT3)
-TEST("/f&", COMMENT3)
-TEST("/f& ", COMMENT3)
-
-//test: ("a" | "aa") ~ ("a" | "aa")*
-val auxEVIL3 = ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a')))
-val EVIL3 = SEQ(auxEVIL3, STAR(auxEVIL3))
-val EVIL3p = FROMNTIMES(auxEVIL3, 1)
-
-
-for (i <- 1 to 100 by 1) {
- println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL3, "a" * i))) + " size: " +
- size(ders(("a" * i).toList, EVIL3)))
-}
-
-
-
-val t1 = EVIL3
-val t2 = simp(der('a', t1))
-val t3 = simp(der('a', t2))
-val t4 = simp(der('a', t3))
-
-val t1 = EVIL3p
-val t2 = simp(der('a', t1))
-val t3 = simp(der('a', t2))
-val t4 = simp(der('a', t3))
--- a/progs/matcher.scala Wed Sep 02 23:34:19 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,130 +0,0 @@
-package object matcher {
-
-// regular expressions
-// including constructors for NOT and ALLC
-sealed abstract class Rexp
-
-case object NULL extends Rexp
-case object EMPTY extends Rexp
-case object ALLC extends Rexp // recognises any character
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
-case class NOT(r: Rexp) extends Rexp // negation of a regular expression
-
-
-// nullable function: tests whether the regular
-// expression can recognise the empty string
-def nullable (r: Rexp) : Boolean = r match {
- case NULL => false
- case EMPTY => true
- case ALLC => false
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
- case NOT(r) => !(nullable(r))
-}
-
-// tests whether a regular expression
-// cannot recognise more
-def no_more (r: Rexp) : Boolean = r match {
- case NULL => true
- case EMPTY => false
- case ALLC => false
- case CHAR(_) => false
- case ALT(r1, r2) => no_more(r1) && no_more(r2)
- case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1)
- case STAR(_) => false
- case NOT(r) => !(no_more(r))
-}
-
-
-// derivative of a regular expression w.r.t. a character
-def der (c: Char, r: Rexp) : Rexp = r match {
- case NULL => NULL
- case EMPTY => NULL
- case ALLC => EMPTY
- case CHAR(d) => if (c == d) EMPTY else NULL
- case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
- case STAR(r) => SEQ(der(c, r), STAR(r))
- case NOT(r) => NOT(der (c, r))
-}
-
-// main class for the tokenizer
-case class Tokenizer[T](rules: List[(Rexp, List[Char] => T)], excl: List[T] = Nil) {
-
-def munch(r: Rexp, action: List[Char] => T, s: List[Char], t: List[Char]) : Option[(List[Char], T)] =
- s match {
- case Nil if (nullable(r)) => Some(Nil, action(t))
- case Nil => None
- case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, action(t))
- case c::s if (no_more(der (c, r))) => None
- case c::s => munch(der (c, r), action, s, t ::: List(c))
- }
-
-def one_token(s: List[Char]) : Either[(List[Char], T), String] = {
- val somes = rules.map { (r) => munch(r._1, r._2, s, Nil) }.flatten
- if (somes == Nil) Right(s.mkString)
- else Left(somes sortBy (_._1.length) head)
-}
-
-def tokenize(cs: List[Char]) : List[T] = cs match {
- case Nil => Nil
- case _ => one_token(cs) match {
- case Left((rest, token)) => token :: tokenize(rest)
- case Right(s) => { println("Cannot tokenize: \"" + s + "\""); Nil }
- }
-}
-
-def fromString(s: String) : List[T] =
- tokenize(s.toList).filterNot(excl.contains(_))
-
-def fromFile(name: String) : List[T] =
- fromString(io.Source.fromFile(name).mkString)
-
-}
-
-
-// regular expression for specifying
-// ranges of characters
-def Range(s : List[Char]) : Rexp = s match {
- case Nil => NULL
- case c::Nil => CHAR(c)
- case c::s => ALT(CHAR(c), Range(s))
-}
-def RANGE(s: String) = Range(s.toList)
-
-
-// one or more
-def PLUS(r: Rexp) = SEQ(r, STAR(r))
-
-// many alternatives
-def Alts(rs: List[Rexp]) : Rexp = rs match {
- case Nil => NULL
- case r::Nil => r
- case r::rs => ALT(r, Alts(rs))
-}
-def ALTS(rs: Rexp*) = Alts(rs.toList)
-
-// repetitions
-def Seqs(rs: List[Rexp]) : Rexp = rs match {
- case Nil => NULL
- case r::Nil => r
- case r::rs => SEQ(r, Seqs(rs))
-}
-def SEQS(rs: Rexp*) = Seqs(rs.toList)
-
-// some convenience for typing in regular expressions
-def charlist2rexp(s : List[Char]) : Rexp = s match {
- case Nil => EMPTY
- case c::Nil => CHAR(c)
- case c::s => SEQ(CHAR(c), charlist2rexp(s))
-}
-implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
-
-}
--- a/progs/parser2.scala Wed Sep 02 23:34:19 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,139 +0,0 @@
-// A naive version of parser combinators producing parse trees
-//
-// Needs
-// :load matcher.scala
-
-// some regular expressions
-val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz")
-val ID = PLUS(LETTER)
-
-val DIGIT = RANGE("0123456789")
-val NONZERODIGIT = RANGE("123456789")
-val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
-
-val LPAREN = CHAR('(')
-val RPAREN = CHAR(')')
-
-val WHITESPACE = PLUS(RANGE(" \n"))
-val OPS = RANGE("+-*")
-
-// for classifying the strings that have been recognised
-abstract class Token
-
-case object T_WHITESPACE extends Token
-case class T_NUM(s: String) extends Token
-case class T_ID(s: String) extends Token
-case class T_OP(s: String) extends Token
-case object T_LPAREN extends Token
-case object T_RPAREN extends Token
-case object T_IF extends Token
-case object T_THEN extends Token
-case object T_ELSE extends Token
-
-// lexing rules for arithmetic expressions
-val lexing_rules: List[Rule[Token]]=
- List(("if", (s) => T_IF),
- ("then", (s) => T_THEN),
- ("else", (s) => T_ELSE),
- (NUMBER, (s) => T_NUM(s.mkString)),
- (ID, (s) => T_ID(s.mkString)),
- (WHITESPACE, (s) => T_WHITESPACE),
- (LPAREN, (s) => T_LPAREN),
- (RPAREN, (s) => T_RPAREN),
- (OPS, (s) => T_OP(s.mkString)))
-
-val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE))
-
-
-// parse trees
-abstract class ParseTree
-case class Leaf(t: Token) extends ParseTree
-case class Branch(pts: List[ParseTree]) extends ParseTree
-
-def combine(pt1: ParseTree, pt2: ParseTree) = pt1 match {
- case Leaf(t) => Branch(List(Leaf(t), pt2))
- case Branch(pts) => Branch(pts ++ List(pt2))
-}
-
-// parser combinators
-abstract class Parser {
- def parse(ts: List[Token]): Set[(ParseTree, List[Token])]
-
- def parse_all(ts: List[Token]) : Set[ParseTree] =
- for ((head, tail) <- parse(ts); if (tail == Nil)) yield head
-
- def || (right : => Parser) : Parser = new AltParser(this, right)
- def ~ (right : => Parser) : Parser = new SeqParser(this, right)
-}
-
-class AltParser(p: => Parser, q: => Parser) extends Parser {
- def parse (ts: List[Token]) = p.parse(ts) ++ q.parse(ts)
-}
-
-class SeqParser(p: => Parser, q: => Parser) extends Parser {
- def parse(ts: List[Token]) =
- for ((head1, tail1) <- p.parse(ts);
- (head2, tail2) <- q.parse(tail1)) yield (combine(head1, head2), tail2)
-}
-
-class ListParser(ps: => List[Parser]) extends Parser {
- def parse(ts: List[Token]) = ps match {
- case Nil => Set()
- case p::Nil => p.parse(ts)
- case p::ps =>
- for ((head1, tail1) <- p.parse(ts);
- (head2, tail2) <- new ListParser(ps).parse(tail1)) yield (Branch(List(head1, head2)), tail2)
- }
-}
-
-case class TokParser(tok: Token) extends Parser {
- def parse(ts: List[Token]) = ts match {
- case t::ts if (t == tok) => Set((Leaf(t), ts))
- case _ => Set ()
- }
-}
-
-implicit def token2tparser(t: Token) = TokParser(t)
-
-case object IdParser extends Parser {
- def parse(ts: List[Token]) = ts match {
- case T_ID(s)::ts => Set((Leaf(T_ID(s)), ts))
- case _ => Set ()
- }
-}
-
-case object NumParser extends Parser {
- def parse(ts: List[Token]) = ts match {
- case T_NUM(s)::ts => Set((Leaf(T_NUM(s)), ts))
- case _ => Set ()
- }
-}
-
-lazy val E: Parser = (T ~ T_OP("+") ~ E) || T // start symbol
-lazy val T: Parser = (F ~ T_OP("*") ~ T) || F
-lazy val F: Parser = (T_LPAREN ~ E ~ T_RPAREN) || NumParser
-
-println(Tok.fromString("1 + 2 + 3"))
-println(E.parse_all(Tok.fromString("1 + 2 + 3")))
-
-def eval(t: ParseTree) : Int = t match {
- case Leaf(T_NUM(n)) => n.toInt
- case Branch(List(t1, Leaf(T_OP("+")), t2)) => eval(t1) + eval(t2)
- case Branch(List(t1, Leaf(T_OP("*")), t2)) => eval(t1) * eval(t2)
- case Branch(List(Leaf(T_LPAREN), t, Leaf(T_RPAREN))) => eval(t)
-}
-
-(E.parse_all(Tok.fromString("1 + 2 + 3"))).map(eval(_))
-(E.parse_all(Tok.fromString("1 + 2 * 3"))).map(eval(_))
-
-lazy val EXPR: Parser =
- new ListParser(List(T_IF, EXPR, T_THEN, EXPR)) ||
- new ListParser(List(T_IF, EXPR, T_THEN, EXPR, T_ELSE, EXPR)) ||
- IdParser
-
-println(EXPR.parse_all(Tok.fromString("if a then b else c")))
-println(EXPR.parse_all(Tok.fromString("if a then if x then y else c")))
-
-
-
-
--- a/progs/parser2a.scala Wed Sep 02 23:34:19 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,105 +0,0 @@
-// Parser combinators including semantic actions
-// parses lists of tokens
-//
-// Needs
-// :load matcher.scala
-
-// some regular expressions
-val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz")
-val ID = PLUS(LETTER)
-
-val DIGIT = RANGE("0123456789")
-val NONZERODIGIT = RANGE("123456789")
-val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
-
-val LPAREN = CHAR('(')
-val RPAREN = CHAR(')')
-
-val WHITESPACE = PLUS(RANGE(" \n"))
-val OPS = RANGE("+-*")
-
-// for classifying the strings that have been recognised
-abstract class Token
-
-case object T_WHITESPACE extends Token
-case class T_NUM(s: String) extends Token
-case class T_ID(s: String) extends Token
-case class T_OP(s: String) extends Token
-case object T_LPAREN extends Token
-case object T_RPAREN extends Token
-case object T_IF extends Token
-case object T_THEN extends Token
-case object T_ELSE extends Token
-
-// lexing rules for arithmetic expressions
-val lexing_rules: List[Rule[Token]]=
- List(("if", (s) => T_IF),
- ("then", (s) => T_THEN),
- ("else", (s) => T_ELSE),
- (NUMBER, (s) => T_NUM(s.mkString)),
- (ID, (s) => T_ID(s.mkString)),
- (WHITESPACE, (s) => T_WHITESPACE),
- (LPAREN, (s) => T_LPAREN),
- (RPAREN, (s) => T_RPAREN),
- (OPS, (s) => T_OP(s.mkString)))
-
-val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE))
-
-// parser combinators with return type T
-abstract class Parser[T] {
- def parse(ts: List[Token]): Set[(T, List[Token])]
-
- def parse_all(ts: List[Token]) : Set[T] =
- for ((head, tail) <- parse(ts); if (tail == Nil)) yield head
-
- def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right)
- def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f)
- def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right)
- def ~>[S] (right : => Parser[S]) : Parser[S] = this ~ right ==> (x => x._2)
- def <~[S] (right : => Parser[S]) : Parser[T] = this ~ right ==> (x => x._1)
-
-}
-
-class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] {
- def parse(sb: List[Token]) =
- for ((head1, tail1) <- p.parse(sb);
- (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2)
-}
-
-class AltParser[T](p: => Parser[T], q: => Parser[T]) extends Parser[T] {
- def parse (sb: List[Token]) = p.parse(sb) ++ q.parse(sb)
-}
-
-class FunParser[T, S](p: => Parser[T], f: T => S) extends Parser[S] {
- def parse (sb: List[Token]) =
- for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
-}
-
-
-case class TokParser(tok: Token) extends Parser[Token] {
- def parse(ts: List[Token]) = ts match {
- case t::ts if (t == tok) => Set((t, ts))
- case _ => Set ()
- }
-}
-
-implicit def token2tparser(t: Token) = TokParser(t)
-
-case object NumParser extends Parser[Int] {
- def parse(ts: List[Token]) = ts match {
- case T_NUM(s)::ts => Set((s.toInt, ts))
- case _ => Set ()
- }
-}
-
-lazy val E: Parser[Int] = (T ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z } || T
-lazy val T: Parser[Int] = (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => x * z } || F
-lazy val F: Parser[Int] = (T_LPAREN ~> E <~ T_RPAREN) || NumParser
-
-println(E.parse_all(Tok.fromString("1 + 2 + 3")))
-println(E.parse_all(Tok.fromString("1 + 2 * 3")))
-println(E.parse_all(Tok.fromString("(1 + 2) * 3")))
-
-// Excercise: implement minus
-println(E.parse_all(Tok.fromString("(1 - 2) * 3")))
-println(E.parse_all(Tok.fromString("(1 + 2) * - 3")))
--- a/progs/parser3.scala Wed Sep 02 23:34:19 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-package object parser {
-
-// parser combinators
-// with input type I and return type T
-//
-// needs to be compiled with scalac parser3.scala
-
-abstract class Parser[I <% Seq[_], T] {
- def parse(ts: I): Set[(T, I)]
-
- def parse_all(ts: I) : Set[T] =
- for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head
-
- def parse_single(ts: I) : T = parse_all(ts).toList match {
- case t::Nil => t
- case _ => { println ("Parse Error") ; sys.exit(-1) }
- }
-
- def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right)
- def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f)
- def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right)
- def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2)
- def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1)
-}
-
-class SeqParser[I <% Seq[_], T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] {
- def parse(sb: I) =
- for ((head1, tail1) <- p.parse(sb);
- (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2)
-}
-
-class AltParser[I <% Seq[_], T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] {
- def parse(sb: I) = p.parse(sb) ++ q.parse(sb)
-}
-
-class FunParser[I <% Seq[_], T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] {
- def parse(sb: I) =
- for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
-}
-
-}
--- a/progs/parser5.scala Wed Sep 02 23:34:19 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,113 +0,0 @@
-val DIGIT = RANGE("0123456789")
-val NONZERODIGIT = RANGE("123456789")
-
-val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
-val LPAREN = CHAR('(')
-val RPAREN = CHAR(')')
-val WHITESPACE = PLUS(RANGE(" \n"))
-val OPS = RANGE("+-*")
-
-// for classifying the strings that have been recognised
-abstract class Token
-case object T_WHITESPACE extends Token
-case class T_NUM(s: String) extends Token
-case class T_OP(s: String) extends Token
-case object T_LPAREN extends Token
-case object T_RPAREN extends Token
-
-val lexing_rules: List[Rule[Token]]=
- List((NUMBER, (s) => T_NUM(s.mkString)),
- (WHITESPACE, (s) => T_WHITESPACE),
- (LPAREN, (s) => T_LPAREN),
- (RPAREN, (s) => T_RPAREN),
- (OPS, (s) => T_OP(s.mkString)))
-
-val Tk = Tokenizer(lexing_rules, List(T_WHITESPACE))
-
-
-// parser combinators with input type I and return type T
-// and memoisation
-
-case class SubList[T](s: List[T], l: Int, h: Int) {
- def low = l
- def high = h
- def length = h - l
- def sublist(l: Int = l, h: Int = h) = s.slice(l, h)
- def set(low: Int = l, high: Int = h) = SubList(s, low, high)
-}
-
-type Ctxt[T] = List[(String, SubList[T])]
-
-abstract class Parser[I, T] {
-
- def parse(ts: SubList[I], ctxt: Ctxt[I]): Set[(T, SubList[I])]
-
- def parse_all(s: List[I]) : Set[T] =
- for ((head, tail) <- parse(SubList(s, 0, s.length), Nil); if (tail.sublist() == Nil)) yield head
-
- def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right)
- def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f)
- def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right)
- def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2)
- def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1)
-}
-
-class SeqParser[I, T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] {
- def parse(sb: SubList[I], ctxt: Ctxt[I]) =
- for ((head1, tail1) <- p.parse(sb, ctxt);
- (head2, tail2) <- q.parse(tail1, ctxt)) yield ((head1, head2), tail2)
-}
-
-class AltParser[I, T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] {
- def parse(sb: SubList[I], ctxt: Ctxt[I]) = p.parse(sb, ctxt) ++ q.parse(sb, ctxt)
-}
-
-class FunParser[I, T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] {
- def parse(sb: SubList[I], ctxt: Ctxt[I]) =
- for ((head, tail) <- p.parse(sb, ctxt)) yield (f(head), tail)
-}
-
-case object NumParser extends Parser[Token, Int] {
- def parse(sb: SubList[Token], ctxt: Ctxt[Token]) = {
- if (0 < sb.length) sb.sublist(sb.low, sb.low + 1) match {
- case T_NUM(i)::Nil => Set((i.toInt, sb.set(low = sb.low + 1)))
- case _ => Set()
- }
- else Set()
- }
-}
-
-case class TokParser(t: Token) extends Parser[Token, Token] {
- def parse(sb: SubList[Token], ctxt: Ctxt[Token]) = {
- if (0 < sb.length && sb.sublist(sb.low, sb.low + 1) == List(t)) Set((t, sb.set(low = sb.low + 1)))
- else Set()
- }
-}
-
-implicit def token2tparser(t: Token) = TokParser(t)
-
-class IgnLst[I, T](p: => Parser[I, T]) extends Parser[I, T] {
- def parse(sb: SubList[I], ctxt: Ctxt[I]) = {
- if (sb.length == 0) Set()
- else for ((head, tail) <- p.parse(sb.set(high = sb.high - 1), ctxt))
- yield (head, tail.set(high = tail.high + 1))
- }
-}
-
-class CHECK[I, T](nt: String, p: => Parser[I, T]) extends Parser[I, T] {
- def parse(sb: SubList[I], ctxt: Ctxt[I]) = {
- val should_trim = ctxt.contains (nt, sb)
- if (should_trim && sb.length == 0) Set()
- else if (should_trim) new IgnLst(p).parse(sb, (nt, sb)::ctxt)
- else p.parse(sb, (nt, sb)::ctxt)
- }
-}
-
-lazy val E: Parser[Token, Int] =
- new CHECK("E", (E ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z} ||
- (E ~ T_OP("*") ~ E) ==> { case ((x, y), z) => x * z} ||
- (T_LPAREN ~ E ~ T_RPAREN) ==> { case ((x, y), z) => y} ||
- NumParser)
-
-println(E.parse_all(Tk.fromString("1 + 2 * 3")))
-println(E.parse_all(Tk.fromString("(1 + 2) * 3")))
--- a/progs/token.scala Wed Sep 02 23:34:19 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,342 +0,0 @@
-// A simple lexer inspired by work of Sulzmann & Lu
-//==================================================
-
-
-import scala.language.implicitConversions
-import scala.language.reflectiveCalls
-
-// regular expressions including records
-abstract class Rexp
-case object ZERO extends Rexp
-case object ONE extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
-case class RECD(x: String, r: Rexp) extends Rexp
-
-// values
-abstract class Val
-case object Empty extends Val
-case class Chr(c: Char) extends Val
-case class Sequ(v1: Val, v2: Val) extends Val
-case class Left(v: Val) extends Val
-case class Right(v: Val) extends Val
-case class Stars(vs: List[Val]) extends Val
-case class Rec(x: String, v: Val) extends Val
-
-// some convenience for typing in regular expressions
-def charlist2rexp(s : List[Char]): Rexp = s match {
- case Nil => ONE
- case c::Nil => CHAR(c)
- case c::s => SEQ(CHAR(c), charlist2rexp(s))
-}
-implicit def string2rexp(s : String) : Rexp =
- charlist2rexp(s.toList)
-
-implicit def RexpOps(r: Rexp) = new {
- def | (s: Rexp) = ALT(r, s)
- def % = STAR(r)
- def ~ (s: Rexp) = SEQ(r, s)
-}
-
-implicit def stringOps(s: String) = new {
- def | (r: Rexp) = ALT(s, r)
- def | (r: String) = ALT(s, r)
- def % = STAR(s)
- def ~ (r: Rexp) = SEQ(s, r)
- def ~ (r: String) = SEQ(s, r)
- def $ (r: Rexp) = RECD(s, r)
-}
-
-def nullable(r: Rexp) : Boolean = r match {
- case ZERO => false
- case ONE => true
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
- case RECD(_, r1) => nullable(r1)
-}
-
-def der(c: Char, r: Rexp) : Rexp = r match {
- case ZERO => ZERO
- case ONE => ZERO
- case CHAR(d) => if (c == d) ONE else ZERO
- case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
- case STAR(r) => SEQ(der(c, r), STAR(r))
- case RECD(_, r1) => der(c, r1)
-}
-
-
-// extracts a string from value
-def flatten(v: Val) : String = v match {
- case Empty => ""
- case Chr(c) => c.toString
- case Left(v) => flatten(v)
- case Right(v) => flatten(v)
- case Sequ(v1, v2) => flatten(v1) + flatten(v2)
- case Stars(vs) => vs.map(flatten).mkString
- case Rec(_, v) => flatten(v)
-}
-
-
-// extracts an environment from a value;
-// used for tokenise a string
-def env(v: Val) : List[(String, String)] = v match {
- case Empty => Nil
- case Chr(c) => Nil
- case Left(v) => env(v)
- case Right(v) => env(v)
- case Sequ(v1, v2) => env(v1) ::: env(v2)
- case Stars(vs) => vs.flatMap(env)
- case Rec(x, v) => (x, flatten(v))::env(v)
-}
-
-// The Injection Part of the lexer
-
-def mkeps(r: Rexp) : Val = r match {
- case ONE => Empty
- case ALT(r1, r2) =>
- if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
- case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
- case STAR(r) => Stars(Nil)
- case RECD(x, r) => Rec(x, mkeps(r))
-}
-
-def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
- case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
- case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
- case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
- case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
- case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
- case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
- case (CHAR(d), Empty) => Chr(c)
- case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
-}
-
-// some "rectification" functions for simplification
-def F_ID(v: Val): Val = v
-def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
-def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
-def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
- case Right(v) => Right(f2(v))
- case Left(v) => Left(f1(v))
-}
-def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
- case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
-}
-def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) =
- (v:Val) => Sequ(f1(Empty), f2(v))
-def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) =
- (v:Val) => Sequ(f1(v), f2(Empty))
-def F_RECD(f: Val => Val) = (v:Val) => v match {
- case Rec(x, v) => Rec(x, f(v))
-}
-def F_ERROR(v: Val): Val = throw new Exception("error")
-
-def simp(r: Rexp): (Rexp, Val => Val) = r match {
- case ALT(r1, r2) => {
- val (r1s, f1s) = simp(r1)
- val (r2s, f2s) = simp(r2)
- (r1s, r2s) match {
- case (ZERO, _) => (r2s, F_RIGHT(f2s))
- case (_, ZERO) => (r1s, F_LEFT(f1s))
- case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
- else (ALT (r1s, r2s), F_ALT(f1s, f2s))
- }
- }
- case SEQ(r1, r2) => {
- val (r1s, f1s) = simp(r1)
- val (r2s, f2s) = simp(r2)
- (r1s, r2s) match {
- case (ZERO, _) => (ZERO, F_ERROR)
- case (_, ZERO) => (ZERO, F_ERROR)
- case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
- case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s))
- case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
- }
- }
- case RECD(x, r1) => {
- val (r1s, f1s) = simp(r1)
- (RECD(x, r1s), F_RECD(f1s))
- }
- case r => (r, F_ID)
-}
-
-// lexing functions including simplification
-def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
- case Nil => if (nullable(r)) mkeps(r) else
- { throw new Exception("lexing error") }
- case c::cs => {
- val (r_simp, f_simp) = simp(der(c, r))
- inj(r, c, f_simp(lex_simp(r_simp, cs)))
- }
-}
-
-def lexing_simp(r: Rexp, s: String) =
- env(lex_simp(r, s.toList))
-
-
-// The Lexing Rules for the Fun Language
-
-def PLUS(r: Rexp) = r ~ r.%
-
-def Range(s : List[Char]) : Rexp = s match {
- case Nil => ZERO
- case c::Nil => CHAR(c)
- case c::s => ALT(CHAR(c), Range(s))
-}
-def RANGE(s: String) = Range(s.toList)
-
-val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_")
-val DIGIT = RANGE("0123456789")
-val ID = SYM ~ (SYM | DIGIT).%
-val NUM = PLUS(DIGIT)
-val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write"
-val SEMI: Rexp = ";"
-val OP: Rexp = ":=" | "=" | "-" | "+" | "*" | "!=" | "<" | ">"
-val WHITESPACE = PLUS(" " | "\n" | "\t")
-val RPAREN: Rexp = "{"
-val LPAREN: Rexp = "}"
-val STRING: Rexp = "\"" ~ SYM.% ~ "\""
-
-
-val WHILE_REGS = (("k" $ KEYWORD) |
- ("i" $ ID) |
- ("o" $ OP) |
- ("n" $ NUM) |
- ("s" $ SEMI) |
- ("str" $ STRING) |
- ("p" $ (LPAREN | RPAREN)) |
- ("w" $ WHITESPACE)).%
-
-
-// Two Simple While Tests
-//========================
-
-println("test: read n")
-
-val prog0 = """read n"""
-println(lexing_simp(WHILE_REGS, prog0))
-
-println("test: read n; write n ")
-
-val prog1 = """read n; write n"""
-println(lexing_simp(WHILE_REGS, prog1))
-
-
-// Bigger Tests
-//==============
-
-// escapes strings and prints them out as "", "\n" and so on
-def esc(raw: String): String = {
- import scala.reflect.runtime.universe._
- Literal(Constant(raw)).toString
-}
-
-def escape(tks: List[(String, String)]) =
- tks.map{ case (s1, s2) => (s1, esc(s2))}
-
-val prog2 = """
-write "Fib";
-read n;
-minus1 := 0;
-minus2 := 1;
-while n > 0 do {
- temp := minus2;
- minus2 := minus1 + minus2;
- minus1 := temp;
- n := n - 1
-};
-write "Result";
-write minus2
-"""
-
-println("Tokens for Fib")
-println(escape(lexing_simp(WHILE_REGS, prog2)).mkString("\n"))
-
-
-
-val prog3 = """
-start := 1000;
-x := start;
-y := start;
-z := start;
-while 0 < x do {
- while 0 < y do {
- while 0 < z do {
- z := z - 1
- };
- z := start;
- y := y - 1
- };
- y := start;
- x := x - 1
-}
-"""
-
-println("Tokens for Loops")
-println(escape(lexing_simp(WHILE_REGS, prog3)).mkString("\n"))
-
-
-
-
-
-
-
-// The tokens for the WHILE language
-import java.io._
-
-abstract class Token extends Serializable
-case object T_SEMI extends Token
-case object T_LPAREN extends Token
-case object T_RPAREN extends Token
-case class T_ID(s: String) extends Token
-case class T_OP(s: String) extends Token
-case class T_NUM(n: Int) extends Token
-case class T_KWD(s: String) extends Token
-case class T_STR(s: String) extends Token
-
-val token : PartialFunction[(String, String), Token] = {
- case ("s", _) => T_SEMI
- case ("p", "{") => T_LPAREN
- case ("p", "}") => T_RPAREN
- case ("i", s) => T_ID(s)
- case ("o", s) => T_OP(s)
- case ("n", s) => T_NUM(s.toInt)
- case ("k", s) => T_KWD(s)
- case ("str", s) => T_STR(s)
-}
-
-def tokenise(s: String) : List[Token] =
- lexing_simp(WHILE_REGS, s).collect(token)
-
-tokenise(prog2)
-tokenise(prog3)
-
-
-object Lexer extends App {
-
- val fname = "/tmp/nflx"
-
- def serialise(tks: List[Token]) = {
- val out = new ObjectOutputStream(new FileOutputStream(fname))
- out.writeObject(tks)
- out.close
- }
-
- def deserialise() : List[Token] = {
- val in = new ObjectInputStream(new FileInputStream(fname))
- val tks = in.readObject.asInstanceOf[List[Token]]
- in.close
- tks
- }
-
- serialise(tokenise(prog2))
- println(deserialise())
-
-}
\ No newline at end of file
--- a/progs/token2.scala Wed Sep 02 23:34:19 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,463 +0,0 @@
-import scala.language.implicitConversions
-import scala.language.reflectiveCalls
-import scala.annotation.tailrec
-
-abstract class Rexp
-case object NULL extends Rexp
-case object EMPTY extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
-case class RECD(x: String, r: Rexp) extends Rexp
-case class CRANGE(cs: String) extends Rexp
-case class PLUS(r: Rexp) extends Rexp
-case class OPT(r: Rexp) extends Rexp
-case class NTIMES(r: Rexp, n: Int) extends Rexp
-
-abstract class Val
-case object Empty extends Val
-case class Chr(c: Char) extends Val
-case class Seq(v1: Val, v2: Val) extends Val
-case class Left(v: Val) extends Val
-case class Right(v: Val) extends Val
-case class Stars(vs: List[Val]) extends Val
-case class Rec(x: String, v: Val) extends Val
-
-// some convenience for typing in regular expressions
-def charlist2rexp(s : List[Char]): Rexp = s match {
- case Nil => EMPTY
- case c::Nil => CHAR(c)
- case c::s => SEQ(CHAR(c), charlist2rexp(s))
-}
-implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
-
-implicit def RexpOps(r: Rexp) = new {
- def | (s: Rexp) = ALT(r, s)
- def % = STAR(r)
- def ~ (s: Rexp) = SEQ(r, s)
-}
-
-implicit def stringOps(s: String) = new {
- def | (r: Rexp) = ALT(s, r)
- def | (r: String) = ALT(s, r)
- def % = STAR(s)
- def ~ (r: Rexp) = SEQ(s, r)
- def ~ (r: String) = SEQ(s, r)
- def $ (r: Rexp) = RECD(s, r)
-}
-
-// nullable function: tests whether the regular
-// expression can recognise the empty string
-def nullable (r: Rexp) : Boolean = r match {
- case NULL => false
- case EMPTY => true
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
- case RECD(_, r) => nullable(r)
- case CRANGE(_) => false
- case PLUS(r) => nullable(r)
- case OPT(_) => true
- case NTIMES(r, n) => if (n == 0) true else nullable(r)
-}
-
-// derivative of a regular expression w.r.t. a character
-def der (c: Char, r: Rexp) : Rexp = r match {
- case NULL => NULL
- case EMPTY => NULL
- case CHAR(d) => if (c == d) EMPTY else NULL
- case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
- case STAR(r) => SEQ(der(c, r), STAR(r))
- case RECD(_, r1) => der(c, r1)
- case CRANGE(cs) => if (cs.contains(c)) EMPTY else NULL
- case PLUS(r) => SEQ(der(c, r), STAR(r))
- case OPT(r) => ALT(der(c, r), NULL)
- case NTIMES(r, n) => if (n == 0) NULL else der(c, SEQ(r, NTIMES(r, n - 1)))
-}
-
-// derivative w.r.t. a string (iterates der)
-def ders (s: List[Char], r: Rexp) : Rexp = s match {
- case Nil => r
- case c::s => ders(s, der(c, r))
-}
-
-// extracts a string from value
-def flatten(v: Val) : String = v match {
- case Empty => ""
- case Chr(c) => c.toString
- case Left(v) => flatten(v)
- case Right(v) => flatten(v)
- case Seq(v1, v2) => flatten(v1) + flatten(v2)
- case Stars(vs) => vs.map(flatten).mkString
- case Rec(_, v) => flatten(v)
-}
-
-// extracts an environment from a value
-def env(v: Val) : List[(String, String)] = v match {
- case Empty => Nil
- case Chr(c) => Nil
- case Left(v) => env(v)
- case Right(v) => env(v)
- case Seq(v1, v2) => env(v1) ::: env(v2)
- case Stars(vs) => vs.flatMap(env)
- case Rec(x, v) => (x, flatten(v))::env(v)
-}
-
-// injection part
-def mkeps(r: Rexp) : Val = r match {
- case EMPTY => Empty
- case ALT(r1, r2) =>
- if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
- case SEQ(r1, r2) => Seq(mkeps(r1), mkeps(r2))
- case STAR(r) => Stars(Nil)
- case RECD(x, r) => Rec(x, mkeps(r))
- case PLUS(r) => Stars(List(mkeps(r)))
- case OPT(_) => Right(Empty)
- case NTIMES(r, n) => Stars(List.fill(n)(mkeps(r)))
- case _ => { println ("r : " + r.toString);
- throw new Exception("mkeps error")}
-}
-
-
-def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
- case (STAR(r), Seq(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
- case (SEQ(r1, r2), Seq(v1, v2)) => Seq(inj(r1, c, v1), v2)
- case (SEQ(r1, r2), Left(Seq(v1, v2))) => Seq(inj(r1, c, v1), v2)
- case (SEQ(r1, r2), Right(v2)) => Seq(mkeps(r1), inj(r2, c, v2))
- case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
- case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
- case (CHAR(_), Empty) => Chr(c)
- case (CRANGE(_), Empty) => Chr(c)
- case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
- case (PLUS(r), Seq(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
- case (OPT(r), Left(v1)) => Left(inj(r, c, v1))
- case (NTIMES(r, n), Seq(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
- case (NTIMES(r, n), Left(Seq(v1, Stars(vs)))) => Stars(inj(r, c, v1)::vs)
- case (NTIMES(r, n), Right(Stars(v::vs))) => Stars(mkeps(r)::inj(r, c, v)::vs)
- case _ => { println ("r : " + r.toString + " v: " + v.toString);
- throw new Exception("inj error")}
-}
-
-// main lexing function (produces a value)
-def lex(r: Rexp, s: List[Char]) : Val = s match {
- case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
- case c::cs => inj(r, c, lex(der(c, r), cs))
-}
-
-def lexing(r: Rexp, s: String) : Val = lex(r, s.toList)
-
-lexing(("ab" | "ab") ~ ("b" | EMPTY), "ab")
-
-lexing(OPT("ab"), "ab")
-
-lexing(NTIMES("1", 3), "111")
-lexing(NTIMES("1" | EMPTY, 3), "11")
-
-// some "rectification" functions for simplification
-def F_ID(v: Val): Val = v
-def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
-def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
-def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
- case Right(v) => Right(f2(v))
- case Left(v) => Left(f1(v))
-}
-def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
- case Seq(v1, v2) => Seq(f1(v1), f2(v2))
-}
-def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) =
- (v:Val) => Seq(f1(Empty), f2(v))
-def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) =
- (v:Val) => Seq(f1(v), f2(Empty))
-def F_RECD(f: Val => Val) = (v:Val) => v match {
- case Rec(x, v) => Rec(x, f(v))
-}
-def F_ERROR(v: Val): Val = throw new Exception("error")
-
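-// these functions translate a value for a simplified regex back
-// into a value for the original regex, so that inj can still be
-// applied after each simplification step
-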
-// simplification of regular expressions returning also a
-// rectification function; no simplification under STAR
-def simp(r: Rexp): (Rexp, Val => Val) = r match {
- case ALT(r1, r2) => {
- val (r1s, f1s) = simp(r1)
- val (r2s, f2s) = simp(r2)
- (r1s, r2s) match {
- case (NULL, _) => (r2s, F_RIGHT(f2s))
- case (_, NULL) => (r1s, F_LEFT(f1s))
- case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
- else (ALT (r1s, r2s), F_ALT(f1s, f2s))
- }
- }
- case SEQ(r1, r2) => {
- val (r1s, f1s) = simp(r1)
- val (r2s, f2s) = simp(r2)
- (r1s, r2s) match {
- case (NULL, _) => (NULL, F_ERROR)
- case (_, NULL) => (NULL, F_ERROR)
- case (EMPTY, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
- case (_, EMPTY) => (r1s, F_SEQ_Empty2(f1s, f2s))
- case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
- }
- }
- case RECD(x, r1) => {
- val (r1s, f1s) = simp(r1)
- (RECD(x, r1s), F_RECD(f1s))
- }
- case r => (r, F_ID)
-}
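-
-// for example, simp(ALT(NULL, CHAR('a'))) returns CHAR('a') paired
-// with a rectification function that wraps values back into Right(_)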
-
-def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
- case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
- case c::cs => {
- val (r_simp, f_simp) = simp(der(c, r))
- inj(r, c, f_simp(lex_simp(r_simp, cs)))
- }
-}
-
-def lexing_simp(r: Rexp, s: String) : Val = lex_simp(r, s.toList)
-
-lexing_simp(("a" | "ab") ~ ("b" | ""), "ab")
-
-lexing_simp(OPT("ab"), "ab")
-
-// Lexing Rules for a Small While Language
-
-val SYM = CRANGE("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
-val DIGIT = CRANGE("0123456789")
-val ID = SYM ~ (SYM | DIGIT).%
-val NUM = PLUS(DIGIT)
-val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" | "true" | "false"
-val SEMI: Rexp = ";"
-val OP: Rexp = ":=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/"
-val WHITESPACE = PLUS(" " | "\n" | "\t")
-val RPAREN: Rexp = ")"
-val LPAREN: Rexp = "("
-val BEGIN: Rexp = "{"
-val END: Rexp = "}"
-val STRING: Rexp = "\"" ~ SYM.% ~ "\""
-
-
-val WHILE_REGS = (("k" $ KEYWORD) |
- ("i" $ ID) |
- ("o" $ OP) |
- ("n" $ NUM) |
- ("s" $ SEMI) |
- ("str" $ STRING) |
- ("p" $ (LPAREN | RPAREN)) |
- ("b" $ (BEGIN | END)) |
- ("w" $ WHITESPACE)).%
-
-// filters out all white spaces
-def tokenise(r: Rexp, s: String) =
- env(lexing_simp(r, s)).filterNot { _._1 == "w" }
-
-// Testing
-//============
-
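-// measures and prints the wall-clock time (in seconds) needed
-// for evaluating 'code', returning the result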
-def time[T](code: => T) = {
- val start = System.nanoTime()
- val result = code
- val end = System.nanoTime()
- println((end - start)/1.0e9)
- result
-}
-
-println(lexing_simp(OPT("1"), "1"))
-println(lexing_simp(OPT("1"), ""))
-println(ders("111".toList, NTIMES("1",3)))
-println(lexing_simp(NTIMES("1",3), "111"))
-
-val r1 = ("a" | "ab") ~ ("bcd" | "c")
-println(lexing(r1, "abcd"))
-
-val r2 = ("" | "a") ~ ("ab" | "b")
-println(lexing(r2, "ab"))
-
-
-// Two Simple While Tests
-//========================
-println("prog0 test")
-
-val prog0 = """read n"""
-println(env(lexing_simp(WHILE_REGS, prog0)))
-
-println("prog1 test")
-
-val prog1 = """read n; write (n)"""
-println(env(lexing_simp(WHILE_REGS, prog1)))
-
-
-// Big Test
-//==========
-
-def escape(raw: String): String = {
- import scala.reflect.runtime.universe._
- Literal(Constant(raw)).toString
-}
-
-val prog2 = """
-write "Fib";
-read n;
-minus1 := 0;
-minus2 := 1;
-while n > 0 do {
- temp := minus2;
- minus2 := minus1 + minus2;
- minus1 := temp;
- n := n - 1
-};
-write "Result";
-write minus2
-"""
-
-val prog3 = """
-start := 1000;
-x := start;
-y := start;
-z := start;
-while 0 < x do {
- while 0 < y do {
- while 0 < z do {
- z := z - 1
- };
- z := start;
- y := y - 1
- };
- y := start;
- x := x - 1
-}
-"""
-
-
-println("Tokens")
-println(tokenise(WHILE_REGS, prog2).mkString("\n"))
-
-val fib_tokens = tokenise(WHILE_REGS, prog2)
-fib_tokens.map{case (s1, s2) => (escape(s1), escape(s2))}.mkString(",\n")
-
-
-val test_tokens = tokenise(WHILE_REGS, prog3)
-test_tokens.map{case (s1, s2) => (escape(s1), escape(s2))}.mkString(",\n")
-
-
-/*
-for (i <- 1 to 120 by 10) {
- print(i.toString + ": ")
- time(lexing_simp(WHILE_REGS, prog2 * i))
-}
-*/
-
-val toks_fib =
- List(("k","write"),
- ("str","Fib"),
- ("s",";"),
- ("k","read"),
- ("i","n"),
- ("s",";"),
- ("i","minus1"),
- ("o",":="),
- ("n","0"),
- ("s",";"),
- ("i","minus2"),
- ("o",":="),
- ("n","1"),
- ("s",";"),
- ("k","while"),
- ("i","n"),
- ("o",">"),
- ("n","0"),
- ("k","do"),
- ("b","{"),
- ("i","temp"),
- ("o",":="),
- ("i","minus2"),
- ("s",";"),
- ("i","minus2"),
- ("o",":="),
- ("i","minus1"),
- ("o","+"),
- ("i","minus2"),
- ("s",";"),
- ("i","minus1"),
- ("o",":="),
- ("i","temp"),
- ("s",";"),
- ("i","n"),
- ("o",":="),
- ("i","n"),
- ("o","-"),
- ("n","1"),
- ("b","}"),
- ("s",";"),
- ("k","write"),
- ("str","Result"),
- ("s",";"),
- ("k","write"),
- ("i","minus2"))
-
-val toks_test =
- List(("i","start"),
- ("o",":="),
- ("n","1000"),
- ("s",";"),
- ("i","x"),
- ("o",":="),
- ("i","start"),
- ("s",";"),
- ("i","y"),
- ("o",":="),
- ("i","start"),
- ("s",";"),
- ("i","z"),
- ("o",":="),
- ("i","start"),
- ("s",";"),
- ("k","while"),
- ("n","0"),
- ("o","<"),
- ("i","x"),
- ("k","do"),
- ("b","{"),
- ("k","while"),
- ("n","0"),
- ("o","<"),
- ("i","y"),
- ("k","do"),
- ("b","{"),
- ("k","while"),
- ("n","0"),
- ("o","<"),
- ("i","z"),
- ("k","do"),
- ("b","{"),
- ("i","z"),
- ("o",":="),
- ("i","z"),
- ("o","-"),
- ("n","1"),
- ("b","}"),
- ("s",";"),
- ("i","z"),
- ("o",":="),
- ("i","start"),
- ("s",";"),
- ("i","y"),
- ("o",":="),
- ("i","y"),
- ("o","-"),
- ("n","1"),
- ("b","}"),
- ("s",";"),
- ("i","y"),
- ("o",":="),
- ("i","start"),
- ("s",";"),
- ("i","x"),
- ("o",":="),
- ("i","x"),
- ("o","-"),
- ("n","1"),
- ("b","}"))
--- a/progs/tokenise.scala Wed Sep 02 23:34:19 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,276 +0,0 @@
-// A simple tokeniser based on the Sulzmann & Lu algorithm
-//=========================================================
-//
-// call with
-//
-// scala tokenise.scala fib.while
-//
-// scala tokenise.scala loops.while
-//
-// this will generate a .tks file that can be deserialised back
-// into a list of tokens
-// you can add -Xno-patmat-analysis in order to get rid of the
-// match-not-exhaustive warning
-
-object Tokenise {
-
-import scala.language.implicitConversions
-import scala.language.reflectiveCalls
-
-// regular expressions including records
-abstract class Rexp
-case object ZERO extends Rexp
-case object ONE extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
-case class RECD(x: String, r: Rexp) extends Rexp
-
-// values
-abstract class Val
-case object Empty extends Val
-case class Chr(c: Char) extends Val
-case class Sequ(v1: Val, v2: Val) extends Val
-case class Left(v: Val) extends Val
-case class Right(v: Val) extends Val
-case class Stars(vs: List[Val]) extends Val
-case class Rec(x: String, v: Val) extends Val
-
-// some convenience for typing in regular expressions
-def charlist2rexp(s : List[Char]): Rexp = s match {
- case Nil => ONE
- case c::Nil => CHAR(c)
- case c::s => SEQ(CHAR(c), charlist2rexp(s))
-}
-implicit def string2rexp(s : String) : Rexp =
- charlist2rexp(s.toList)
-
-implicit def RexpOps(r: Rexp) = new {
- def | (s: Rexp) = ALT(r, s)
- def % = STAR(r)
- def ~ (s: Rexp) = SEQ(r, s)
-}
-
-implicit def stringOps(s: String) = new {
- def | (r: Rexp) = ALT(s, r)
- def | (r: String) = ALT(s, r)
- def % = STAR(s)
- def ~ (r: Rexp) = SEQ(s, r)
- def ~ (r: String) = SEQ(s, r)
- def $ (r: Rexp) = RECD(s, r)
-}
-
-def nullable(r: Rexp) : Boolean = r match {
- case ZERO => false
- case ONE => true
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
- case RECD(_, r1) => nullable(r1)
-}
-
-def der(c: Char, r: Rexp) : Rexp = r match {
- case ZERO => ZERO
- case ONE => ZERO
- case CHAR(d) => if (c == d) ONE else ZERO
- case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
- case STAR(r) => SEQ(der(c, r), STAR(r))
- case RECD(_, r1) => der(c, r1)
-}
-
-
-// extracts a string from value
-def flatten(v: Val) : String = v match {
- case Empty => ""
- case Chr(c) => c.toString
- case Left(v) => flatten(v)
- case Right(v) => flatten(v)
- case Sequ(v1, v2) => flatten(v1) + flatten(v2)
- case Stars(vs) => vs.map(flatten).mkString
- case Rec(_, v) => flatten(v)
-}
-
-
-// extracts an environment from a value;
-// used for tokenising a string
-def env(v: Val) : List[(String, String)] = v match {
- case Empty => Nil
- case Chr(c) => Nil
- case Left(v) => env(v)
- case Right(v) => env(v)
- case Sequ(v1, v2) => env(v1) ::: env(v2)
- case Stars(vs) => vs.flatMap(env)
- case Rec(x, v) => (x, flatten(v))::env(v)
-}
-
-// The Injection Part of the lexer
-
-def mkeps(r: Rexp) : Val = r match {
- case ONE => Empty
- case ALT(r1, r2) =>
- if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
- case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
- case STAR(r) => Stars(Nil)
- case RECD(x, r) => Rec(x, mkeps(r))
-}
-
-def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
- case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
- case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
- case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
- case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
- case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
- case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
- case (CHAR(d), Empty) => Chr(c)
- case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
-}
-
-// some "rectification" functions for simplification
-def F_ID(v: Val): Val = v
-def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
-def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
-def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
- case Right(v) => Right(f2(v))
- case Left(v) => Left(f1(v))
-}
-def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
- case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
-}
-def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) =
- (v:Val) => Sequ(f1(Empty), f2(v))
-def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) =
- (v:Val) => Sequ(f1(v), f2(Empty))
-def F_RECD(f: Val => Val) = (v:Val) => v match {
- case Rec(x, v) => Rec(x, f(v))
-}
-def F_ERROR(v: Val): Val = throw new Exception("error")
-
-def simp(r: Rexp): (Rexp, Val => Val) = r match {
- case ALT(r1, r2) => {
- val (r1s, f1s) = simp(r1)
- val (r2s, f2s) = simp(r2)
- (r1s, r2s) match {
- case (ZERO, _) => (r2s, F_RIGHT(f2s))
- case (_, ZERO) => (r1s, F_LEFT(f1s))
- case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
- else (ALT (r1s, r2s), F_ALT(f1s, f2s))
- }
- }
- case SEQ(r1, r2) => {
- val (r1s, f1s) = simp(r1)
- val (r2s, f2s) = simp(r2)
- (r1s, r2s) match {
- case (ZERO, _) => (ZERO, F_ERROR)
- case (_, ZERO) => (ZERO, F_ERROR)
- case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
- case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s))
- case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
- }
- }
- case r => (r, F_ID)
-}
-
-// lexing functions including simplification
-def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
- case Nil => if (nullable(r)) mkeps(r) else
- { throw new Exception("lexing error") }
- case c::cs => {
- val (r_simp, f_simp) = simp(der(c, r))
- inj(r, c, f_simp(lex_simp(r_simp, cs)))
- }
-}
-
-def lexing_simp(r: Rexp, s: String) =
- env(lex_simp(r, s.toList))
-
-
-// The Lexing Rules for the WHILE Language
-
-// inefficient representations for some extended regular
-// expressions
-def PLUS(r: Rexp) = r ~ r.%
-
-def Range(s : List[Char]) : Rexp = s match {
- case Nil => ZERO
- case c::Nil => CHAR(c)
- case c::s => ALT(CHAR(c), Range(s))
-}
-def RANGE(s: String) = Range(s.toList)
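-// e.g. RANGE("abc") unfolds to ALT(CHAR('a'), ALT(CHAR('b'), CHAR('c')))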
-
-val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_")
-val DIGIT = RANGE("0123456789")
-val ID = SYM ~ (SYM | DIGIT).%
-val NUM = PLUS(DIGIT)
-val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write"
-val SEMI: Rexp = ";"
-val OP: Rexp = ":=" | "=" | "-" | "+" | "*" | "!=" | "<" | ">"
-val WHITESPACE = PLUS(" " | "\n" | "\t")
-// note: in this tokeniser the "parens" of the While language
-// are the braces
-val RPAREN: Rexp = "}"
-val LPAREN: Rexp = "{"
-val STRING: Rexp = "\"" ~ SYM.% ~ "\""
-
-
-val WHILE_REGS = (("k" $ KEYWORD) |
- ("i" $ ID) |
- ("o" $ OP) |
- ("n" $ NUM) |
- ("s" $ SEMI) |
- ("str" $ STRING) |
- ("p" $ (LPAREN | RPAREN)) |
- ("w" $ WHITESPACE)).%
-
-
-
-// Generate tokens for the WHILE language
-// and serialise them into a .tks file
-
-import java.io._
-
-abstract class Token extends Serializable
-case object T_SEMI extends Token
-case object T_LPAREN extends Token
-case object T_RPAREN extends Token
-case class T_ID(s: String) extends Token
-case class T_OP(s: String) extends Token
-case class T_NUM(n: Int) extends Token
-case class T_KWD(s: String) extends Token
-case class T_STR(s: String) extends Token
-
-// transforms pairs into tokens
-val token : PartialFunction[(String, String), Token] = {
- case ("s", _) => T_SEMI
- case ("p", "{") => T_LPAREN
- case ("p", "}") => T_RPAREN
- case ("i", s) => T_ID(s)
- case ("o", s) => T_OP(s)
- case ("n", s) => T_NUM(s.toInt)
- case ("k", s) => T_KWD(s)
- case ("str", s) => T_STR(s)
-}
-
-// filters out all uninteresting tokens
-def tokenise(s: String) : List[Token] =
- lexing_simp(WHILE_REGS, s).collect(token)
-
-
-def serialise[T](fname: String, data: T) = {
- import scala.util.Using
- Using(new ObjectOutputStream(new FileOutputStream(fname))) {
- out => out.writeObject(data)
- }
-}
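-
-// a matching deserialise is not included in this file (the parser
-// reads the .tks file back in); a minimal sketch using the same
-// conventions could be:
-def deserialise[T](fname: String) : T = {
-  import scala.util.Using
-  Using.resource(new ObjectInputStream(new FileInputStream(fname))) {
-    in => in.readObject.asInstanceOf[T]
-  }
-}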
-
-def main(args: Array[String]) : Unit = {
- val fname = args(0)
- val tname = fname.stripSuffix(".while") ++ ".tks"
- val file = io.Source.fromFile(fname).mkString
- serialise(tname, tokenise(file))
-}
-
-
-}
--- a/progs/toks.scala Wed Sep 02 23:34:19 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,136 +0,0 @@
-abstract class Token
-case object T_SEMI extends Token
-case object T_LPAREN extends Token
-case object T_RPAREN extends Token
-case class T_ID(s: String) extends Token
-case class T_OP(s: String) extends Token
-case class T_NUM(n: Int) extends Token
-case class T_KWD(s: String) extends Token
-case class T_STR(s: String) extends Token
-
-
-// Loops program
-//===============
-
-/*
-start := 1000;
-x := start;
-y := start;
-z := start;
-while 0 < x do {
- while 0 < y do {
- while 0 < z do { z := z - 1 };
- z := start;
- y := y - 1
- };
- y := start;
- x := x - 1
-}
-*/
-
-val loops =
- List(T_ID("start"), T_OP(":="), T_NUM(1000), T_SEMI, T_ID("x"), T_OP(":="),
- T_ID("start"), T_SEMI, T_ID("y"), T_OP(":="), T_ID("start"), T_SEMI,
- T_ID("z"), T_OP(":="), T_ID("start"), T_SEMI, T_KWD("while"), T_NUM(0),
- T_OP("<"), T_ID("x"), T_KWD("do"), T_LPAREN, T_KWD("while"), T_NUM(0),
- T_OP("<"), T_ID("y"), T_KWD("do"), T_LPAREN, T_KWD("while"), T_NUM(0),
- T_OP("<"), T_ID("z"), T_KWD("do"), T_LPAREN, T_ID("z"), T_OP(":="),
- T_ID("z"), T_OP("-"), T_NUM(1), T_RPAREN, T_SEMI, T_ID("z"), T_OP(":="),
- T_ID("start"), T_SEMI, T_ID("y"), T_OP(":="), T_ID("y"), T_OP("-"),
- T_NUM(1), T_RPAREN, T_SEMI, T_ID("y"), T_OP(":="), T_ID("start"),
- T_SEMI, T_ID("x"), T_OP(":="), T_ID("x"), T_OP("-"), T_NUM(1), T_RPAREN)
-
-
-// Fib program
-//=============
-
-/*
-write "Fib";
-read n;
-minus1 := 0;
-minus2 := 1;
-while n > 0 do {
- temp := minus2;
- minus2 := minus1 + minus2;
- minus1 := temp;
- n := n - 1
-};
-write "Result";
-write minus2
-*/
-
-val fib =
- List(T_KWD("write"), T_STR("Fib"), T_SEMI, T_KWD("read"), T_ID("n"),
- T_SEMI, T_ID("minus1"), T_OP(":="), T_NUM(0), T_SEMI, T_ID("minus2"),
- T_OP(":="), T_NUM(1), T_SEMI, T_KWD("while"), T_ID("n"), T_OP(">"),
- T_NUM(0), T_KWD("do"), T_LPAREN, T_ID("temp"), T_OP(":="),
- T_ID("minus2"), T_SEMI, T_ID("minus2"), T_OP(":="), T_ID("minus1"),
- T_OP("+"), T_ID("minus2"), T_SEMI, T_ID("minus1"), T_OP(":="),
- T_ID("temp"), T_SEMI, T_ID("n"), T_OP(":="), T_ID("n"), T_OP("-"),
- T_NUM(1), T_RPAREN, T_SEMI, T_KWD("write"), T_STR("Result"), T_SEMI,
- T_KWD("write"), T_ID("minus2"))
-
-
-
-// Factors program
-//=================
-
-/*
-write "Input n please";
-read n;
-write "The factors of n are";
-f := 2;
-while n != 1 do {
- while (n / f) * f == n do {
- write f;
- n := n / f
- };
- f := f + 1
-}
-*/
-
-val factors =
- List(T_KWD("write"), T_STR("Input n please"), T_SEMI, T_KWD("read"),
- T_ID("n"), T_SEMI, T_KWD("write"), T_STR("The factors of n are"),
- T_SEMI, T_ID("f"), T_OP(":="), T_NUM(2), T_SEMI, T_KWD("while"),
- T_ID("n"), T_OP("!="), T_NUM(1), T_KWD("do"), T_LPAREN,
- T_KWD("while"), T_ID("n"), T_OP("/"), T_ID("f"), T_OP("*"),
- T_ID("f"), T_OP("=="), T_ID("n"), T_KWD("do"), T_LPAREN,
- T_KWD("write"), T_ID("f"), T_SEMI, T_ID("n"), T_OP(":="),
- T_ID("n"), T_OP("/"), T_ID("f"), T_RPAREN, T_SEMI, T_ID("f"),
- T_OP(":="), T_ID("f"), T_OP("+"), T_NUM(1), T_RPAREN)
-
-
-// Primes program
-//================
-
-/*
-end := 100;
-n := 2;
-while (n < end) do {
- f := 2;
- tmp := 0;
- while ((f < n / 2 + 1) && (tmp == 0)) do {
- if ((n / f) * f == n) then { tmp := 1 } else { skip };
- f := f + 1
- };
- if (tmp == 0) then { write(n) } else { skip };
- n := n + 1
-}
-*/
-
-val primes =
- List(T_ID("end"), T_OP(":="), T_NUM(100), T_SEMI, T_ID("n"), T_OP(":="),
- T_NUM(2), T_SEMI, T_KWD("while"), T_ID("n"), T_OP("<"), T_ID("end"),
- T_KWD("do"), T_LPAREN, T_ID("f"), T_OP(":="), T_NUM(2), T_SEMI,
- T_ID("tmp"), T_OP(":="), T_NUM(0), T_SEMI, T_KWD("while"), T_ID("f"),
- T_OP("<"), T_ID("n"), T_OP("/"), T_NUM(2), T_OP("+"), T_NUM(1),
- T_OP("&&"), T_ID("tmp"), T_OP("=="), T_NUM(0), T_KWD("do"), T_LPAREN,
- T_KWD("if"), T_ID("n"), T_OP("/"), T_ID("f"), T_OP("*"), T_ID("f"),
- T_OP("=="), T_ID("n"), T_KWD("then"), T_LPAREN, T_ID("tmp"), T_OP(":="),
- T_NUM(1), T_RPAREN, T_KWD("else"), T_LPAREN, T_KWD("skip"), T_RPAREN,
- T_SEMI, T_ID("f"), T_OP(":="), T_ID("f"), T_OP("+"), T_NUM(1),
- T_RPAREN, T_SEMI, T_KWD("if"), T_ID("tmp"), T_OP("=="), T_NUM(0),
- T_KWD("then"), T_LPAREN, T_KWD("write"), T_ID("n"), T_RPAREN,
- T_KWD("else"), T_LPAREN, T_KWD("skip"), T_RPAREN, T_SEMI, T_ID("n"),
- T_OP(":="), T_ID("n"), T_OP("+"), T_NUM(1), T_RPAREN)
Binary file slides/slides01.pdf has changed
--- a/slides/slides01.tex Wed Sep 02 23:34:19 2020 +0100
+++ b/slides/slides01.tex Mon Sep 07 12:18:07 2020 +0100
@@ -5,6 +5,11 @@
\usepackage{../langs}
\usepackage{../data}
+\usepackage{tcolorbox}
+\newtcolorbox{mybox}{colback=red!5!white,colframe=red!75!black}
+\newtcolorbox{mybox2}[1]{colback=red!5!white,colframe=red!75!black,fonttitle=\bfseries,title=#1}
+\newtcolorbox{mybox3}[1]{colback=Cyan!5!white,colframe=Cyan!75!black,fonttitle=\bfseries,title=#1}
+
\hfuzz=220pt
@@ -26,8 +31,24 @@
\begin{document}
+\begin{frame}[t]
+\begin{mybox}
+A physical explanation of the \emph{dynamic matrix}\\
+lots of text
+\end{mybox}
+\begin{mybox2}{Test}
+A physical explanation of the \emph{dynamic matrix}\\
+lots of text
+\end{mybox2}
+
+\begin{mybox3}{Test}
+A physical explanation of the \emph{dynamic matrix}\\
+lots of text
+\end{mybox3}
+\end{frame}
+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{%
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/solution/cw1/cw1.scala Mon Sep 07 12:18:07 2020 +0100
@@ -0,0 +1,264 @@
+// CW 1
+import scala.language.implicitConversions
+import scala.language.reflectiveCalls
+
+abstract class Rexp
+case object ZERO extends Rexp // matches nothing
+case object ONE extends Rexp // matches the empty string
+case class CHAR(c: Char) extends Rexp // matches a character c
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternative
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence
+case class STAR(r: Rexp) extends Rexp // star
+case class RANGE(cs: Set[Char]) extends Rexp // set of characters
+case class PLUS(r: Rexp) extends Rexp // plus
+case class OPT(r: Rexp) extends Rexp // optional
+case class NTIMES(r: Rexp, n: Int) extends Rexp // n-times
+case class UPNTIMES(r: Rexp, n: Int) extends Rexp // up n-times
+case class FROMNTIMES(r: Rexp, n: Int) extends Rexp // from n-times
+case class NMTIMES(r: Rexp, n: Int, m: Int) extends Rexp // between nm-times
+case class NOT(r: Rexp) extends Rexp // not
+case class CFUN(f: Char => Boolean) extends Rexp // subsuming CHAR and RANGE
+
+// smart constructors expressed via CFUN, which subsumes CHAR and
+// RANGE; left commented out because nullable and der below still
+// pattern-match on the CHAR and RANGE case classes
+//
+//   def CHAR(c: Char) = CFUN(d => c == d)
+//   val ALL = CFUN(_ => true)
+//   def RANGE(cs: Set[Char]) = CFUN(d => cs.contains(d))
+
+
+// nullable function: tests whether the regular
+// expression can recognise the empty string
+def nullable (r: Rexp) : Boolean = r match {
+ case ZERO => false
+ case ONE => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+ case RANGE(_) => false
+ case PLUS(r) => nullable(r)
+ case OPT(_) => true
+ case NTIMES(r, n) => if (n == 0) true else nullable(r)
+ case UPNTIMES(_, _) => true
+ case FROMNTIMES(r, n) => if (n == 0) true else nullable(r)
+ case NMTIMES(r, n, m) => if (n == 0) true else nullable(r)
+ case NOT(r) => !nullable(r)
+ case CFUN(_) => false
+}
+
+// derivative of a regular expression w.r.t. a character
+def der (c: Char, r: Rexp) : Rexp = r match {
+ case ZERO => ZERO
+ case ONE => ZERO
+ case CHAR(d) => if (c == d) ONE else ZERO
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r) => SEQ(der(c, r), STAR(r))
+ case RANGE(cs) => if (cs contains c) ONE else ZERO
+ case PLUS(r) => SEQ(der(c, r), STAR(r))
+ case OPT(r) => der(c, r)
+ case NTIMES(r, n) =>
+ if (n == 0) ZERO else SEQ(der(c, r), NTIMES(r, n - 1))
+ case UPNTIMES(r, n) =>
+ if (n == 0) ZERO else SEQ(der(c, r), UPNTIMES(r, n - 1))
+ case FROMNTIMES(r, n) =>
+ if (n == 0) SEQ(der(c, r), STAR(r)) else SEQ(der(c, r), FROMNTIMES(r, n - 1))
+ case NMTIMES(r, n, m) =>
+ if (m < n) ZERO else
+ if (n == 0 && m == 0) ZERO else
+ if (n == 0) SEQ(der(c, r), UPNTIMES(r, m - 1))
+ else SEQ(der(c, r), NMTIMES(r, n - 1, m - 1))
+ case NOT(r) => NOT(der (c, r))
+ case CFUN(f) => if (f(c)) ONE else ZERO
+}
+
+def simp(r: Rexp) : Rexp = r match {
+ case ALT(r1, r2) => (simp(r1), simp(r2)) match {
+ case (ZERO, r2s) => r2s
+ case (r1s, ZERO) => r1s
+ case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s)
+ }
+ case SEQ(r1, r2) => (simp(r1), simp(r2)) match {
+ case (ZERO, _) => ZERO
+ case (_, ZERO) => ZERO
+ case (ONE, r2s) => r2s
+ case (r1s, ONE) => r1s
+ case (r1s, r2s) => SEQ(r1s, r2s)
+ }
+ case r => r
+}
+
+
+// derivative w.r.t. a string (iterates der)
+def ders (s: List[Char], r: Rexp) : Rexp = s match {
+ case Nil => r
+ case c::s => ders(s, der(c, r))
+}
+
+def ders_simp (s: List[Char], r: Rexp) : Rexp = s match {
+ case Nil => r
+ case c::s => ders_simp(s, simp(der(c, r)))
+}
+
+// main matcher function including simplification
+def matcher(r: Rexp, s: String) : Boolean =
+ nullable(ders_simp(s.toList, r))
+
+
+
+// some convenience for typing in regular expressions
+def charlist2rexp(s : List[Char]) : Rexp = s match {
+ case Nil => ONE
+ case c::Nil => CHAR(c)
+ case c::s => SEQ(CHAR(c), charlist2rexp(s))
+}
+implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
+
+implicit def RexpOps (r: Rexp) = new {
+ def | (s: Rexp) = ALT(r, s)
+ def % = STAR(r)
+ def ~ (s: Rexp) = SEQ(r, s)
+}
+
+implicit def stringOps (s: String) = new {
+ def | (r: Rexp) = ALT(s, r)
+ def | (r: String) = ALT(s, r)
+ def % = STAR(s)
+ def ~ (r: Rexp) = SEQ(s, r)
+ def ~ (r: String) = SEQ(s, r)
+}
+
+val r1 = NTIMES("a", 3)
+val r2 = NTIMES(OPT("a"), 3)
+val r3 = UPNTIMES("a", 3)
+val r4 = UPNTIMES(OPT("a"), 3)
+val r5 = NMTIMES("a", 3, 5)
+val r6 = NMTIMES(OPT("a"), 3, 5)
+
+val rs = List(r1, r2, r3, r4, r5, r6)
+
+rs.map(matcher(_, ""))
+rs.map(matcher(_, "a"))
+rs.map(matcher(_, "aa"))
+rs.map(matcher(_, "aaa"))
+rs.map(matcher(_, "aaaa"))
+rs.map(matcher(_, "aaaaa"))
+rs.map(matcher(_, "aaaaaa"))
+
+println("EMAIL:")
+val LOWERCASE = ('a' to 'z').toSet
+val DIGITS = ('0' to '9').toSet
+val EMAIL = { PLUS(CFUN(LOWERCASE | DIGITS | ("_.-").toSet)) ~ "@" ~
+ PLUS(CFUN(LOWERCASE | DIGITS | (".-").toSet)) ~ "." ~
+ NMTIMES(CFUN(LOWERCASE | Set('.')), 2, 6) }
+
+val my_email = "christian.urban@kcl.ac.uk"
+
+println(EMAIL);
+println(matcher(EMAIL, my_email))
+println(ders_simp(my_email.toList, EMAIL))
+
+println("COMMENTS:")
+val ALL = CFUN((c:Char) => true)
+val COMMENT = "/*" ~ (NOT(ALL.% ~ "*/" ~ ALL.%)) ~ "*/"
+
+println(matcher(COMMENT, "/**/"))
+println(matcher(COMMENT, "/*foobar*/"))
+println(matcher(COMMENT, "/*test*/test*/"))
+println(matcher(COMMENT, "/*test/*test*/"))
+
+println("EVILS:")
+val EVIL1 = PLUS(PLUS("a" ~ "a" ~ "a"))
+val EVIL2 = PLUS(PLUS("aaaaaaaaaaaaaaaaaaa" ~ OPT("a"))) // 19 as ~ a?
+
+
+// for EVIL1: "aaa" * 40 matches (120 as, a multiple of 3),
+// "aaa" * 43 + "aa" (131 as) and "aaa" * 45 + "a" (136 as) do not;
+// EVIL2, however, also matches the 136-as case (7 blocks of 19 or 20)
+
+println("EVIL1:")
+println(matcher(EVIL1, "aaa" * 40))
+println(matcher(EVIL1, "aaa" * 43 + "aa"))
+println(matcher(EVIL1, "aaa" * 45 + "a"))
+println("EVIL2:")
+println(matcher(EVIL2, "aaa" * 40))
+println(matcher(EVIL2, "aaa" * 43 + "aa"))
+println(matcher(EVIL2, "aaa" * 45 + "a"))
+
+println("TEST for bug pointed out by Filips Ivanovs")
+val test = NMTIMES(CFUN(LOWERCASE | Set('.')), 2, 6)
+
+println(matcher(test,"a"))
+println(matcher(test,"ab"))
+println(matcher(test,"abcdef"))
+println(matcher(test,"abc"))
+println(matcher(test,"...."))
+println(matcher(test,"asdfg"))
+println(matcher(test,"abcdefg"))
+
+println(test)
+println(ders_simp("a".toList, test))
+println(ders_simp("aa".toList, test))
+println(ders_simp("aaa".toList, test))
+println(ders_simp("aaaa".toList, test))
+println(ders_simp("aaaaa".toList, test))
+println(ders_simp("aaaaaa".toList, test))
+println(ders_simp("aaaaaaa".toList, test))
+
+def TEST(s: String, r: Rexp) = {
+  println("Input |" + s + "|")
+  println("Derivative:\n" + ders_simp(s.toList, r))
+  println("Is Nullable: " + nullable(ders_simp(s.toList, r)))
+  scala.io.StdIn.readLine()
+}
+
+
+val ALL2 = "a" | "f"
+val COMMENT2 = ("/*" ~ NOT(ALL2.% ~ "*/" ~ ALL2.%) ~ "*/")
+
+println("1) TEST TEST")
+TEST("", COMMENT2)
+TEST("/", COMMENT2)
+TEST("/*", COMMENT2)
+TEST("/*f", COMMENT2)
+TEST("/*f*", COMMENT2)
+TEST("/*f*/", COMMENT2)
+TEST("/*f*/ ", COMMENT2)
+
+val ALL3 = "a" | "f"
+val COMMENT3 = ("/" ~ NOT(ALL3.% ~ "&" ~ ALL3.%) ~ "&")
+
+println("2) TEST TEST")
+TEST("", COMMENT3)
+TEST("/", COMMENT3)
+TEST("/", COMMENT3)
+TEST("/f", COMMENT3)
+TEST("/f&", COMMENT3)
+TEST("/f& ", COMMENT3)
+
+//test: ("a" | "aa") ~ ("a" | "aa")*
+val auxEVIL3 = ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a')))
+val EVIL3 = SEQ(auxEVIL3, STAR(auxEVIL3))
+val EVIL3p = FROMNTIMES(auxEVIL3, 1)
+
+
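+// the loop below assumes the helpers time_needed and size from the
+// course's matcher files; minimal sketches of both:
+def time_needed[T](i: Int, code: => T) = {
+  val start = System.nanoTime()
+  for (j <- 1 to i) code
+  val end = System.nanoTime()
+  (end - start) / (i * 1.0e9)
+}
+
+// counts the nodes of a regular expression
+def size(r: Rexp) : Int = r match {
+  case ALT(r1, r2) => 1 + size(r1) + size(r2)
+  case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+  case STAR(r1) => 1 + size(r1)
+  case NOT(r1) => 1 + size(r1)
+  case _ => 1
+}
+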
+for (i <- 1 to 100 by 1) {
+ println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL3, "a" * i))) + " size: " +
+ size(ders(("a" * i).toList, EVIL3)))
+}
+
+
+
+// first few simplified derivatives of EVIL3
+val t1 = EVIL3
+val t2 = simp(der('a', t1))
+val t3 = simp(der('a', t2))
+val t4 = simp(der('a', t3))
+
+// and the same for the FROMNTIMES variant (renamed so the vals
+// above are not re-defined)
+val u1 = EVIL3p
+val u2 = simp(der('a', u1))
+val u3 = simp(der('a', u2))
+val u4 = simp(der('a', u3))
--- a/style.sty Wed Sep 02 23:34:19 2020 +0100
+++ b/style.sty Mon Sep 07 12:18:07 2020 +0100
@@ -52,6 +52,7 @@
%%% url pointers
+\newcommand{\hr}[1]{\href{#1}{\faHandPointRight[regular]}}
\newcommand{\here}[1]{\marginnote{\href{#1}{\faHandPointRight[regular]}}}
\newcommand{\video}[1]{\marginnote{\href{#1}{\faFilm}}}
\newcommand{\alert}{\reversemarginpar\marginpar{\mbox{}\hfill\textcolor{red}{\faExclamationTriangle}}}