--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Attic/detokenise.scala Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,42 @@
+// Detokenising the ouput of Tokeniser
+//=====================================
+//
+// call with
+//
+// scala detokenise.scala fib.tks
+//
+// scala detokenise.scala loops.tks
+
+object Detokenise {
+
+import java.io._
+import scala.util._
+
+abstract class Token extends Serializable
+case object T_SEMI extends Token
+case object T_LPAREN extends Token
+case object T_RPAREN extends Token
+case class T_ID(s: String) extends Token
+case class T_OP(s: String) extends Token
+case class T_NUM(n: Int) extends Token
+case class T_KWD(s: String) extends Token
+case class T_STR(s: String) extends Token
+
+def deserialise[T](fname: String) : Try[T] = {
+ //import scala.util.Using
+ //Using(new ObjectInputStream(new FileInputStream(fname))) {
+ // in => in.readObject.asInstanceOf[T]
+ //}
+ Try(new ObjectInputStream(new FileInputStream(fname))).get {
+ in => in.readObject.asInstanceOf[T]
+ }
+}
+
+def main(args: Array[String]) = {
+ val fname = args(0)
+ val tks = deserialise[List[Token]](fname).getOrElse(Nil)
+ println(s"Reading back from ${fname}:\n${tks.mkString(", ")}")
+}
+
+
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Attic/i.scala Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,492 @@
+
+// regular expressions including NOT
+abstract class Rexp
+
+case object NULL extends Rexp
+case object EMPTY extends Rexp
+case object ALLC extends Rexp // recognises any character
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+case class NOT(r: Rexp) extends Rexp // negation of a regular expression
+
+
+// nullable function: tests whether the regular
+// expression can recognise the empty string
+def nullable (r: Rexp) : Boolean = r match {
+ case NULL => false
+ case EMPTY => true
+ case ALLC => false
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+ case NOT(r) => !(nullable(r))
+}
+
+// tests whether a regular expression
+// cannot recognise more
+def no_more (r: Rexp) : Boolean = r match {
+ case NULL => true
+ case EMPTY => false
+ case ALLC => false
+ case CHAR(_) => false
+ case ALT(r1, r2) => no_more(r1) && no_more(r2)
+ case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1)
+ case STAR(_) => false
+ case NOT(r) => !(no_more(r))
+}
+
+
+// derivative of a regular expression w.r.t. a character
+def der (c: Char, r: Rexp) : Rexp = r match {
+ case NULL => NULL
+ case EMPTY => NULL
+ case ALLC => EMPTY
+ case CHAR(d) => if (c == d) EMPTY else NULL
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r) => SEQ(der(c, r), STAR(r))
+ case NOT(r) => NOT(der (c, r))
+}
+
+// regular expression for specifying
+// ranges of characters
+def Range(s : List[Char]) : Rexp = s match {
+ case Nil => NULL
+ case c::Nil => CHAR(c)
+ case c::s => ALT(CHAR(c), Range(s))
+}
+def RANGE(s: String) = Range(s.toList)
+
+
+// one or more
+def PLUS(r: Rexp) = SEQ(r, STAR(r))
+
+// many alternatives
+def Alts(rs: List[Rexp]) : Rexp = rs match {
+ case Nil => NULL
+ case r::Nil => r
+ case r::rs => ALT(r, Alts(rs))
+}
+def ALTS(rs: Rexp*) = Alts(rs.toList)
+
+// repetitions
+def Seqs(rs: List[Rexp]) : Rexp = rs match {
+ case Nil => NULL
+ case r::Nil => r
+ case r::rs => SEQ(r, Seqs(rs))
+}
+def SEQS(rs: Rexp*) = Seqs(rs.toList)
+
+// some convenience for typing in regular expressions
+def charlist2rexp(s : List[Char]) : Rexp = s match {
+ case Nil => EMPTY
+ case c::Nil => CHAR(c)
+ case c::s => SEQ(CHAR(c), charlist2rexp(s))
+}
+implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
+
+
+type Rule[T] = (Rexp, List[Char] => T)
+
+case class Tokenizer[T](rules: List[Rule[T]], excl: List[T] = Nil) {
+
+ def munch(r: Rexp, action: List[Char] => T, s: List[Char], t: List[Char]) : Option[(List[Char], T)] =
+ s match {
+ case Nil if (nullable(r)) => Some(Nil, action(t))
+ case Nil => None
+ case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, action(t))
+ case c::s if (no_more(der (c, r))) => None
+ case c::s => munch(der (c, r), action, s, t ::: List(c))
+ }
+
+ def one_token(s: List[Char]) : Either[(List[Char], T), String] = {
+ val somes = rules.map { (r) => munch(r._1, r._2, s, Nil) }.flatten
+ if (somes == Nil) Right(s.mkString)
+ else Left(somes sortBy (_._1.length) head)
+ }
+
+ def tokenize(cs: List[Char]) : List[T] = cs match {
+ case Nil => Nil
+ case _ => one_token(cs) match {
+ case Left((rest, token)) => token :: tokenize(rest)
+ case Right(s) => { println("Cannot tokenize: \"" + s + "\""); Nil }
+ }
+ }
+
+ def fromString(s: String) : List[T] =
+ tokenize(s.toList).filterNot(excl.contains(_))
+
+ def fromFile(name: String) : List[T] =
+ fromString(io.Source.fromFile(name).mkString)
+
+}
+
+
+// parser combinators with input type I and return type T
+
+abstract class Parser[I <% Seq[_], T] {
+ def parse(ts: I): Set[(T, I)]
+
+ def parse_all(ts: I) : Set[T] =
+ for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head
+
+ def parse_single(ts: I) : T = parse_all(ts).toList match {
+ case t::Nil => t
+ case _ => { println ("Parse Error") ; sys.exit(-1) }
+ }
+
+ def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right)
+ def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f)
+ def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right)
+ def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2)
+ def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1)
+}
+
+class SeqParser[I <% Seq[_], T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] {
+ def parse(sb: I) =
+ for ((head1, tail1) <- p.parse(sb);
+ (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2)
+}
+
+class AltParser[I <% Seq[_], T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] {
+ def parse(sb: I) = p.parse(sb) ++ q.parse(sb)
+}
+
+class FunParser[I <% Seq[_], T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] {
+ def parse(sb: I) =
+ for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
+}
+
+
+// A parser and evaluator for teh while language
+//
+//:load matcher.scala
+//:load parser3.scala
+
+// some regular expressions
+val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_")
+val DIGIT = RANGE("0123456789")
+val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT)))
+val NUM = PLUS(DIGIT)
+val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write")
+val SEMI: Rexp = ";"
+val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">")
+val WHITESPACE = PLUS(RANGE(" \n"))
+val RPAREN: Rexp = ")"
+val LPAREN: Rexp = "("
+val BEGIN: Rexp = "{"
+val END: Rexp = "}"
+val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/")
+
+// tokens for classifying the strings that have been recognised
+abstract class Token
+case object T_WHITESPACE extends Token
+case object T_COMMENT extends Token
+case object T_SEMI extends Token
+case object T_LPAREN extends Token
+case object T_RPAREN extends Token
+case object T_BEGIN extends Token
+case object T_END extends Token
+case class T_ID(s: String) extends Token
+case class T_OP(s: String) extends Token
+case class T_NUM(s: String) extends Token
+case class T_KWD(s: String) extends Token
+
+val lexing_rules: List[Rule[Token]] =
+ List((KEYWORD, (s) => T_KWD(s.mkString)),
+ (ID, (s) => T_ID(s.mkString)),
+ (OP, (s) => T_OP(s.mkString)),
+ (NUM, (s) => T_NUM(s.mkString)),
+ (SEMI, (s) => T_SEMI),
+ (LPAREN, (s) => T_LPAREN),
+ (RPAREN, (s) => T_RPAREN),
+ (BEGIN, (s) => T_BEGIN),
+ (END, (s) => T_END),
+ (WHITESPACE, (s) => T_WHITESPACE),
+ (COMMENT, (s) => T_COMMENT))
+
+// the tokenizer
+val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT))
+
+// the abstract syntax trees
+abstract class Stmt
+abstract class AExp
+abstract class BExp
+type Block = List[Stmt]
+case object Skip extends Stmt
+case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt
+case class While(b: BExp, bl: Block) extends Stmt
+case class Assign(s: String, a: AExp) extends Stmt
+case class Write(s: String) extends Stmt
+
+case class Var(s: String) extends AExp
+case class Num(i: Int) extends AExp
+case class Aop(o: String, a1: AExp, a2: AExp) extends AExp
+
+case object True extends BExp
+case object False extends BExp
+case class Relop(o: String, a1: AExp, a2: AExp) extends BExp
+
+// atomic parsers
+case class TokParser(tok: Token) extends Parser[List[Token], Token] {
+ def parse(ts: List[Token]) = ts match {
+ case t::ts if (t == tok) => Set((t, ts))
+ case _ => Set ()
+ }
+}
+implicit def token2tparser(t: Token) = TokParser(t)
+
+case object NumParser extends Parser[List[Token], Int] {
+ def parse(ts: List[Token]) = ts match {
+ case T_NUM(s)::ts => Set((s.toInt, ts))
+ case _ => Set ()
+ }
+}
+
+case object IdParser extends Parser[List[Token], String] {
+ def parse(ts: List[Token]) = ts match {
+ case T_ID(s)::ts => Set((s, ts))
+ case _ => Set ()
+ }
+}
+
+
+// arithmetic expressions
+lazy val AExp: Parser[List[Token], AExp] =
+ (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } ||
+ (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T
+lazy val T: Parser[List[Token], AExp] =
+ (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F
+lazy val F: Parser[List[Token], AExp] =
+ (T_LPAREN ~> AExp <~ T_RPAREN) ||
+ IdParser ==> Var ||
+ NumParser ==> Num
+
+// boolean expressions
+lazy val BExp: Parser[List[Token], BExp] =
+ (T_KWD("true") ==> ((_) => True: BExp)) ||
+ (T_KWD("false") ==> ((_) => False: BExp)) ||
+ (T_LPAREN ~> BExp <~ T_RPAREN) ||
+ (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Relop("=", x, z): BExp } ||
+ (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Relop("!=", x, z): BExp } ||
+ (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Relop("<", x, z): BExp } ||
+ (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Relop("<", z, x): BExp }
+
+lazy val Stmt: Parser[List[Token], Stmt] =
+ (T_KWD("skip") ==> ((_) => Skip: Stmt)) ||
+ (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } ||
+ (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==>
+ { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } ||
+ (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } ||
+ (T_KWD("write") ~ IdParser) ==> { case (x, y) => Write(y) }
+
+lazy val Stmts: Parser[List[Token], Block] =
+ (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } ||
+ (Stmt ==> ((s) => List(s) : Block))
+
+lazy val Block: Parser[List[Token], Block] =
+ (T_BEGIN ~> Stmts <~ T_END) ||
+ (Stmt ==> ((s) => List(s)))
+
+// compiler
+val beginning = """
+.class public XXX.XXX
+.super java/lang/Object
+
+.method public <init>()V
+ aload_0
+ invokenonvirtual java/lang/Object/<init>()V
+ return
+.end method
+
+.method public static write(I)V
+ .limit locals 5
+ .limit stack 5
+ iload 0
+ getstatic java/lang/System/out Ljava/io/PrintStream;
+ swap
+ invokevirtual java/io/PrintStream/println(I)V
+ return
+.end method
+
+
+.method public static main([Ljava/lang/String;)V
+ .limit locals 200
+ .limit stack 200
+
+"""
+
+val ending = """
+
+ return
+
+.end method
+"""
+
+// for generating new labels
+var counter = -1
+
+def Fresh(x: String) = {
+ counter += 1
+ x ++ "_" ++ counter.toString()
+}
+
+type Env = Map[String, String]
+type Instrs = List[String]
+
+def compile_aexp(a: AExp, env : Env) : Instrs = a match {
+ case Num(i) => List("ldc " + i.toString + "\n")
+ case Var(s) => List("iload " + env(s) + "\n")
+ case Aop("+", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("iadd\n")
+ case Aop("-", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("isub\n")
+ case Aop("*", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("imul\n")
+}
+
+def compile_bexp(b: BExp, env : Env, jmp: String) : Instrs = b match {
+ case True => Nil
+ case False => List("goto " + jmp + "\n")
+ case Relop("=", a1, a2) =>
+ compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpne " + jmp + "\n")
+ case Relop("!=", a1, a2) =>
+ compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpeq " + jmp + "\n")
+ case Relop("<", a1, a2) =>
+ compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpge " + jmp + "\n")
+}
+
+
+def compile_stmt(s: Stmt, env: Env) : (Instrs, Env) = s match {
+ case Skip => (Nil, env)
+ case Assign(x, a) => {
+ val index = if (env.isDefinedAt(x)) env(x) else env.keys.size.toString
+ (compile_aexp(a, env) ++
+ List("istore " + index + "\n"), env + (x -> index))
+ }
+ case If(b, bl1, bl2) => {
+ val if_else = Fresh("If_else")
+ val if_end = Fresh("If_end")
+ val (instrs1, env1) = compile_bl(bl1, env)
+ val (instrs2, env2) = compile_bl(bl2, env1)
+ (compile_bexp(b, env, if_else) ++
+ instrs1 ++
+ List("goto " + if_end + "\n") ++
+ List("\n" + if_else + ":\n\n") ++
+ instrs2 ++
+ List("\n" + if_end + ":\n\n"), env2)
+ }
+ case While(b, bl) => {
+ val loop_begin = Fresh("Loop_begin")
+ val loop_end = Fresh("Loop_end")
+ val (instrs1, env1) = compile_bl(bl, env)
+ (List("\n" + loop_begin + ":\n\n") ++
+ compile_bexp(b, env, loop_end) ++
+ instrs1 ++
+ List("goto " + loop_begin + "\n") ++
+ List("\n" + loop_end + ":\n\n"), env1)
+ }
+ case Write(x) =>
+ (List("iload " + env(x) + "\n" + "invokestatic XXX/XXX/write(I)V\n"), env)
+}
+
+def compile_bl(bl: Block, env: Env) : (Instrs, Env) = bl match {
+ case Nil => (Nil, env)
+ case s::bl => {
+ val (instrs1, env1) = compile_stmt(s, env)
+ val (instrs2, env2) = compile_bl(bl, env1)
+ (instrs1 ++ instrs2, env2)
+ }
+}
+
+def compile(input: String) : String = {
+ val class_name = input.split('.')(0)
+ val tks = Tok.fromFile(input)
+ val ast = Stmts.parse_single(tks)
+ val instructions = compile_bl(ast, Map.empty)._1
+ (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name)
+}
+
+
+def compile_to(input: String, output: String) = {
+ val fw = new java.io.FileWriter(output)
+ fw.write(compile(input))
+ fw.close()
+}
+
+//
+val tks = Tok.fromString("x := x + 1")
+val ast = Stmt.parse_single(tks)
+println(compile_stmt(ast, Map("x" -> "n"))._1.mkString)
+
+
+
+//examples
+
+compile_to("loops.while", "loops.j")
+//compile_to("fib.while", "fib.j")
+
+
+// testing cases for time measurements
+/*
+def time_needed[T](i: Int, code: => T) = {
+ val start = System.nanoTime()
+ for (j <- 1 to i) code
+ val end = System.nanoTime()
+ (end - start)/(i * 1.0e9)
+}
+
+// for testing
+import scala.sys.process._
+
+val test_prog = """
+start := XXX;
+x := start;
+y := start;
+z := start;
+while 0 < x do {
+ while 0 < y do {
+ while 0 < z do {
+ z := z - 1
+ };
+ z := start;
+ y := y - 1
+ };
+ y := start;
+ x := x - 1
+};
+write x;
+write y;
+write z
+"""
+
+
+def compile_test(n: Int) : Unit = {
+ val class_name = "LOOP"
+ val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString))
+ val ast = Stmts.parse_single(tks)
+ val instructions = compile_bl(ast, Map.empty)._1
+ val assembly = (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name)
+ val fw = new java.io.FileWriter(class_name + ".j")
+ fw.write(assembly)
+ fw.close()
+ val test = ("java -jar jvm/jasmin-2.4/jasmin.jar " + class_name + ".j").!!
+ println(n + " " + time_needed(2, ("java " + class_name + "/" + class_name).!!))
+}
+
+List(1, 5000, 10000, 50000, 100000, 250000, 500000, 750000, 1000000).map(compile_test(_))
+
+
+
+// javabyte code assmbler
+//
+// java -jar jvm/jasmin-2.4/jasmin.jar loops.j
+
+*/
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Attic/lexer.scala Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,280 @@
+// A simple lexer inspired by work of Sulzmann & Lu
+//==================================================
+
+
+import scala.language.implicitConversions
+import scala.language.reflectiveCalls
+
+// regular expressions including records
+abstract class Rexp
+case object ZERO extends Rexp
+case object ONE extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+case class RECD(x: String, r: Rexp) extends Rexp
+
+// values
+abstract class Val
+case object Empty extends Val
+case class Chr(c: Char) extends Val
+case class Sequ(v1: Val, v2: Val) extends Val
+case class Left(v: Val) extends Val
+case class Right(v: Val) extends Val
+case class Stars(vs: List[Val]) extends Val
+case class Rec(x: String, v: Val) extends Val
+
+// some convenience for typing in regular expressions
+def charlist2rexp(s : List[Char]): Rexp = s match {
+ case Nil => ONE
+ case c::Nil => CHAR(c)
+ case c::s => SEQ(CHAR(c), charlist2rexp(s))
+}
+implicit def string2rexp(s : String) : Rexp =
+ charlist2rexp(s.toList)
+
+implicit def RexpOps(r: Rexp) = new {
+ def | (s: Rexp) = ALT(r, s)
+ def % = STAR(r)
+ def ~ (s: Rexp) = SEQ(r, s)
+}
+
+implicit def stringOps(s: String) = new {
+ def | (r: Rexp) = ALT(s, r)
+ def | (r: String) = ALT(s, r)
+ def % = STAR(s)
+ def ~ (r: Rexp) = SEQ(s, r)
+ def ~ (r: String) = SEQ(s, r)
+ def $ (r: Rexp) = RECD(s, r)
+}
+
+def nullable(r: Rexp) : Boolean = r match {
+ case ZERO => false
+ case ONE => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+ case RECD(_, r1) => nullable(r1)
+}
+
+def der(c: Char, r: Rexp) : Rexp = r match {
+ case ZERO => ZERO
+ case ONE => ZERO
+ case CHAR(d) => if (c == d) ONE else ZERO
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r) => SEQ(der(c, r), STAR(r))
+ case RECD(_, r1) => der(c, r1)
+}
+
+
+// extracts a string from value
+def flatten(v: Val) : String = v match {
+ case Empty => ""
+ case Chr(c) => c.toString
+ case Left(v) => flatten(v)
+ case Right(v) => flatten(v)
+ case Sequ(v1, v2) => flatten(v1) + flatten(v2)
+ case Stars(vs) => vs.map(flatten).mkString
+ case Rec(_, v) => flatten(v)
+}
+
+
+// extracts an environment from a value;
+// used for tokenise a string
+def env(v: Val) : List[(String, String)] = v match {
+ case Empty => Nil
+ case Chr(c) => Nil
+ case Left(v) => env(v)
+ case Right(v) => env(v)
+ case Sequ(v1, v2) => env(v1) ::: env(v2)
+ case Stars(vs) => vs.flatMap(env)
+ case Rec(x, v) => (x, flatten(v))::env(v)
+}
+
+// The Injection Part of the lexer
+
+def mkeps(r: Rexp) : Val = r match {
+ case ONE => Empty
+ case ALT(r1, r2) =>
+ if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
+ case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
+ case STAR(r) => Stars(Nil)
+ case RECD(x, r) => Rec(x, mkeps(r))
+}
+
+def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
+ case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
+ case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
+ case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
+ case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
+ case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
+ case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
+ case (CHAR(d), Empty) => Chr(c)
+ case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
+}
+
+// some "rectification" functions for simplification
+def F_ID(v: Val): Val = v
+def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
+def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
+def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
+ case Right(v) => Right(f2(v))
+ case Left(v) => Left(f1(v))
+}
+def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
+ case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
+}
+def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) =
+ (v:Val) => Sequ(f1(Empty), f2(v))
+def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) =
+ (v:Val) => Sequ(f1(v), f2(Empty))
+def F_RECD(f: Val => Val) = (v:Val) => v match {
+ case Rec(x, v) => Rec(x, f(v))
+}
+def F_ERROR(v: Val): Val = throw new Exception("error")
+
+def simp(r: Rexp): (Rexp, Val => Val) = r match {
+ case ALT(r1, r2) => {
+ val (r1s, f1s) = simp(r1)
+ val (r2s, f2s) = simp(r2)
+ (r1s, r2s) match {
+ case (ZERO, _) => (r2s, F_RIGHT(f2s))
+ case (_, ZERO) => (r1s, F_LEFT(f1s))
+ case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
+ else (ALT (r1s, r2s), F_ALT(f1s, f2s))
+ }
+ }
+ case SEQ(r1, r2) => {
+ val (r1s, f1s) = simp(r1)
+ val (r2s, f2s) = simp(r2)
+ (r1s, r2s) match {
+ case (ZERO, _) => (ZERO, F_ERROR)
+ case (_, ZERO) => (ZERO, F_ERROR)
+ case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
+ case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s))
+ case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
+ }
+ }
+ case r => (r, F_ID)
+}
+
+// lexing functions including simplification
+def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
+ case Nil => if (nullable(r)) mkeps(r) else
+ { throw new Exception("lexing error") }
+ case c::cs => {
+ val (r_simp, f_simp) = simp(der(c, r))
+ inj(r, c, f_simp(lex_simp(r_simp, cs)))
+ }
+}
+
+def lexing_simp(r: Rexp, s: String) =
+ env(lex_simp(r, s.toList))
+
+
+// The Lexing Rules for the Fun Language
+
+def PLUS(r: Rexp) = r ~ r.%
+
+def Range(s : List[Char]) : Rexp = s match {
+ case Nil => ZERO
+ case c::Nil => CHAR(c)
+ case c::s => ALT(CHAR(c), Range(s))
+}
+def RANGE(s: String) = Range(s.toList)
+
+val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_")
+val DIGIT = RANGE("0123456789")
+val ID = SYM ~ (SYM | DIGIT).%
+val NUM = PLUS(DIGIT)
+val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write"
+val SEMI: Rexp = ";"
+val OP: Rexp = ":=" | "=" | "-" | "+" | "*" | "!=" | "<" | ">"
+val WHITESPACE = PLUS(" " | "\n" | "\t")
+val RPAREN: Rexp = "{"
+val LPAREN: Rexp = "}"
+val STRING: Rexp = "\"" ~ SYM.% ~ "\""
+
+
+val WHILE_REGS = (("k" $ KEYWORD) |
+ ("i" $ ID) |
+ ("o" $ OP) |
+ ("n" $ NUM) |
+ ("s" $ SEMI) |
+ ("str" $ STRING) |
+ ("p" $ (LPAREN | RPAREN)) |
+ ("w" $ WHITESPACE)).%
+
+
+// Two Simple While Tests
+//========================
+
+println("test: read n")
+
+val prog0 = """read n"""
+println(lexing_simp(WHILE_REGS, prog0))
+
+println("test: read n; write n ")
+
+val prog1 = """read n; write n"""
+println(lexing_simp(WHILE_REGS, prog1))
+
+
+// Bigger Tests
+//==============
+
+// escapes strings and prints them out as "", "\n" and so on
+def esc(raw: String): String = {
+ import scala.reflect.runtime.universe._
+ Literal(Constant(raw)).toString
+}
+
+def escape(tks: List[(String, String)]) =
+ tks.map{ case (s1, s2) => (s1, esc(s2))}
+
+val prog2 = """
+write "Fib";
+read n;
+minus1 := 0;
+minus2 := 1;
+while n > 0 do {
+ temp := minus2;
+ minus2 := minus1 + minus2;
+ minus1 := temp;
+ n := n - 1
+};
+write "Result";
+write minus2
+"""
+
+println("lexing Fib")
+println(escape(lexing_simp(WHILE_REGS, prog2)).mkString("\n"))
+
+
+
+val prog3 = """
+start := 1000;
+x := start;
+y := start;
+z := start;
+while 0 < x do {
+ while 0 < y do {
+ while 0 < z do {
+ z := z - 1
+ };
+ z := start;
+ y := y - 1
+ };
+ y := start;
+ x := x - 1
+}
+"""
+
+println("lexing Loops")
+println(escape(lexing_simp(WHILE_REGS, prog3)).mkString("\n"))
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Attic/mllex.scala Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,109 @@
+:load matcher.scala
+
+
+// some regular expressions
+val KEYWORDS = ALTS(List("#", "(", ")", ",", "->", "...", ":", ":>", ";", "=",
+ "=>", "[", "]", "_", "{", "|", "}", "abstype", "and", "andalso", "as",
+ "case", "datatype", "do", "else", "end", "eqtype", "exception", "fn",
+ "fun", "functor", "handle", "if", "in", "include", "infix", "infixr",
+ "let", "local", "nonfix", "of", "op", "open", "orelse", "raise", "rec",
+ "sharing", "sig", "signature", "struct", "structure", "then", "type",
+ "val", "where", "while", "with", "withtype"))
+
+val DIGITS = RANGE("0123456789")
+val NONZERODIGITS = RANGE("123456789")
+
+val POSITIVES = ALT(SEQ(NONZERODIGITS, STAR(DIGITS)), "0")
+val INTEGERS = ALT(SEQ("~", POSITIVES), POSITIVES)
+
+val ALL = ALTS(KEYWORDS, INTEGERS)
+
+val COMMENT = SEQS("/*", NOT(SEGS(STAR(ALL), "*/", STAR(ALL))), "*/")
+
+
+
+val LPAREN = CHAR('(')
+val RPAREN = CHAR(')')
+val WHITESPACE = PLUS(RANGE(" \n".toList))
+val OPS = RANGE("+-*".toList)
+
+// for classifying the strings that have been recognised
+abstract class Token
+case object T_WHITESPACE extends Token
+case object T_NUM extends Token
+case class T_OP(s: String) extends Token
+case object T_LPAREN extends Token
+case object T_RPAREN extends Token
+case class T_NT(s: String, rhs: List[Token]) extends Token
+
+def tokenizer(rs: List[Rule[Token]], s: String) : List[Token] =
+ tokenize(rs, s.toList).filterNot(_ match {
+ case T_WHITESPACE => true
+ case _ => false
+ })
+
+
+
+// lexing rules for arithmetic expressions
+val lexing_rules: List[Rule[Token]]=
+ List((NUMBER, (s) => T_NUM),
+ (WHITESPACE, (s) => T_WHITESPACE),
+ (LPAREN, (s) => T_LPAREN),
+ (RPAREN, (s) => T_RPAREN),
+ (OPS, (s) => T_OP(s.mkString)))
+
+tokenize_file(Nil, "nominal_library.ML")
+
+
+
+
+type Grammar = List[(String, List[Token])]
+
+// grammar for arithmetic expressions
+val grammar =
+ List ("E" -> List(T_NUM),
+ "E" -> List(T_NT("E", Nil), T_OP("+"), T_NT("E", Nil)),
+ "E" -> List(T_NT("E", Nil), T_OP("-"), T_NT("E", Nil)),
+ "E" -> List(T_NT("E", Nil), T_OP("*"), T_NT("E", Nil)),
+ "E" -> List(T_LPAREN, T_NT("E", Nil), T_RPAREN))
+
+def startsWith[A](ts1: List[A], ts2: List[A]) : Boolean = (ts1, ts2) match {
+ case (_, Nil) => true
+ case (T_NT(e, _)::ts1,T_NT(f, _)::ts2) => (e == f) && startsWith(ts1, ts2)
+ case (t1::ts1, t2::ts2) => (t1 == t2) && startsWith(ts1, ts2)
+ case _ => false
+}
+
+def chop[A](ts1: List[A], prefix: List[A], ts2: List[A]) : Option[(List[A], List[A])] =
+ ts1 match {
+ case Nil => None
+ case t::ts =>
+ if (startsWith(ts1, prefix)) Some(ts2.reverse, ts1.drop(prefix.length))
+ else chop(ts, prefix, t::ts2)
+ }
+
+// examples
+chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil)
+chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil)
+
+def replace[A](ts: List[A], out: List[A], in: List [A]) =
+ chop(ts, out, Nil) match {
+ case None => None
+ case Some((before, after)) => Some(before ::: in ::: after)
+ }
+
+def parse1(g: Grammar, ts: List[Token]) : Boolean = ts match {
+ case List(T_NT("E", tree)) => { println(tree); true }
+ case _ => {
+ val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(T_NT(lhs, rhs)))
+ tss.flatten.exists(parse1(g, _))
+ }
+}
+
+
+println() ; parse1(grammar, tokenizer(lexing_rules, "2 + 3 * 4 + 1"))
+println() ; parse1(grammar, tokenizer(lexing_rules, "(2 + 3) * (4 + 1)"))
+println() ; parse1(grammar, tokenizer(lexing_rules, "(2 + 3) * 4 (4 + 1)"))
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Attic/parser1.scala Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,88 @@
+// A naive bottom-up parser with backtracking
+//
+// Needs:
+// :load matcher.scala
+
+// some regular expressions
+val DIGIT = RANGE("0123456789")
+val NONZERODIGIT = RANGE("123456789")
+
+val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
+val LPAREN = CHAR('(')
+val RPAREN = CHAR(')')
+val WHITESPACE = PLUS(RANGE(" \n"))
+val OPS = RANGE("+-*")
+
+// for classifying the strings that have been recognised
+
+abstract class Token
+case object T_WHITESPACE extends Token
+case object T_NUM extends Token
+case class T_OP(s: String) extends Token
+case object T_LPAREN extends Token
+case object T_RPAREN extends Token
+case class NT(s: String) extends Token
+
+// lexing rules for arithmetic expressions
+val lexing_rules: List[Rule[Token]]=
+ List((NUMBER, (s) => T_NUM),
+ (WHITESPACE, (s) => T_WHITESPACE),
+ (LPAREN, (s) => T_LPAREN),
+ (RPAREN, (s) => T_RPAREN),
+ (OPS, (s) => T_OP(s.mkString)))
+
+// the tokenizer
+val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE))
+
+type Grammar = List[(String, List[Token])]
+
+// grammar for arithmetic expressions
+val grammar =
+ List ("F" -> List(T_NUM),
+ "E" -> List(T_NUM),
+ "E" -> List(NT("E"), T_OP("+"), NT("E")),
+ "E" -> List(NT("E"), T_OP("-"), NT("E")),
+ "E" -> List(NT("E"), T_OP("*"), NT("E")),
+ "E" -> List(T_LPAREN, NT("E"), T_RPAREN))
+
+
+def chop[A](ts1: List[A], prefix: List[A], ts2: List[A]) : Option[(List[A], List[A])] =
+ ts1 match {
+ case Nil => None
+ case t::ts =>
+ if (ts1.startsWith(prefix)) Some(ts2.reverse, ts1.drop(prefix.length))
+ else chop(ts, prefix, t::ts2)
+ }
+
+// examples for chop
+chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil)
+chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil)
+
+def replace[A](ts: List[A], out: List[A], in: List [A]) =
+ chop(ts, out, Nil) match {
+ case None => None
+ case Some((before, after)) => Some(before ::: in ::: after)
+ }
+
+def parse(g: Grammar, ts: List[Token]) : Boolean = {
+ println(ts)
+ if (ts == List(NT("E"))) true
+ else {
+ val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(NT(lhs)))
+ tss.flatten.exists(parse(g, _))
+ }
+}
+
+def parser(g: Grammar, s: String) = {
+ println("\n")
+ parse(g, Tok.fromString(s))
+}
+
+
+
+parser(grammar, "2 + 3 * 4 + 1")
+parser(grammar, "(2 + 3) * (4 + 1)")
+parser(grammar, "(2 + 3) * 4 (4 + 1)")
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Attic/parser4.scala Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,82 @@
+
+// parser combinators with input type I and return type T
+
+case class SubString(s: String, l: Int, h: Int) {
+ def low = l
+ def high = h
+ def length = h - l
+ def substring(l: Int = l, h: Int = h) = s.slice(l, h)
+ def set(low: Int = l, high: Int = h) = SubString(s, low, high)
+
+}
+
+type Ctxt = List[(String, SubString)]
+
+abstract class Parser[T] {
+
+ def parse(ts: SubString, ctxt: Ctxt): Set[(T, SubString)]
+
+ def parse_all(s: String) : Set[T] =
+ for ((head, tail) <- parse(SubString(s, 0, s.length), Nil); if (tail.substring() == "")) yield head
+
+ def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right)
+ def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f)
+ def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right)
+}
+
+class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] {
+ def parse(sb: SubString, ctxt: Ctxt) =
+ for ((head1, tail1) <- p.parse(sb, ctxt);
+ (head2, tail2) <- q.parse(tail1, ctxt)) yield ((head1, head2), tail2)
+}
+
+class AltParser[T](p: => Parser[T], q: => Parser[T]) extends Parser[T] {
+ def parse(sb: SubString, ctxt: Ctxt) = p.parse(sb, ctxt) ++ q.parse(sb, ctxt)
+}
+
+class FunParser[T, S](p: => Parser[T], f: T => S) extends Parser[S] {
+ def parse(sb: SubString, ctxt: Ctxt) =
+ for ((head, tail) <- p.parse(sb, ctxt)) yield (f(head), tail)
+}
+
+case class SubStringParser(s: String) extends Parser[SubString] {
+ val n = s.length
+ def parse(sb: SubString, ctxt: Ctxt) = {
+ if (n <= sb.length && sb.substring(sb.low, sb.low + n) == s)
+ Set((sb.set(high = sb.low + n), sb.set(low = sb.low + n)))
+ else Set()
+ }
+}
+
+implicit def string2parser(s: String) = SubStringParser(s) ==> (_.substring())
+
+class IgnLst[T](p: => Parser[T]) extends Parser[T] {
+ def parse(sb: SubString, ctxt: Ctxt) = {
+ if (sb.length == 0) Set()
+ else for ((head, tail) <- p.parse(sb.set(high = sb.high - 1), ctxt))
+ yield (head, tail.set(high = tail.high + 1))
+ }
+}
+
+class CHECK[T](nt: String, p: => Parser[T]) extends Parser[T] {
+ def parse(sb: SubString, ctxt: Ctxt) = {
+ val should_trim = ctxt.contains (nt, sb)
+ if (should_trim && sb.length == 0) Set()
+ else if (should_trim) new IgnLst(p).parse(sb, (nt, sb)::ctxt)
+ else p.parse(sb, (nt, sb)::ctxt)
+ }
+}
+
+// ambigous grammar
+lazy val E: Parser[Int] =
+ new CHECK("E", (E ~ "+" ~ E) ==> { case ((x, y), z) => x + z} ||
+ (E ~ "*" ~ E) ==> { case ((x, y), z) => x * z} ||
+ ("(" ~ E ~ ")") ==> { case ((x, y), z) => y} ||
+ "0" ==> { (s) => 0 } ||
+ "1" ==> { (s) => 1 } ||
+ "2" ==> { (s) => 2 } ||
+ "3" ==> { (s) => 3 })
+
+println(E.parse_all("1+2*3+3"))
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Attic/re-alt.scala Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,115 @@
+trait RegExp {
+ def nullable: Boolean
+ def derive(c: Char): RegExp
+}
+
+case object Empty extends RegExp {
+ def nullable = false
+ def derive(c: Char) = Empty
+}
+
+case object Eps extends RegExp {
+ def nullable = true
+ def derive(c: Char) = Empty
+}
+
+case class Str(s: String) extends RegExp {
+ def nullable = s.isEmpty
+ def derive(c: Char) =
+ if (s.isEmpty || s.head != c) Empty
+ else Str(s.tail)
+}
+
+case class Cat(r: RegExp, s: RegExp) extends RegExp {
+ def nullable = r.nullable && s.nullable
+ def derive(c: Char) =
+ if (r.nullable) Or(Cat(r.derive(c), s), s.derive(c))
+ else Cat(r.derive(c), s)
+}
+
+case class Star(r: RegExp) extends RegExp {
+ def nullable = true
+ def derive(c: Char) = Cat(r.derive(c), this)
+}
+
+case class Or(r: RegExp, s: RegExp) extends RegExp {
+ def nullable = r.nullable || s.nullable
+ def derive(c: Char) = Or(r.derive(c), s.derive(c))
+}
+
+case class And(r: RegExp, s: RegExp) extends RegExp {
+ def nullable = r.nullable && s.nullable
+ def derive(c: Char) = And(r.derive(c), s.derive(c))
+}
+
+case class Not(r: RegExp) extends RegExp {
+ def nullable = !r.nullable
+ def derive(c: Char) = Not(r.derive(c))
+}
+
+
+
+
+object Matcher {
+ def matches(r: RegExp, s: String): Boolean = {
+ if (s.isEmpty) r.nullable
+ else matches(r.derive(s.head), s.tail)
+ }
+}
+
+
+object Pimps {
+ implicit def string2RegExp(s: String) = Str(s)
+
+ implicit def regExpOps(r: RegExp) = new {
+ def | (s: RegExp) = Or(r, s)
+ def & (s: RegExp) = And(r, s)
+ def % = Star(r)
+ def %(n: Int) = rep(r, n)
+ def ? = Or(Eps, r)
+ def ! = Not(r)
+ def ++ (s: RegExp) = Cat(r, s)
+ def ~ (s: String) = Matcher.matches(r, s)
+ }
+
+ implicit def stringOps(s: String) = new {
+ def | (r: RegExp) = Or(s, r)
+ def | (r: String) = Or(s, r)
+ def & (r: RegExp) = And(s, r)
+ def & (r: String) = And(s, r)
+ def % = Star(s)
+ def % (n: Int) = rep(Str(s), n)
+ def ? = Or(Eps, s)
+ def ! = Not(s)
+ def ++ (r: RegExp) = Cat(s, r)
+ def ++ (r: String) = Cat(s, r)
+ def ~ (t: String) = Matcher.matches(s, t)
+ }
+
+ def rep(r: RegExp, n: Int): RegExp =
+ if (n <= 0) Star(r)
+ else Cat(r, rep(r, n - 1))
+}
+
+
+object Test {
+ import Pimps._
+
+ val digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
+ val int = ("+" | "-").? ++ digit.%(1)
+ val real = ("+" | "-").? ++ digit.%(1) ++ ("." ++ digit.%(1)).? ++ (("e" | "E") ++ ("+" | "-").? ++ digit.%(1)).?
+
+ def main(args: Array[String]) {
+ val ints = List("0", "-4534", "+049", "99")
+ val reals = List("0.9", "-12.8", "+91.0", "9e12", "+9.21E-12", "-512E+01")
+ val errs = List("", "-", "+", "+-1", "-+2", "2-")
+
+ ints.foreach(s => assert(int ~ s))
+ reals.foreach(s => assert(!(int ~ s)))
+ errs.foreach(s => assert(!(int ~ s)))
+
+ ints.foreach(s => assert(real ~ s))
+ reals.foreach(s => assert(real ~ s))
+ errs.foreach(s => assert(!(real ~ s)))
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Attic/re-internal.scala Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,17 @@
+
+// measures the time a function needs
+def time_needed[T](i: Int, code: => T) = {
+ val start = System.nanoTime()
+ for (j <- 1 to i) code
+ val end = System.nanoTime()
+ (end - start)/(i * 1.0e9)
+}
+
+
+for (i <- 1 to 10001 by 300) {
+ val re = ("((a?){" + i + "})(a{" + i + "})")
+ println(i + " " + "%.5f".format(time_needed(1, ("a" * i).matches(re))))
+}
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Attic/re-sulzmann-partial.scala Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,136 @@
+import scala.language.implicitConversions
+import scala.language.reflectiveCalls
+
+abstract class Rexp
+
+case object NULL extends Rexp
+case object EMPTY extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+
+abstract class Pat
+
+case class VAR(x: String, r: Rexp) extends Pat
+case class GRP(x: String, p: Pat) extends Pat
+case class PSEQ(p1: Pat, p2: Pat) extends Pat
+case class PALT(p1: Pat, p2: Pat) extends Pat
+case class PSTAR(p: Pat) extends Pat
+
+
+def nullable (r: Rexp) : Boolean = r match {
+ case NULL => false
+ case EMPTY => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+}
+
+def down (p: Pat) : Rexp = p match {
+ case VAR(x: String, w: String, r: Rexp) => r
+ case GRP(x: String, w: String, p: Pat) => down(p)
+ case PSEQ(p1: Pat, p2: Pat) => SEQ(down(p1), down(p2))
+ case PALT(p1: Pat, p2: Pat) => ALT(down(p1), down(p2))
+ case PSTAR(p: Pat) => STAR(down(p))
+}
+
+def patnullable (p: Pat) : Boolean = p match {
+ case PVar(_, r) => nullable(r)
+ case PSEQ(p1, p2) => patnullable(p1) && patnullable(p2)
+ case PCHOICE(p1, p2) => patnullable(p1) || patnullable(p2)
+ case PSTAR(p) => true
+ case PatVar(_, p) => patnullable(p)
+}
+
+//type Env = Set[List[(String, String)]]
+type Env = Map[Int, String]
+
+def update(n: Int, c: Char) (env: Env) =
+ env + (n -> (env.getOrElse(n, "") + c.toString))
+
+def pderivPat (p: Pat, c: Char) : Set[(Pat, Env => Env)] = p match {
+ case PVar(n: Int, r: Rexp) => {
+ val pds = pderiv(r, c)
+ if (pds.isEmpty) Set()
+ else Set((PVar(n, toRexp(pds.toList)), update(n, c)))
+ }
+ case PSEQ(p1: Pat, p2: Pat) => {
+ val pats : Set[(Pat, Env => Env)] =
+ for ((p, f) <- pderivPat(p1, c)) yield (PSEQ(p, p2), f)
+ if (nullable(strip(p1))) pats ++ pderivPat(p2, c)
+ else pats
+ }
+ case PCHOICE(p1: Pat, p2: Pat) => pderivPat(p1, c) ++ pderivPat(p2, c)
+ case PSTAR(p1: Pat) =>
+ for ((p, f) <- pderivPat(p1, c)) yield (PSEQ(p, PSTAR(p1)), f)
+ case PatVar(n: Int, p1: Pat) =>
+ for ((p, f) <- pderivPat(p1, c)) yield (PatVar(n, p), f compose (update (n, c)))
+}
+
+
+val p2 = PSEQ(PVar(1, STAR("A")), PVar(2, STAR("A")))
+pderivPat(p2, 'A').mkString("\n")
+
+
+def greedy_aux(env: Set[(Pat, Env)], w: List[Char]) : Set[Env] = w match {
+ case Nil =>
+ for ((p, e) <- env if patnullable(p)) yield e
+ case c::cs => {
+ val env2 = for ((p, e) <- env; (p1, f) <- pderivPat(p, c)) yield (p1, f(e))
+ greedy_aux(env2, cs)
+ }
+}
+
+def greedy(p: Pat, w: String) = {
+ val res = greedy_aux(Set((p, Map())), w.toList)
+ if (res.isEmpty) None else Some(res)
+}
+
+// some convenience for typing in regular expressions
+def charlist2rexp (s : List[Char]) : Rexp = s match {
+ case Nil => EMPTY
+ case c::Nil => CHAR(c)
+ case c::s => SEQ(CHAR(c), charlist2rexp(s))
+}
+implicit def string2rexp (s : String) : Rexp = charlist2rexp(s.toList)
+
+implicit def RexpOps (r: Rexp) = new {
+ def | (s: Rexp) = ALT(r, s)
+ def % = STAR(r)
+ def ~ (s: Rexp) = SEQ(r, s)
+}
+
+implicit def stringOps (s: String) = new {
+ def | (r: Rexp) = ALT(s, r)
+ def | (r: String) = ALT(s, r)
+ def % = STAR(s)
+ def ~ (r: Rexp) = SEQ(s, r)
+ def ~ (r: String) = SEQ(s, r)
+}
+
+implicit def PatOps (p: Pat) = new {
+ def | (q: Pat) = PALT(p, q)
+ def % = PSTAR(p)
+ def ~ (q: Pat) = PSEQ(p, q)
+}
+
+val p3 = PSTAR(PSEQ(PVar(1, "A"), PVar(2, "B")))
+
+greedy2(Set((p3, Map())), "ABAB".toList)
+
+
+val p4 = PVar(1, "A")
+greedy2(Set((p4, Map())), "A".toList)
+
+val p5 = PSEQ(PVar(1, "A"), PVar(1, "B"))
+greedy2(Set((p5, Map())), "AB".toList)
+
+val res = pderivPat(p5, 'A')
+for ((_, f) <- res) yield f(Map())
+
+
+val p6 = PatVar(4,PSTAR(PCHOICE(PCHOICE(PVar(1, "A"), PVar(2, "AB")), PVar(3, "B"))))
+
+greedy(p6, "ABA")
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Attic/re-sulzmann.scala Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,164 @@
+import scala.language.implicitConversions
+import scala.language.reflectiveCalls
+
+abstract class Rexp
+
+case object NULL extends Rexp
+case object EMPTY extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+
+abstract class Pat
+
+case class VAR(x: String, w: String, r: Rexp) extends Pat
+case class GRP(x: String, w: String, p: Pat) extends Pat
+case class PSEQ(p1: Pat, p2: Pat) extends Pat
+case class PALT(p1: Pat, p2: Pat) extends Pat
+case class PSTAR(p: Pat) extends Pat
+
+object VAR { def apply(x: String, r: Rexp) : VAR = VAR(x, "", r) }
+object GRP { def apply(x: String, p: Pat) : GRP = GRP(x, "", p) }
+
+
+def nullable (r: Rexp) : Boolean = r match {
+ case NULL => false
+ case EMPTY => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+}
+
+// derivative of a regular expression w.r.t. a character
+def der (c: Char, r: Rexp) : Rexp = r match {
+ case NULL => NULL
+ case EMPTY => NULL
+ case CHAR(d) => if (c == d) EMPTY else NULL
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r) => SEQ(der(c, r), STAR(r))
+}
+
+def down (p: Pat) : Rexp = p match {
+ case VAR(x: String, w: String, r: Rexp) => r
+ case GRP(x: String, w: String, p: Pat) => down(p)
+ case PSEQ(p1: Pat, p2: Pat) => SEQ(down(p1), down(p2))
+ case PALT(p1: Pat, p2: Pat) => ALT(down(p1), down(p2))
+ case PSTAR(p: Pat) => STAR(down(p))
+}
+
+def empty (p: Pat) : Pat = p match {
+ case VAR(x: String, w: String, r: Rexp) =>
+ if (nullable(r)) VAR(x, w, EMPTY)
+ else VAR(x, w, NULL)
+ case GRP(x: String, w: String, p: Pat) => GRP(x, w, empty(p))
+ case PSEQ(p1: Pat, p2: Pat) => PSEQ(empty(p1), empty(p2))
+ case PALT(p1: Pat, p2: Pat) => PALT(empty(p1), empty(p2))
+ case PSTAR(p: Pat) => PSTAR(empty(p))
+}
+
+def patder (c: Char, p: Pat) : Pat = p match {
+ case VAR(x: String, w: String, r: Rexp) => VAR(x, w + c, der(c, r))
+ case GRP(x: String, w: String, p: Pat) => GRP(x, w + c, patder(c, p))
+ case PSEQ(p1: Pat, p2: Pat) =>
+ if (nullable(down(p1))) PALT(PSEQ(patder(c, p1), p2), PSEQ(empty(p1), patder(c, p2)))
+ else PSEQ(patder(c, p1), p2)
+ case PALT(p1: Pat, p2: Pat) => PALT(patder(c, p1), patder(c, p2))
+ case PSTAR(p: Pat) => PSEQ(patder(c, p), PSTAR(p))
+}
+
+def patders (s: List[Char], p: Pat) : Pat = s match {
+ case Nil => p
+ case c::s => patders(s, patder(c, p))
+}
+
+type Env = Set[List[(String, String)]]
+
+def special (p: Pat, env: Env) : Env =
+ if (env == Set() && nullable(down(p))) Set(Nil) else env
+
+def env (p: Pat) : Env = p match {
+ case VAR(x: String, w: String, r: Rexp) =>
+ if (nullable(r)) Set(List((x, w))) else Set()
+ case GRP(x: String, w: String, p1: Pat) =>
+ special(p, for (e <- env(p1)) yield List((x, w)) ++ e)
+ case PSEQ(p1: Pat, p2: Pat) =>
+ special(p, for (e1 <- env(p1); e2 <- env(p2)) yield (e1 ++ e2))
+ case PALT(p1: Pat, p2: Pat) => special(p, env(p1) ++ env(p2))
+ case PSTAR(p1: Pat) => special(p, env(p1))
+}
+
+def matcher (p: Pat, s: String) = env(empty(patders(s.toList, p)))
+
+
+// some convenience for typing in regular expressions
+def charlist2rexp (s : List[Char]) : Rexp = s match {
+ case Nil => EMPTY
+ case c::Nil => CHAR(c)
+ case c::s => SEQ(CHAR(c), charlist2rexp(s))
+}
+implicit def string2rexp (s : String) : Rexp = charlist2rexp(s.toList)
+
+implicit def RexpOps (r: Rexp) = new {
+ def | (s: Rexp) = ALT(r, s)
+ def % = STAR(r)
+ def ~ (s: Rexp) = SEQ(r, s)
+}
+
+implicit def stringOps (s: String) = new {
+ def | (r: Rexp) = ALT(s, r)
+ def | (r: String) = ALT(s, r)
+ def % = STAR(s)
+ def ~ (r: Rexp) = SEQ(s, r)
+ def ~ (r: String) = SEQ(s, r)
+}
+
+implicit def PatOps (p: Pat) = new {
+ def | (q: Pat) = PALT(p, q)
+ def % = PSTAR(p)
+ def ~ (q: Pat) = PSEQ(p, q)
+}
+
+
+val p0 = VAR("x", "A")
+
+patders("A".toList, p0)
+matcher(p0, "A")
+
+val p1 = VAR("x", "A") | VAR("y", "A") | VAR("z", "B")
+
+patders("A".toList, p1)
+matcher(p1, "A")
+matcher(p1, "B")
+
+val p2 = VAR("x", "AB")
+matcher(p2, "AB")
+matcher(p2, "AA")
+
+val p3 = VAR("x", "A" ~ "B")
+matcher(p3, "AB")
+matcher(p3, "AA")
+
+val p4 = VAR("x", "A") ~ VAR("y", "B")
+matcher(p4, "AB")
+matcher(p4, "AA")
+
+val p5 = VAR("x", "A".%)
+matcher(p5, "A")
+matcher(p5, "AA")
+matcher(p5, "")
+matcher(p5, "AAA")
+
+val p6 = VAR("x", "A").%
+matcher(p6, "A")
+matcher(p6, "AA")
+matcher(p6, "")
+matcher(p6, "AAA")
+
+val p7 = (VAR("x", "A") | VAR("y", "AB") | VAR("z", "B")).%
+matcher(p7, "AB")
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Attic/re.scala Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,123 @@
+
+// regular expressions including NOT
+abstract class Rexp
+
+case object NULL extends Rexp
+case object EMPTY extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+case class NOT(r: Rexp) extends Rexp
+
+
+// some convenience for typing in regular expressions
+def charlist2rexp(s : List[Char]) : Rexp = s match {
+ case Nil => EMPTY
+ case c::Nil => CHAR(c)
+ case c::s => SEQ(CHAR(c), charlist2rexp(s))
+}
+implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
+
+
+// nullable function: tests whether the regular
+// expression can recognise the empty string
+def nullable (r: Rexp) : Boolean = r match {
+ case NULL => false
+ case EMPTY => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+ case NOT(r) => !(nullable(r))
+}
+
+// tests whether a regular expression
+// cannot recognise more
+def no_more (r: Rexp) : Boolean = r match {
+ case NULL => true
+ case EMPTY => false
+ case CHAR(_) => false
+ case ALT(r1, r2) => no_more(r1) && no_more(r2)
+ case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1)
+ case STAR(_) => false
+ case NOT(r) => !(no_more(r))
+}
+
+
+// derivative of a regular expression w.r.t. a character
+def der (c: Char, r: Rexp) : Rexp = r match {
+ case NULL => NULL
+ case EMPTY => NULL
+ case CHAR(d) => if (c == d) EMPTY else NULL
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r) => SEQ(der(c, r), STAR(r))
+ case NOT(r) => NOT(der (c, r))
+}
+
+
+// regular expression for specifying
+// ranges of characters
+def RANGE(s : List[Char]) : Rexp = s match {
+ case Nil => NULL
+ case c::Nil => CHAR(c)
+ case c::s => ALT(CHAR(c), RANGE(s))
+}
+
+//one or more
+def PLUS(r: Rexp) = SEQ(r, STAR(r))
+
+
+//some regular expressions
+val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList)
+val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList)
+val LETTER = ALT(LOWERCASE, UPPERCASE)
+val DIGIT = RANGE("0123456789".toList)
+val NONZERODIGIT = RANGE("123456789".toList)
+
+val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGIT)))
+val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
+val WHITESPACE = RANGE(" \n".toList)
+val WHITESPACES = PLUS(WHITESPACE)
+
+val ALL = ALT(ALT(LETTER, DIGIT), WHITESPACE)
+val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/")
+
+
+// an example list of regular expressions
+val regs: List[Rexp]= List("if", "then", "else", "+", IDENT, NUMBER, WHITESPACES, COMMENT)
+
+
+def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s)
+
+def munch(r: Rexp, s: List[Char], t: List[Char]) : Option[(List[Char], List[Char])] =
+ s match {
+ case Nil if (nullable(r)) => Some(Nil, t)
+ case Nil => None
+ case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, t)
+ case c::s if (no_more(der (c, r))) => None
+ case c::s => munch(der (c, r), s, t ::: List(c))
+ }
+
+def one_string (regs: List[Rexp], s: List[Char]) : (List[Char], List[Char]) = {
+ val somes = regs.map { munch(_, s, Nil) } .flatten
+ if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head)
+}
+
+def tokenize (regs: List[Rexp], s: List[Char]) : List[String] = s match {
+ case Nil => Nil
+ case _ => one_string(regs, s) match {
+ case (rest, s) => s.mkString :: tokenize(regs, rest)
+ }
+}
+
+//examples
+println(tokenize(regs, "if true then then 42 else +".toList))
+println(tokenize(regs, "if+true+then+then+42+else +".toList))
+println(tokenize(regs, "ifff if 34 34".toList))
+println(tokenize(regs, "/*ifff if */ hhjj /*34 */".toList))
+println(tokenize(regs, "/* if true then */ then 42 else +".toList))
+//println(tokenize(regs, "ifff $ if 34".toList)) // causes an error because of the symbol $
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Attic/re0.scala Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,119 @@
+import scala.annotation.tailrec
+import scala.language.implicitConversions
+
+abstract class Rexp
+
+case object NULL extends Rexp
+case object EMPTY extends Rexp
+case object ALLCHAR extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class STR(s: String) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+case class NOT(r: Rexp) extends Rexp
+case class REP(r: Rexp, n: Int) extends Rexp
+
+// some convenience for typing in regular expressions
+implicit def string2rexp(s : String) : Rexp = STR(s)
+
+implicit def RexpOps(r: Rexp) : Rexp = new {
+ def | (s: Rexp) = ALT(r, s)
+ def % = STAR(r)
+ def %(n: Int) = REP(r, n)
+ def %%(n: Int) = SEQ(REP(r, n), STAR(r))
+ def ? = ALT(EMPTY, r)
+ def unary_! = NOT(r)
+ def ~ (s: Rexp) = SEQ(r, s)
+}
+
+implicit def stringOps(s: String) : Rexp = new {
+ def | (r: Rexp) = ALT(s, r)
+ def | (r: String) = ALT(s, r)
+ def % = STAR(s)
+ def %(n: Int) = REP(s, n)
+ def %%(n: Int) = SEQ(REP(s, n), STAR(s))
+ def ? = ALT(EMPTY, s)
+ def unary_! = NOT(s)
+ def ~ (r: Rexp) = SEQ(s, r)
+ def ~ (r: String) = SEQ(s, r)
+}
+
+
+// nullable function: tests whether the regular
+// expression can recognise the empty string
+
+def nullable (r: Rexp) : Boolean = r match {
+ case NULL => false
+ case EMPTY => true
+ case ALLCHAR => false
+ case CHAR(_) => false
+ case STR(s) => s.isEmpty
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+ case NOT(r) => !(nullable(r))
+ case REP(r, i) => if (i == 0) true else nullable(r)
+}
+
+// derivative of a regular expression w.r.t. a character
+def der (c: Char, r: Rexp) : Rexp = r match {
+ case NULL => NULL
+ case EMPTY => NULL
+ case ALLCHAR => EMPTY
+ case CHAR(d) => if (c == d) EMPTY else NULL
+ case STR(s) => if (s.isEmpty || s.head != c) NULL else STR(s.tail)
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r) => SEQ(der(c, r), STAR(r))
+ case NOT(r) => NOT(der (c, r))
+ case REP(r, i) =>
+ if (i == 0) NULL else SEQ(der(c, r), REP(r, i - 1))
+}
+
+
+// derivative w.r.t. a string (iterates der)
+@tailrec
+def ders (s: List[Char], r: Rexp) : Rexp = s match {
+ case Nil => r
+ case c::s => ders(s, der(c, r))
+}
+
+// main matcher function
+def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
+
+//examples
+val digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
+val int = ("+" | "-").? ~ digit.%%(1)
+val real = ("+" | "-").? ~ digit.%%(1) ~ ("." ~ digit.%%(1)).? ~ (("e" | "E") ~ ("+" | "-").? ~ digit.%%(1)).?
+
+val ints = List("0", "-4534", "+049", "99")
+val reals = List("0.9", "-12.8", "+91.0", "9e12", "+9.21E-12", "-512E+01")
+val errs = List("", "-", "+", "+-1", "-+2", "2-")
+
+ints.map(s => matcher(int, s))
+reals.map(s => matcher(int, s))
+errs.map(s => matcher(int, s))
+
+ints.map(s => matcher(real, s))
+reals.map(s => matcher(real, s))
+errs.map(s => matcher(real, s))
+
+
+
+def RTEST(n: Int) = ("a".? %(n)) ~ ("a" %(n))
+
+def time_needed[T](i: Int, code: => T) = {
+ val start = System.nanoTime()
+ for (j <- 1 to i) code
+ val end = System.nanoTime()
+ (end - start)/(i * 1.0e9)
+}
+
+for (i <- 1 to 12000 by 500) {
+ println(i + ": " + "%.5f".format(time_needed(1, matcher(RTEST(i), "a" * i))))
+}
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Attic/re1.scala Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,154 @@
+// A simple matcher for basic regular expressions
+
+abstract class Rexp
+case object ZERO extends Rexp // matches nothing
+case object ONE extends Rexp // matches an empty string
+case class CHAR(c: Char) extends Rexp // matches a character c
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternative
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence
+case class STAR(r: Rexp) extends Rexp // star
+
+// nullable function: tests whether a regular
+// expression can recognise the empty string
+def nullable(r: Rexp) : Boolean = r match {
+ case ZERO => false
+ case ONE => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+}
+
+// the derivative of a regular expression w.r.t. a character
+def der (c: Char, r: Rexp) : Rexp = r match {
+ case ZERO => ZERO
+ case ONE => ZERO
+ case CHAR(d) => if (c == d) ONE else ZERO
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r1) => SEQ(der(c, r1), STAR(r1))
+}
+
+// the derivative w.r.t. a string (iterates der)
+def ders (s: List[Char], r: Rexp) : Rexp = s match {
+ case Nil => r
+ case c::s => ders(s, der(c, r))
+}
+
+// the main matcher function
+def matcher(r: Rexp, s: String) : Boolean =
+ nullable(ders(s.toList, r))
+
+
+// examples from the homework
+
+val r = STAR(ALT(SEQ(CHAR('a'), CHAR('b')), CHAR('b')))
+der('a', r)
+der('b', r)
+der('c', r)
+
+val r2 = SEQ(SEQ(CHAR('x'), CHAR('y')), CHAR('z'))
+der('x', r2)
+der('y', der('x', r2))
+der('z', der('y', der('x', r2)))
+
+
+// the optional regular expression (one or zero times)
+def OPT(r: Rexp) = ALT(r, ONE)
+
+// the n-times regular expression (explicitly expanded)
+def NTIMES(r: Rexp, n: Int) : Rexp = n match {
+ case 0 => ONE
+ case 1 => r
+ case n => SEQ(r, NTIMES(r, n - 1))
+}
+
+
+// Test Cases
+
+// the evil regular expression a?{n} a{n}
+def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
+
+// the evil regular expression (a*)*b
+val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
+
+// for measuring time
+def time_needed[T](i: Int, code: => T) = {
+ val start = System.nanoTime()
+ for (j <- 1 to i) code
+ val end = System.nanoTime()
+ (end - start) / (i * 1.0e9)
+}
+
+
+// test: (a?{n}) (a{n})
+println("Test (a?{n}) (a{n})")
+
+for (i <- 0 to 20 by 2) {
+ println(f"$i: ${time_needed(2, matcher(EVIL1(i), "a" * i))}%.5f")
+}
+
+// test: (a*)* b
+println("Test (a*)* b")
+
+for (i <- 0 to 20 by 2) {
+ println(f"$i: ${time_needed(2, matcher(EVIL2, "a" * i))}%.5f")
+}
+
+
+// the size of a regular expressions - for testing purposes
+def size(r: Rexp) : Int = r match {
+ case ZERO => 1
+ case ONE => 1
+ case CHAR(_) => 1
+ case ALT(r1, r2) => 1 + size(r1) + size(r2)
+ case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+ case STAR(r) => 1 + size(r)
+}
+
+// the expicit expansion in EVIL1(n) increases
+// drastically its size
+
+size(EVIL1(1)) // 5
+size(EVIL1(3)) // 17
+size(EVIL1(5)) // 29
+size(EVIL1(7)) // 41
+size(EVIL1(20)) // 119
+
+// given a regular expression and building successive
+// derivatives might result into bigger and bigger
+// regular expressions...here is an example for this:
+
+// (a+b)* o a o b o (a+b)*
+val BIG_aux = STAR(ALT(CHAR('a'), CHAR('b')))
+val BIG = SEQ(BIG_aux, SEQ(CHAR('a'),SEQ(CHAR('b'), BIG_aux)))
+
+size(ders("".toList, BIG)) // 13
+size(ders("ab".toList, BIG)) // 51
+size(ders("abab".toList, BIG)) // 112
+size(ders("ababab".toList, BIG)) // 191
+size(ders("abababab".toList, BIG)) // 288
+size(ders("ababababab".toList, BIG)) // 403
+size(ders("abababababab".toList, BIG)) // 536
+
+
+size(ders(("ab" * 200).toList, BIG)) // 366808
+
+for (i <- 0 to 200 by 10) {
+ println(f"$i: ${time_needed(2, matcher(BIG, "ab" * i))}%.5f")
+}
+
+
+
+
+//////////////////////////////////////
+def concat(A: Set[String], B: Set[String]) : Set[String] =
+ for (s1 <- A; s2 <- B) yield s1 ++ s2
+
+
+val A = Set("foo", "bar")
+val B = Set("a", "b")
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Attic/re2.scala Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,118 @@
+// A Version with an explicit n-times regular expression;
+// this keeps the size of the regular expression in the
+// EVIL1 test-case quite small
+
+abstract class Rexp
+case object ZERO extends Rexp
+case object ONE extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+case class NTIMES(r: Rexp, n: Int) extends Rexp //explicit constructor for n-times
+
+
+def nullable (r: Rexp) : Boolean = r match {
+ case ZERO => false
+ case ONE => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+ case NTIMES(r, i) => if (i == 0) true else nullable(r)
+}
+
+
+def der (c: Char, r: Rexp) : Rexp = r match {
+ case ZERO => ZERO
+ case ONE => ZERO
+ case CHAR(d) => if (c == d) ONE else ZERO
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r1) => SEQ(der(c, r1), STAR(r1))
+ case NTIMES(r1, i) =>
+ if (i == 0) ZERO else SEQ(der(c, r1), NTIMES(r1, i - 1))
+}
+
+def ders (s: List[Char], r: Rexp) : Rexp = s match {
+ case Nil => r
+ case c::s => ders(s, der(c, r))
+}
+
+def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
+
+
+// the optional regular expression: one or zero times
+// this regular expression is still defined in terms of ALT
+def OPT(r: Rexp) = ALT(r, ONE)
+
+
+// Test Cases
+
+// evil regular expressions
+def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
+val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
+
+def time_needed[T](i: Int, code: => T) = {
+ val start = System.nanoTime()
+ for (j <- 1 to i) code
+ val end = System.nanoTime()
+ (end - start) / (i * 1.0e9)
+}
+
+
+
+// test: (a?{n}) (a{n})
+for (i <- 0 to 1000 by 100) {
+ println(f"$i: ${time_needed(2, matcher(EVIL1(i), "a" * i))}%.5f")
+}
+
+// test: (a*)* b
+for (i <- 0 to 20) {
+ println(f"$i: ${time_needed(2, matcher(EVIL2, "a" * i))}%.5f")
+}
+
+
+// the size of a regular expressions - for testing purposes
+def size(r: Rexp) : Int = r match {
+ case ZERO => 1
+ case ONE => 1
+ case CHAR(_) => 1
+ case ALT(r1, r2) => 1 + size(r1) + size(r2)
+ case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+ case STAR(r) => 1 + size(r)
+ case NTIMES(r, _) => 1 + size(r)
+}
+
+// EVIL1(n) has now a constant size, no matter
+// what n is; also the derivative only grows
+// moderately
+
+size(EVIL1(1)) // 7
+size(EVIL1(3)) // 7
+size(EVIL1(5)) // 7
+size(EVIL1(7)) // 7
+size(EVIL1(20)) // 7
+
+size(ders("".toList, EVIL1(5))) // 7
+size(ders("a".toList, EVIL1(5))) // 16
+size(ders("aa".toList, EVIL1(5))) // 35
+size(ders("aaa".toList, EVIL1(5))) // 59
+size(ders("aaaa".toList, EVIL1(5))) // 88
+size(ders("aaaaa".toList, EVIL1(5))) // 122
+size(ders("aaaaaa".toList, EVIL1(5))) // 151
+
+// but the size of the derivatives can still grow
+// quite dramatically in case of EVIL2
+
+size(ders("".toList, EVIL2)) // 5
+size(ders("a".toList, EVIL2)) // 12
+size(ders("aa".toList, EVIL2)) // 28
+size(ders("aaa".toList, EVIL2)) // 58
+size(ders("aaaa".toList, EVIL2)) // 116
+size(ders("aaaaa".toList, EVIL2)) // 230
+size(ders("aaaaaa".toList, EVIL2)) // 456
+
+size(ders(("a" * 20).toList, EVIL2)) // 7340068
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Attic/re3.scala Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,127 @@
+// A version with simplification of derivatives;
+// this keeps the regular expressions small, which
+// is good for the run-time
+
+
+abstract class Rexp
+case object ZERO extends Rexp
+case object ONE extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+case class NTIMES(r: Rexp, n: Int) extends Rexp
+
+
+
+// the nullable function: tests whether the regular
+// expression can recognise the empty string
+def nullable (r: Rexp) : Boolean = r match {
+ case ZERO => false
+ case ONE => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+ case NTIMES(r, i) => if (i == 0) true else nullable(r)
+}
+
+// the derivative of a regular expression w.r.t. a character
+def der (c: Char, r: Rexp) : Rexp = r match {
+ case ZERO => ZERO
+ case ONE => ZERO
+ case CHAR(d) => if (c == d) ONE else ZERO
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r1) => SEQ(der(c, r1), STAR(r1))
+ case NTIMES(r, i) =>
+ if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1))
+}
+
+def simp(r: Rexp) : Rexp = r match {
+ case ALT(r1, r2) => (simp(r1), simp(r2)) match {
+ case (ZERO, r2s) => r2s
+ case (r1s, ZERO) => r1s
+ case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s)
+ }
+ case SEQ(r1, r2) => (simp(r1), simp(r2)) match {
+ case (ZERO, _) => ZERO
+ case (_, ZERO) => ZERO
+ case (ONE, r2s) => r2s
+ case (r1s, ONE) => r1s
+ case (r1s, r2s) => SEQ(r1s, r2s)
+ }
+ case r => r
+}
+
+
+// the derivative w.r.t. a string (iterates der)
+def ders(s: List[Char], r: Rexp) : Rexp = s match {
+ case Nil => r
+ case c::s => ders(s, simp(der(c, r)))
+}
+
+
+// the main matcher function
+def matcher(r: Rexp, s: String) : Boolean =
+ nullable(ders(s.toList, r))
+
+
+// one or zero
+def OPT(r: Rexp) = ALT(r, ONE)
+
+
+// Test Cases
+
+// evil regular expressions: (a?){n} a{n} and (a*)* b
+def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
+val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
+
+
+def time_needed[T](i: Int, code: => T) = {
+ val start = System.nanoTime()
+ for (j <- 1 to i) code
+ val end = System.nanoTime()
+ (end - start)/(i * 1.0e9)
+}
+
+
+//test: (a?{n}) (a{n})
+for (i <- 0 to 8000 by 1000) {
+ println(f"$i: ${time_needed(3, matcher(EVIL1(i), "a" * i))}%.5f")
+}
+
+//test: (a*)* b
+for (i <- 0 to 6000000 by 500000) {
+ println(f"$i: ${time_needed(3, matcher(EVIL2, "a" * i))}%.5f")
+}
+
+
+// size of a regular expressions - for testing purposes
+def size(r: Rexp) : Int = r match {
+ case ZERO => 1
+ case ONE => 1
+ case CHAR(_) => 1
+ case ALT(r1, r2) => 1 + size(r1) + size(r2)
+ case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+ case STAR(r) => 1 + size(r)
+ case NTIMES(r, _) => 1 + size(r)
+}
+
+
+// now the size of the derivatives grows
+// much, much slower
+
+size(ders("".toList, EVIL2)) // 5
+size(ders("a".toList, EVIL2)) // 8
+size(ders("aa".toList, EVIL2)) // 8
+size(ders("aaa".toList, EVIL2)) // 8
+size(ders("aaaa".toList, EVIL2)) // 8
+size(ders("aaaaa".toList, EVIL2)) // 8
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Attic/re3a.scala Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,112 @@
+
+abstract class Rexp
+case object ZERO extends Rexp
+case object ONE extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+case class NTIMES(r: Rexp, n: Int) extends Rexp
+case class UPNTIMES(r: Rexp, n: Int) extends Rexp
+
+// the nullable function: tests whether the regular
+// expression can recognise the empty string
+def nullable (r: Rexp) : Boolean = r match {
+ case ZERO => false
+ case ONE => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+ case NTIMES(r, i) => if (i == 0) true else nullable(r)
+ case UPNTIMES(r: Rexp, n: Int) => true
+}
+
+// the derivative of a regular expression w.r.t. a character
+def der (c: Char, r: Rexp) : Rexp = r match {
+ case ZERO => ZERO
+ case ONE => ZERO
+ case CHAR(d) => if (c == d) ONE else ZERO
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r1) => SEQ(der(c, r1), STAR(r1))
+ case NTIMES(r1, i) =>
+ if (i == 0) ZERO else
+ if (nullable(r1)) SEQ(der(c, r1), UPNTIMES(r1, i - 1))
+ else SEQ(der(c, r1), NTIMES(r1, i - 1))
+ case UPNTIMES(r1, i) =>
+ if (i == 0) ZERO
+ else SEQ(der(c, r1), UPNTIMES(r1, i - 1))
+}
+
+def simp(r: Rexp) : Rexp = r match {
+ case ALT(r1, r2) => (simp(r1), simp(r2)) match {
+ case (ZERO, r2s) => r2s
+ case (r1s, ZERO) => r1s
+ case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s)
+ }
+ case SEQ(r1, r2) => (simp(r1), simp(r2)) match {
+ case (ZERO, _) => ZERO
+ case (_, ZERO) => ZERO
+ case (ONE, r2s) => r2s
+ case (r1s, ONE) => r1s
+ case (r1s, r2s) => SEQ(r1s, r2s)
+ }
+ case r => r
+}
+
+
+// the derivative w.r.t. a string (iterates der)
+def ders (s: List[Char], r: Rexp) : Rexp = s match {
+ case Nil => r
+ case c::s => ders(s, simp(der(c, r)))
+}
+
+
+// the main matcher function
+def matches(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
+
+// one or zero
+def OPT(r: Rexp) = ALT(r, ONE)
+
+// evil regular expressions
+def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
+val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
+val EVIL3 = SEQ(STAR(ALT(CHAR('a'), SEQ(CHAR('a'),CHAR('a')))), CHAR('b'))
+
+def time_needed[T](i: Int, code: => T) = {
+ val start = System.nanoTime()
+ for (j <- 1 to i) code
+ val end = System.nanoTime()
+ "%.5f".format((end - start) / (i * 1.0e9))
+}
+
+
+// test: (a?{n}) (a{n})
+for (i <- 0 to 8000 by 1000) {
+ println(s"$i: ${time_needed(2, matches(EVIL1(i), "a" * i))}")
+}
+
+// test: (a*)* b
+for (i <- 0 to 6000000 by 500000) {
+ println(s"$i: ${time_needed(2, matches(EVIL2, "a" * i))}")
+}
+
+
+val r0 = simp(der('a', EVIL3))
+val r1 = simp(der('a', r0))
+val r2 = simp(der('a', r1))
+val r3 = simp(der('a', r2))
+val r4 = simp(der('a', r3))
+val r5 = simp(der('a', r4))
+val r6 = simp(der('a', r5))
+
+//test: (a|aa)* b
+/*
+for (i <- 0 to 100 by 10) {
+ println(s"$i: ${time_needed(2, matches(EVIL3, "a" * i ++ "c"))}")
+}
+ */
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Attic/re3ext.scala Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,151 @@
+abstract class Rexp
+case object ZERO extends Rexp
+case object ONE extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+case class NTIMES(r: Rexp, n: Int) extends Rexp
+
+def simp(r: Rexp): Rexp = r match {
+ case ALT(r1, r2) => {
+ (simp(r1), simp(r2)) match {
+ case (ZERO, r2s) => r2s
+ case (r1s, ZERO) => r1s
+ case (r1s, r2s) => if (r1s == r2s) r1s else ALT(r1s, r2s)
+ }
+ }
+ case SEQ(r1, r2) => {
+ (simp(r1), simp(r2)) match {
+ case (ZERO, _) => ZERO
+ case (_, ZERO) => ZERO
+ case (ONE, r2s) => r2s
+ case (r1s, ONE) => r1s
+ case (r1s, r2s) => SEQ(r1s, r2s)
+ }
+ }
+ case NTIMES(r, n) => NTIMES(simp(r), n)
+ case r => r
+}
+
+def nullable (r: Rexp) : Boolean = r match {
+ case ZERO => false
+ case ONE => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+ case NTIMES(r, i) => if (i == 0) true else nullable(r)
+}
+
+def der (c: Char, r: Rexp) : Rexp = r match {
+ case ZERO => ZERO
+ case ONE => ZERO
+ case CHAR(d) => if (c == d) ONE else ZERO
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r) => SEQ(der(c, r), STAR(r))
+ case NTIMES(r, i) =>
+ if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1))
+}
+
+def ders (s: List[Char], r: Rexp) : Rexp = s match {
+ case Nil => r
+ case c::s => ders(s, simp(der(c, r)))
+}
+
+def matches(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
+
+
+//one or zero
+def OPT(r: Rexp) = ALT(r, ONE)
+
+def EVIL(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
+
+def time_needed[T](i: Int, code: => T) = {
+ val start = System.nanoTime()
+ for (j <- 1 to i) code
+ val end = System.nanoTime()
+ (end - start)/(i * 1.0e9)
+}
+
+/*
+for (i <- 1 to 9001 by 1000) {
+ println(i + " " + "%.5f".format(time_needed(1, matches(EVIL(i), "a" * i))))
+}
+*/
+
+// some convenience for typing in regular expressions
+import scala.language.implicitConversions
+import scala.language.reflectiveCalls
+
+def charlist2rexp(s : List[Char]) : Rexp = s match {
+ case Nil => ONE
+ case c::Nil => CHAR(c)
+ case c::s => SEQ(CHAR(c), charlist2rexp(s))
+}
+implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
+
+implicit def RexpOps (r: Rexp) = new {
+ def | (s: Rexp) = ALT(r, s)
+ def % = STAR(r)
+ def ~ (s: Rexp) = SEQ(r, s)
+}
+
+implicit def stringOps (s: String) = new {
+ def | (r: Rexp) = ALT(s, r)
+ def | (r: String) = ALT(s, r)
+ def % = STAR(s)
+ def ~ (r: Rexp) = SEQ(s, r)
+ def ~ (r: String) = SEQ(s, r)
+}
+
+
+def PLUS(r: Rexp) = SEQ(r, STAR(r))
+def RANGE(s: List[Char]) : Rexp = s match {
+ case Nil => ZERO
+ case c::s => ALT(CHAR(c), RANGE(s))
+}
+def NMTIMES(r: Rexp, n: Int, m: Int) : Rexp =
+ if (n >= m) NTIMES(r, n) else ALT(NTIMES(r, m), NMTIMES(r, n, m - 1))
+
+
+println("Coursework:")
+val REG1 = PLUS(PLUS("a" ~ "a" ~ "a"))
+val REG2 = PLUS(PLUS("aaaaaaaaaaaaaaaaaaa" ~ OPT("a"))) // 19 as ~ a?
+
+
+//40 * aaa matches
+//43 * aaa + aa does not
+//45 * aaa + a
+
+println("EVIL1:")
+println(matches(REG1, "aaa" * 40))
+println(matches(REG1, "aaa" * 43 + "aa"))
+println(matches(REG1, "aaa" * 45 + "a"))
+println("EVIL2:")
+println(matches(REG2, "aaa" * 40))
+println(matches(REG2, "aaa" * 43 + "aa"))
+println(matches(REG2, "aaa" * 45 + "a"))
+
+println("EMAIL:")
+val LOWERCASE = "abcdefghijklmnopqrstuvwxyz"
+val DIGITS = "0123456789"
+val EMAIL = PLUS(RANGE((LOWERCASE + DIGITS + "_" + "." + "-").toList)) ~ "@" ~
+ PLUS(RANGE((LOWERCASE + DIGITS + "." + "-").toList)) ~ "." ~
+ NMTIMES(RANGE((LOWERCASE + ".").toList), 2, 6)
+
+val my_email = "christian.urban@kcl.ac.uk"
+
+println(matches(EMAIL, my_email))
+
+println(matches(NTIMES("a", 2), "a"))
+println(matches(NTIMES("a", 2), "aa"))
+println(matches(NTIMES("a", 2), "aaa"))
+
+println(matches(NMTIMES("a", 2, 3), "a"))
+println(matches(NMTIMES("a", 2, 3), "aa"))
+println(matches(NMTIMES("a", 2, 3), "aaa"))
+println(matches(NMTIMES("a", 2, 3), "aaaa"))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Attic/re4.scala Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,156 @@
+// A version which attempts to move whole strings, not
+// just characters, under derivatives whenever possible
+
+abstract class Rexp
+case object ZERO extends Rexp
+case object ONE extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+case class NTIMES(r: Rexp, n: Int) extends Rexp
+
+// the nullable function: tests whether the regular
+// expression can recognise the empty string
+def nullable (r: Rexp) : Boolean = r match {
+ case ZERO => false
+ case ONE => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+ case NTIMES(r, i) => if (i == 0) true else nullable(r)
+}
+
+// the derivative of a regular expression w.r.t. a character
+def der (c: Char, r: Rexp) : Rexp = r match {
+ case ZERO => ZERO
+ case ONE => ZERO
+ case CHAR(d) => if (c == d) ONE else ZERO
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r1) => SEQ(der(c, r1), STAR(r1))
+ case NTIMES(r1, i) =>
+ if (i == 0) ZERO else SEQ(der(c, r1), NTIMES(r1, i - 1))
+}
+
+def simp(r: Rexp) : Rexp = r match {
+ case ALT(r1, r2) => (simp(r1), simp(r2)) match {
+ case (ZERO, r2s) => r2s
+ case (r1s, ZERO) => r1s
+ case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s)
+ }
+ case SEQ(r1, r2) => (simp(r1), simp(r2)) match {
+ case (ZERO, _) => ZERO
+ case (_, ZERO) => ZERO
+ case (ONE, r2s) => r2s
+ case (r1s, ONE) => r1s
+ case (r1s, r2s) => SEQ(r1s, r2s)
+ }
+ case r => r
+}
+
+// an example
+val r = SEQ(SEQ(CHAR('x'), CHAR('y')), CHAR('z'))
+der('x', r)
+der('y', der('x', r))
+der('z', der('y', der('x', r)))
+simp(der('z', der('y', der('x', r))))
+
+// *new*
+// the derivative w.r.t. a string (iterates der)
+def ders2(s: List[Char], r: Rexp) : Rexp = (s, r) match {
+ case (Nil, r) => r
+ case (s, ZERO) => ZERO
+ case (s, ONE) => if (s == Nil) ONE else ZERO
+ case (s, CHAR(c)) => if (s == List(c)) ONE else
+ if (s == Nil) CHAR(c) else ZERO
+ case (s, ALT(r1, r2)) => ALT(ders2(s, r1), ders2(s, r2))
+ case (c::s, r) => ders2(s, simp(der(c, r)))
+}
+
+// the main matcher function
+def matcher(r: Rexp, s: String) : Boolean =
+ nullable(ders2(s.toList, r))
+
+
+// one or zero
+def OPT(r: Rexp) = ALT(r, ONE)
+
+
+// Test Cases
+
+def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
+val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
+
+// for measuring time
+def time_needed[T](i: Int, code: => T) = {
+ val start = System.nanoTime()
+ for (j <- 1 to i) code
+ val end = System.nanoTime()
+ (end - start) / (i * 1.0e9)
+}
+
+
+// test: (a?{n}) (a{n})
+for (i <- 0 to 11000 by 1000) {
+ println(f"$i: ${time_needed(2, matcher(EVIL1(i), "a" * i))}%.5f")
+}
+
+
+// test: (a*)* b
+for (i <- 0 to 7000000 by 500000) {
+ println(f"$i: ${time_needed(2, matcher(EVIL2, "a" * i))}%.5f")
+}
+
+
+
+// the size of a regular expressions - for testing purposes
+def size(r: Rexp) : Int = r match {
+ case ZERO => 1
+ case ONE => 1
+ case CHAR(_) => 1
+ case ALT(r1, r2) => 1 + size(r1) + size(r2)
+ case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+ case STAR(r) => 1 + size(r)
+ case NTIMES(r, _) => 1 + size(r)
+}
+
+
+// string of a regular expressions - for testing purposes
+def string(r: Rexp) : String = r match {
+ case ZERO => "0"
+ case ONE => "1"
+ case CHAR(c) => c.toString
+ case ALT(r1, r2) => s"(${string(r1)} + ${string(r2)})"
+ case SEQ(CHAR(c), CHAR(d)) => s"${c}${d}"
+ case SEQ(r1, r2) => s"(${string(r1)} ~ ${string(r2)})"
+ case STAR(r) => s"(${string(r)})*"
+ case NTIMES(r, n) => s"(${string(r)}){${n}}"
+}
+
+
+// test: ("a" | "aa")*
+val EVIL3 = STAR(ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a'))))
+
+// test: ("" | "a" | "aa")*
+val EVIL4 = STAR(ALT(ONE, ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a')))))
+
+val t1 = ders2("a".toList, EVIL3)
+val t1s = simp(t1)
+val t2 = ders2("aa".toList, t1s)
+val t2s = simp(t2)
+val t3 = ders2("aaa".toList, t2s)
+val t3s = simp(t3)
+val t4 = ders2("aaaa".toList, t3s)
+val t4s = simp(t4)
+println(string(t1) + " " + size(t1))
+println("s " + string(t1s) + " " + size(t1s))
+println(string(t2) + " " + size(t2))
+println("s " + string(t2s) + " " + size(t2s))
+println(string(t3) + " " + size(t3))
+println("s " + string(t3s) + " " + size(t3s))
+println(string(t4) + " " + size(t4))
+println("s " + string(t4s) + " " + size(t4s))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Attic/regexp2.scala Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,123 @@
+
+// regular expressions including NOT
+abstract class Rexp
+
+case object NULL extends Rexp
+case object EMPTY extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+case class NOT(r: Rexp) extends Rexp
+
+
+// some convenience for typing in regular expressions
+def charlist2rexp(s : List[Char]) : Rexp = s match {
+ case Nil => EMPTY
+ case c::Nil => CHAR(c)
+ case c::s => SEQ(CHAR(c), charlist2rexp(s))
+}
+implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
+
+
+// nullable function: tests whether the regular
+// expression can recognise the empty string
+def nullable (r: Rexp) : Boolean = r match {
+ case NULL => false
+ case EMPTY => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+ case NOT(r) => !(nullable(r))
+}
+
+// tests whether a regular expression
+// cannot recognise more
+def no_more (r: Rexp) : Boolean = r match {
+ case NULL => true
+ case EMPTY => false
+ case CHAR(_) => false
+ case ALT(r1, r2) => no_more(r1) && no_more(r2)
+ case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1)
+ case STAR(_) => false
+ case NOT(r) => !(no_more(r))
+}
+
+
+// derivative of a regular expression w.r.t. a character
+def der (c: Char, r: Rexp) : Rexp = r match {
+ case NULL => NULL
+ case EMPTY => NULL
+ case CHAR(d) => if (c == d) EMPTY else NULL
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r) => SEQ(der(c, r), STAR(r))
+ case NOT(r) => NOT(der (c, r))
+}
+
+
+// regular expression for specifying
+// ranges of characters
+def RANGE(s : List[Char]) : Rexp = s match {
+ case Nil => NULL
+ case c::Nil => CHAR(c)
+ case c::s => ALT(CHAR(c), RANGE(s))
+}
+
+//one or more
+def PLUS(r: Rexp) = SEQ(r, STAR(r))
+
+
+//some regular expressions
+val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList)
+val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList)
+val LETTER = ALT(LOWERCASE, UPPERCASE)
+val DIGIT = RANGE("0123456789".toList)
+val NONZERODIGIT = RANGE("123456789".toList)
+
+val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGIT)))
+val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
+val WHITESPACE = RANGE(" \n".toList)
+val WHITESPACES = PLUS(WHITESPACE)
+
+val ALL = ALT(ALT(LETTER, DIGIT), WHITESPACE)
+val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/")
+
+
+// an example list of regular expressions
+val regs: List[Rexp]= List("if", "then", "else", "+", IDENT, NUMBER, WHITESPACES, COMMENT)
+
+
+def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s)
+
+def munch(r: Rexp, s: List[Char], t: List[Char]) : Option[(List[Char], List[Char])] =
+ s match {
+ case Nil if (nullable(r)) => Some(Nil, t)
+ case Nil => None
+ case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, t)
+ case c::s if (no_more(der (c, r))) => None
+ case c::s => munch(der (c, r), s, t ::: List(c))
+ }
+
+def one_string (regs: List[Rexp], s: List[Char]) : (List[Char], List[Char]) = {
+ val somes = regs.map { munch(_, s, Nil) } .flatten
+ if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head)
+}
+
+def tokenize (regs: List[Rexp], s: List[Char]) : List[String] = s match {
+ case Nil => Nil
+ case _ => one_string(regs, s) match {
+ case (rest, s) => s.mkString :: tokenize(regs, rest)
+ }
+}
+
+//examples
+println(tokenize(regs, "if true then then 42 else +".toList))
+println(tokenize(regs, "if+true+then+then+42+else +".toList))
+println(tokenize(regs, "ifff if 34 34".toList))
+println(tokenize(regs, "/*ifff if */ hhjj /*34 */".toList))
+println(tokenize(regs, "/* if true then */ then 42 else +".toList))
+//println(tokenize(regs, "ifff $ if 34".toList)) // causes an error because of the symbol $
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Attic/rev.scala Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,8 @@
+def rev(r: Rexp) : Rexp = r match {
+ case ZERO => ZERO
+ case ONE => ONE
+ case CHAR(c) => CHAR(c)
+ case ALT(r1, r2) => ALT(rev(r1), rev(r2))
+ case SEQ(r1, r2) => SEQ(rev(r2), rev(r1))
+ case STAR(r) => STAR(rev(r))
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Attic/token-bak.scala Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,145 @@
+import scala.language.implicitConversions
+import scala.language.reflectiveCalls
+import scala.util._
+import scala.annotation.tailrec
+
+sealed abstract class Rexp
+
+case object NULL extends Rexp
+case object EMPTY extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+
+def charlist2rexp(s : List[Char]) : Rexp = s match {
+ case Nil => EMPTY
+ case c::Nil => CHAR(c)
+ case c::s => SEQ(CHAR(c), charlist2rexp(s))
+}
+implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
+
+
+implicit def RexpOps(r: Rexp) = new {
+ def | (s: Rexp) = ALT(r, s)
+ def % = STAR(r)
+ def ~ (s: Rexp) = SEQ(r, s)
+}
+
+implicit def stringOps(s: String) = new {
+ def | (r: Rexp) = ALT(s, r)
+ def | (r: String) = ALT(s, r)
+ def % = STAR(s)
+ def ~ (r: Rexp) = SEQ(s, r)
+ def ~ (r: String) = SEQ(s, r)
+}
+
+def Range(s : List[Char]) : Rexp = s match {
+ case Nil => NULL
+ case c::Nil => CHAR(c)
+ case c::s => ALT(CHAR(c), Range(s))
+}
+def RANGE(s: String) = Range(s.toList)
+
+def PLUS(r: Rexp) = SEQ(r, STAR(r))
+
+val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_")
+val DIGIT = RANGE("0123456789")
+val ID = SYM ~ (SYM | DIGIT).%
+val NUM = PLUS(DIGIT)
+val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write"
+val SEMI: Rexp = ";"
+val OP: Rexp = ":=" | "=" | "-" | "+" | "*" | "!=" | "<" | ">"
+val WHITESPACE = PLUS(RANGE(" \n"))
+val RPAREN: Rexp = ")"
+val LPAREN: Rexp = "("
+val BEGIN: Rexp = "{"
+val END: Rexp = "}"
+
+//regular expressions ranked by position in the list
+val regs: List[Rexp] =
+ List(KEYWORD, ID, OP, NUM, SEMI, LPAREN, RPAREN, BEGIN, END, WHITESPACE)
+
+def nullable (r: Rexp) : Boolean = r match {
+ case NULL => false
+ case EMPTY => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+}
+
+def zeroable (r: Rexp) : Boolean = r match {
+ case NULL => true
+ case EMPTY => false
+ case CHAR(_) => false
+ case ALT(r1, r2) => zeroable(r1) && zeroable(r2)
+ case SEQ(r1, r2) => zeroable(r1) || zeroable(r2)
+ case STAR(_) => false
+}
+
+def der (c: Char, r: Rexp) : Rexp = r match {
+ case NULL => NULL
+ case EMPTY => NULL
+ case CHAR(d) => if (c == d) EMPTY else NULL
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r) => SEQ(der(c, r), STAR(r))
+}
+
+
+// calculates derivatives until all of them are zeroable
+@tailrec
+def munch(s: List[Char],
+ pos: Int,
+ rs: List[Rexp],
+ last: Option[Int]): Option[Int] = rs match {
+ case Nil => last
+ case rs if (s.length <= pos) => last
+ case rs => {
+ val ders = rs.map(der(s(pos), _))
+ val rs_nzero = ders.filterNot(zeroable(_))
+ val rs_nulls = ders.filter(nullable(_))
+ val new_last = if (rs_nulls != Nil) Some(pos) else last
+ munch(s, 1 + pos, rs_nzero, new_last)
+ }
+}
+
+// iterates the munching function and prints
+// out the component strings
+@tailrec
+def tokenize(s: String, rs: List[Rexp]) : Unit = munch(s.toList, 0, rs, None) match {
+ case None if (s == "") => println("EOF")
+ case None => println(s"Lexing error: $s")
+ case Some(n) => {
+ val (head, tail) = s.splitAt(n + 1)
+ print(s"|${head.replaceAll("\n","RET")}|")
+ tokenize(tail, rs)
+ }
+}
+
+
+val test_prog = """
+start := XXX;
+x := start;
+y := start;
+z := start;
+while 0 < x do {
+ while 0 < y do {
+ while 0 < z do {
+ z := z - 1
+ };
+ z := start;
+ y := y - 1
+ };
+ y := start;
+ x := x - 1
+};
+write x;
+write y;
+write z
+"""
+
+tokenize(test_prog, regs)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Attic/while.scala Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,244 @@
+// A parser and evaluator for the while language
+//
+import matcher._
+import parser._
+
+
+// some regular expressions
+val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_")
+val DIGIT = RANGE("0123456789")
+val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT)))
+val NUM = PLUS(DIGIT)
+val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false")
+val SEMI: Rexp = ";"
+val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">")
+val WHITESPACE = PLUS(RANGE(" \n"))
+val RPAREN: Rexp = ")"
+val LPAREN: Rexp = "("
+val BEGIN: Rexp = "{"
+val END: Rexp = "}"
+
+// tokens for classifying the strings that have been recognised
+abstract class Token
+case object T_WHITESPACE extends Token
+case object T_SEMI extends Token
+case object T_LPAREN extends Token
+case object T_RPAREN extends Token
+case object T_BEGIN extends Token
+case object T_END extends Token
+case class T_ID(s: String) extends Token
+case class T_OP(s: String) extends Token
+case class T_NUM(s: String) extends Token
+case class T_KWD(s: String) extends Token
+
+val lexing_rules: List[(Rexp, List[Char] => Token)] =
+ List((KEYWORD, (s) => T_KWD(s.mkString)),
+ (ID, (s) => T_ID(s.mkString)),
+ (OP, (s) => T_OP(s.mkString)),
+ (NUM, (s) => T_NUM(s.mkString)),
+ (SEMI, (s) => T_SEMI),
+ (LPAREN, (s) => T_LPAREN),
+ (RPAREN, (s) => T_RPAREN),
+ (BEGIN, (s) => T_BEGIN),
+ (END, (s) => T_END),
+ (WHITESPACE, (s) => T_WHITESPACE))
+
+// the tokenizer
+val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE))
+
+// the abstract syntax trees
+abstract class Stmt
+abstract class AExp
+abstract class BExp
+type Block = List[Stmt]
+case object Skip extends Stmt
+case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt
+case class While(b: BExp, bl: Block) extends Stmt
+case class Assign(s: String, a: AExp) extends Stmt
+
+case class Var(s: String) extends AExp
+case class Num(i: Int) extends AExp
+case class Aop(o: String, a1: AExp, a2: AExp) extends AExp
+
+case object True extends BExp
+case object False extends BExp
+case class Bop(o: String, a1: AExp, a2: AExp) extends BExp
+
+// atomic parsers
+case class TokParser(tok: Token) extends Parser[List[Token], Token] {
+ def parse(ts: List[Token]) = ts match {
+ case t::ts if (t == tok) => Set((t, ts))
+ case _ => Set ()
+ }
+}
+implicit def token2tparser(t: Token) = TokParser(t)
+
+case object NumParser extends Parser[List[Token], Int] {
+ def parse(ts: List[Token]) = ts match {
+ case T_NUM(s)::ts => Set((s.toInt, ts))
+ case _ => Set ()
+ }
+}
+
+case object IdParser extends Parser[List[Token], String] {
+ def parse(ts: List[Token]) = ts match {
+ case T_ID(s)::ts => Set((s, ts))
+ case _ => Set ()
+ }
+}
+
+def len(xs: List[(Int, Int)]): Int = xs match {
+ case Nil => 0
+ case (1, x)::xs => len(xs) + 1
+ case (_, x)::xs => len(xs)
+}
+
+def fst(p: (Int, Int)): Int = p match {
+ case Nil => 0
+ case (1, x)::xs => len(xs) + 1
+ case (_, x)::xs => len(xs)
+}
+
+
+
+// arithmetic expressions
+lazy val AExp: Parser[List[Token], AExp] =
+ (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } ||
+ (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T
+lazy val T: Parser[List[Token], AExp] =
+ (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F
+lazy val F: Parser[List[Token], AExp] =
+ (T_LPAREN ~> AExp <~ T_RPAREN) ||
+ IdParser ==> Var ||
+ NumParser ==> Num
+
+// boolean expressions
+lazy val BExp: Parser[List[Token], BExp] =
+ (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Bop("=", x, z): BExp } ||
+ (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Bop("!=", x, z): BExp } ||
+ (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Bop("<", x, z): BExp } ||
+ (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Bop(">", x, z): BExp } ||
+ (T_KWD("true") ==> ((_) => True)) ||
+ (T_KWD("false") ==> ((_) => False: BExp))
+
+lazy val Stmt: Parser[List[Token], Stmt] =
+ (T_KWD("skip") ==> ((_) => Skip: Stmt)) ||
+ (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } ||
+ (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==>
+ { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } ||
+ (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) }
+
+lazy val Stmts: Parser[List[Token], Block] =
+ (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } ||
+ (Stmt ==> ((s) => List(s) : Block))
+
+lazy val Block: Parser[List[Token], Block] =
+ (T_BEGIN ~> Stmts <~ T_END) ||
+ (Stmt ==> ((s) => List(s)))
+
+
+// examples
+val p1 = "x := 5"
+val p1_toks = Tok.fromString(p1)
+val p1_ast = Block.parse_all(p1_toks)
+println(p1_toks)
+println(p1_ast)
+
+val p1a = "{ x := 5; y := 8}"
+val p1a_toks = Tok.fromString(p1a)
+val p1a_ast = Block.parse_all(p1a_toks)
+println(p1a_ast)
+
+val p2 = "5 = 6"
+val p2_toks = Tok.fromString(p2)
+val p2_ast = BExp.parse_all(p2_toks)
+println(p2_ast)
+
+val p2a = "true"
+val p2a_toks = Tok.fromString(p2a)
+val p2a_ast = BExp.parse_all(p2a_toks)
+println(p2a_ast)
+
+val p3 = "if true then skip else skip"
+val p3_toks = Tok.fromString(p3)
+val p3_ast = Stmt.parse_all(p3_toks)
+println(p3_ast)
+
+val p3a = "if true then x := 5 else x := 10"
+val p3a_toks = Tok.fromString(p3a)
+val p3a_ast = Stmt.parse_all(p3a_toks)
+println(p3a_ast)
+
+val p3b = "if false then x := 5 else x := 10"
+val p3b_toks = Tok.fromString(p3b)
+val p3b_ast = Stmt.parse_all(p3b_toks)
+println(p3b_ast)
+
+// multiplication
+val p4 = """{ x := 5;
+ y := 4;
+ r := 0;
+ while y > 0 do {
+ r := r + x;
+ y := y - 1
+ }
+ }"""
+val p4_toks = Tok.fromString(p4)
+val p4_ast = Block.parse_all(p4_toks)
+println(p4_ast)
+
+val p5 = """
+ n := 9;
+ minus1 := 0;
+ minus2 := 1;
+ temp := 0;
+ while n > 0 do {
+ temp := minus2;
+ minus2 := minus1 + minus2;
+ minus1 := temp;
+ n := n - 1
+ };
+ fib_res := minus2
+"""
+val p5_toks = Tok.fromString(p5)
+val p5_ast = Stmts.parse_all(p5_toks)
+
+// interpreter
+type Env = Map[String, Int]
+
+def eval_bexp(b: BExp, env: Env) : Boolean = b match {
+ case True => true
+ case False => false
+ case Bop("=", a1, a2) => eval_aexp(a1, env) == eval_aexp(a2, env)
+ case Bop("!=", a1, a2) => !(eval_aexp(a1, env) == eval_aexp(a2, env))
+ case Bop(">", a1, a2) => eval_aexp(a1, env) > eval_aexp(a2, env)
+ case Bop("<", a1, a2) => eval_aexp(a1, env) < eval_aexp(a2, env)
+}
+
+def eval_aexp(a: AExp, env : Env) : Int = a match {
+ case Num(i) => i
+ case Var(s) => env(s)
+ case Aop("+", a1, a2) => eval_aexp(a1, env) + eval_aexp(a2, env)
+ case Aop("-", a1, a2) => eval_aexp(a1, env) - eval_aexp(a2, env)
+ case Aop("*", a1, a2) => eval_aexp(a1, env) * eval_aexp(a2, env)
+}
+
+def eval_stmt(s: Stmt, env: Env) : Env = s match {
+ case Skip => env
+ case Assign(x, a) => env + (x -> eval_aexp(a, env))
+ case If(b, bl1, bl2) => if (eval_bexp(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env)
+ case While(b, bl) =>
+ if (eval_bexp(b, env)) eval_stmt(While(b, bl), eval_bl(bl, env))
+ else env
+}
+
+def eval_bl(bl: Block, env: Env) : Env = bl match {
+ case Nil => env
+ case s::bl => eval_bl(bl, eval_stmt(s, env))
+}
+
+//examples
+println(eval_stmt(p3a_ast.head, Map.empty))
+println(eval_stmt(p3b_ast.head, Map.empty))
+println(eval_bl(p4_ast.head, Map.empty))
+println(eval_bl(p5_ast.head, Map.empty))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Attic/while1.scala Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,221 @@
+// A parser and evaluator for the WHILE language
+//
+
+
+
+
+
+// some regular expressions
+val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_")
+val DIGIT = RANGE("0123456789")
+val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT)))
+val NUM = PLUS(DIGIT)
+val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write")
+val SEMI: Rexp = ";"
+val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">")
+val WHITESPACE = PLUS(RANGE(" \n"))
+val RPAREN: Rexp = ")"
+val LPAREN: Rexp = "("
+val BEGIN: Rexp = "{"
+val END: Rexp = "}"
+val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/")
+
+// tokens for classifying the strings that have been recognised
+abstract class Token
+case object T_WHITESPACE extends Token
+case object T_COMMENT extends Token
+case object T_SEMI extends Token
+case object T_LPAREN extends Token
+case object T_RPAREN extends Token
+case object T_BEGIN extends Token
+case object T_END extends Token
+case class T_ID(s: String) extends Token
+case class T_OP(s: String) extends Token
+case class T_NUM(s: String) extends Token
+case class T_KWD(s: String) extends Token
+
+val lexing_rules: List[(Rexp, List[Char] => Token)] =
+ List((KEYWORD, (s) => T_KWD(s.mkString)),
+ (ID, (s) => T_ID(s.mkString)),
+ (OP, (s) => T_OP(s.mkString)),
+ (NUM, (s) => T_NUM(s.mkString)),
+ (SEMI, (s) => T_SEMI),
+ (LPAREN, (s) => T_LPAREN),
+ (RPAREN, (s) => T_RPAREN),
+ (BEGIN, (s) => T_BEGIN),
+ (END, (s) => T_END),
+ (WHITESPACE, (s) => T_WHITESPACE),
+ (COMMENT, (s) => T_COMMENT))
+
+// the tokenizer
+val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT))
+
+// the abstract syntax trees
+abstract class Stmt
+abstract class AExp
+abstract class BExp
+type Block = List[Stmt]
+case object Skip extends Stmt
+case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt
+case class While(b: BExp, bl: Block) extends Stmt
+case class Assign(s: String, a: AExp) extends Stmt
+case class Write(s: String) extends Stmt
+
+case class Var(s: String) extends AExp
+case class Num(i: Int) extends AExp
+case class Aop(o: String, a1: AExp, a2: AExp) extends AExp
+
+case object True extends BExp
+case object False extends BExp
+case class Bop(o: String, a1: AExp, a2: AExp) extends BExp
+
+// atomic parsers
+case class TokParser(tok: Token) extends Parser[List[Token], Token] {
+ def parse(ts: List[Token]) = ts match {
+ case t::ts if (t == tok) => Set((t, ts))
+ case _ => Set ()
+ }
+}
+implicit def token2tparser(t: Token) = TokParser(t)
+
+case object NumParser extends Parser[List[Token], Int] {
+ def parse(ts: List[Token]) = ts match {
+ case T_NUM(s)::ts => Set((s.toInt, ts))
+ case _ => Set ()
+ }
+}
+
+case object IdParser extends Parser[List[Token], String] {
+ def parse(ts: List[Token]) = ts match {
+ case T_ID(s)::ts => Set((s, ts))
+ case _ => Set ()
+ }
+}
+
+
+// arithmetic expressions
+lazy val AExp: Parser[List[Token], AExp] =
+ (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } ||
+ (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T
+lazy val T: Parser[List[Token], AExp] =
+ (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F
+lazy val F: Parser[List[Token], AExp] =
+ (T_LPAREN ~> AExp <~ T_RPAREN) ||
+ IdParser ==> Var ||
+ NumParser ==> Num
+
+// boolean expressions
+lazy val BExp: Parser[List[Token], BExp] =
+ (T_KWD("true") ==> ((_) => True: BExp)) ||
+ (T_KWD("false") ==> ((_) => False: BExp)) ||
+ (T_LPAREN ~> BExp <~ T_RPAREN) ||
+ (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Bop("=", x, z): BExp } ||
+ (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Bop("!=", x, z): BExp } ||
+ (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Bop("<", x, z): BExp } ||
+ (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Bop("<", z, x): BExp }
+
+lazy val Stmt: Parser[List[Token], Stmt] =
+ (T_KWD("skip") ==> ((_) => Skip: Stmt)) ||
+ (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } ||
+ (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==>
+ { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } ||
+ (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } ||
+ (T_KWD("write") ~ IdParser) ==> { case (x, y) => Write(y) }
+
+lazy val Stmts: Parser[List[Token], Block] =
+ (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } ||
+ (Stmt ==> ((s) => List(s) : Block))
+
+lazy val Block: Parser[List[Token], Block] =
+ (T_BEGIN ~> Stmts <~ T_END) ||
+ (Stmt ==> ((s) => List(s)))
+
+// interpreter
+type Env = Map[String, Int]
+
+def eval_bexp(b: BExp, env: Env) : Boolean = b match {
+ case True => true
+ case False => false
+ case Bop("=", a1, a2) => eval_aexp(a1, env) == eval_aexp(a2, env)
+ case Bop("!=", a1, a2) => !(eval_aexp(a1, env) == eval_aexp(a2, env))
+ case Bop("<", a1, a2) => eval_aexp(a1, env) < eval_aexp(a2, env)
+}
+
+def eval_aexp(a: AExp, env : Env) : Int = a match {
+ case Num(i) => i
+ case Var(s) => env(s)
+ case Aop("+", a1, a2) => eval_aexp(a1, env) + eval_aexp(a2, env)
+ case Aop("-", a1, a2) => eval_aexp(a1, env) - eval_aexp(a2, env)
+ case Aop("*", a1, a2) => eval_aexp(a1, env) * eval_aexp(a2, env)
+}
+
+def eval_stmt(s: Stmt, env: Env) : Env = s match {
+ case Skip => env
+ case Assign(x, a) => env + (x -> eval_aexp(a, env))
+ case If(b, bl1, bl2) => if (eval_bexp(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env)
+ case While(b, bl) =>
+ if (eval_bexp(b, env)) eval_stmt(While(b, bl), eval_bl(bl, env))
+ else env
+ case Write(x) => { println(env(x)); env }
+}
+
+def eval_bl(bl: Block, env: Env) : Env = bl match {
+ case Nil => env
+ case s::bl => eval_bl(bl, eval_stmt(s, env))
+}
+
+def eval_prog(name: String) : Env = {
+ val tks = Tok.fromFile(name)
+ val ast = Stmts.parse_single(tks)
+ eval_bl(ast, Map.empty)
+}
+
+
+//examples
+
+//eval_prog("loops.while")
+eval_prog("fib.while")
+
+
+def time_needed[T](i: Int, code: => T) = {
+ val start = System.nanoTime()
+ for (j <- 1 to i) code
+ val end = System.nanoTime()
+ (end - start)/(i * 1.0e9)
+}
+
+
+val test_prog = """
+start := XXX;
+x := start;
+y := start;
+z := start;
+while 0 < x do {
+ while 0 < y do {
+ while 0 < z do {
+ z := z - 1
+ };
+ z := start;
+ y := y - 1
+ };
+ y := start;
+ x := x - 1
+}
+"""
+
+
+
+def eval_test(n: Int) : Unit = {
+ val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString))
+ val ast = Stmts.parse_single(tks)
+ println(n + " " + time_needed(2, eval_bl(ast, Map.empty)))
+}
+
+List(1, 200, 400, 600, 800, 1000, 1200, 1400, 1600).map(eval_test(_))
+
+
+
+
+
+
+
Binary file handouts/ho01.pdf has changed
--- a/handouts/ho01.tex Mon Jul 27 11:02:48 2020 +0100
+++ b/handouts/ho01.tex Thu Jul 30 13:50:54 2020 +0100
@@ -46,6 +46,7 @@
\section*{Handout 1}
+
The purpose of a compiler is to transform a program a human can read and
write into code the machine can run as fast as possible. Developing a
compiler is an old craft going back to 1952 with the first compiler
@@ -479,8 +480,18 @@
structure of regular expressions is always clear. But for
writing them down in a more mathematical fashion, parentheses
will be helpful. For example we will write $(r_1 + r_2)^*$,
-which is different from, say $r_1 + (r_2)^*$. The former means
-roughly zero or more times $r_1$ or $r_2$, while the latter
+which is different from, say $r_1 + (r_2)^*$. This can be
+seen if we write regular expressions as trees:
+
+\begin{center}
+\includegraphics[scale=0.65]{../pics/r1.pdf}
+\hspace{3cm}
+\includegraphics[scale=0.65]{../pics/r2.pdf}
+\end{center}
+
+\noindent
+The regular expression on the left means
+roughly zero or more times $r_1$ or $r_2$, while the one on the right
means $r_1$, or zero or more times $r_2$. This will turn out to
be two different patterns, which match in general different
strings. We should also write $(r_1 + r_2) + r_3$, which is
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/automata/build.sh Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,5 @@
+#!/bin/bash
+set -euo pipefail
+
+amm thompson.sc
+
--- a/progs/automata/thompson.sc Mon Jul 27 11:02:48 2020 +0100
+++ b/progs/automata/thompson.sc Thu Jul 30 13:50:54 2020 +0100
@@ -171,7 +171,7 @@
// construction), in general the DFAs can be slow because of
// the state explosion in the subset construction
-for (i <- 1 to 13) {
+for (i <- 1 to 7) {
println(i + ": " + "%.5f".format(time_needed(2, tmatches_dfa(EVIL1(i), "a" * i))))
}
--- a/progs/bf/bfc0.sc Mon Jul 27 11:02:48 2020 +0100
+++ b/progs/bf/bfc0.sc Thu Jul 30 13:50:54 2020 +0100
@@ -7,7 +7,9 @@
//
//
// Note: An interesting exercise is to call
-// gcc with -O3 instead of -O0
+// gcc with -O3 instead of -O0 (see invocation
+// below).
+
// simple instructions
def instr(c: Char) : String = c match {
@@ -25,7 +27,7 @@
def instrs(prog: String) : String =
prog.toList.map(instr(_)).mkString
-// adding the boilerplate
+// adding boilerplate
def compile(prog: String) : String =
s"""#include <string.h>
#include <stdio.h>
@@ -36,16 +38,17 @@
${instrs(prog)}
return 0;}"""
-def compile_file(name: String, prog: String) =
+
+def compile_to_file(name: String, prog: String) =
os.write.over(os.pwd / name, compile(prog))
// running the c-compiler over the transpiled
// BF program and running the resulting binary
-def compile_run(prog: String) = {
+def compile_and_run(prog: String) = {
val tn = "tmp"
- compile_file(s"${tn}.c", prog)
+ compile_to_file(s"${tn}.c", prog)
os.proc("gcc", "-O0", "-o", tn, s"${tn}.c").call() // call gcc
os.proc("./tmp").call(stdout = os.Inherit) // run binary
}
@@ -64,7 +67,7 @@
@main
def main(fname: String) = {
val bf_str = os.read(os.pwd / fname)
- println(s"${time_needed(1, compile_run(bf_str))} secs")
+ println(s"${time_needed(1, compile_and_run(bf_str))} secs")
}
--- a/progs/bf/bfc1.sc Mon Jul 27 11:02:48 2020 +0100
+++ b/progs/bf/bfc1.sc Thu Jul 30 13:50:54 2020 +0100
@@ -2,8 +2,8 @@
//=========================================
//
// This version "optimises" the code by replacing
-// for example +++ by (*ptr) += 3, instead of
-// (*ptr)++, (*ptr)++, (*ptr)++
+// for example +++ by (*ptr) += 3, instead of three
+// separate (*ptr)++, (*ptr)++, (*ptr)++
//
// Call with
//
@@ -11,7 +11,7 @@
//
-// generating "compound" c-instructions
+// generate "compound" c-instructions
def instr2(c: Char, n: Int) : String = c match {
case '>' => s"ptr += $n ;"
case '<' => s"ptr -= $n ;"
@@ -40,7 +40,7 @@
def instrs2(prog: String) : String =
splice(prog.toList, Nil).reverse.mkString
-// adding the boilerplate
+// adding boilerplate
def compile(prog: String) : String =
s"""#include <string.h>
#include <stdio.h>
@@ -51,16 +51,16 @@
${instrs2(prog)}
return 0;}"""
-def compile_file(name: String, prog: String) =
+def compile_to_file(name: String, prog: String) =
os.write.over(os.pwd / name, compile(prog))
// running the c-compiler over the transpiled
// BF program and running the resulting binary
-def compile_run(prog: String) = {
+def compile_and_run(prog: String) = {
val tn = "tmp"
- compile_file(s"${tn}.c", prog)
+ compile_to_file(s"${tn}.c", prog)
os.proc("gcc", "-O0", "-o", tn, s"${tn}.c").call() // call gcc
os.proc("./tmp").call(stdout = os.Inherit) // run binary
}
@@ -79,6 +79,6 @@
@main
def main(fname: String) = {
val bf_str = os.read(os.pwd / fname)
- println(s"${time_needed(1, compile_run(bf_str))} secs")
+ println(s"${time_needed(1, compile_and_run(bf_str))} secs")
}
--- a/progs/bf/bfi.sc Mon Jul 27 11:02:48 2020 +0100
+++ b/progs/bf/bfi.sc Thu Jul 30 13:50:54 2020 +0100
@@ -7,12 +7,10 @@
//
-import scala.util._
-
-// BF memory as a map
+// BF memory is represented as a Map
type Mem = Map[Int, Int]
-// reading and writing BF memory
+// safe reading and writing of BF memory
def sread(mem: Mem, mp: Int) : Int =
mem.getOrElse(mp, 0)
--- a/progs/catastrophic/catastrophic.py Mon Jul 27 11:02:48 2020 +0100
+++ b/progs/catastrophic/catastrophic.py Thu Jul 30 13:50:54 2020 +0100
@@ -9,7 +9,7 @@
#
# call with timing as:
#
-# > time ./catastrophic.py 20
+# time ./catastrophic.py 20
# counter n given on the command line
cn = sys.argv[1]
--- a/progs/catastrophic/catastrophic.rb Mon Jul 27 11:02:48 2020 +0100
+++ b/progs/catastrophic/catastrophic.rb Thu Jul 30 13:50:54 2020 +0100
@@ -8,7 +8,7 @@
#
# run on the command line with:
#
-# $> ruby catastrophic.rb
+# ruby catastrophic.rb
#
nums = (1..1000)
--- a/progs/catastrophic/catastrophic2.py Mon Jul 27 11:02:48 2020 +0100
+++ b/progs/catastrophic/catastrophic2.py Thu Jul 30 13:50:54 2020 +0100
@@ -9,7 +9,7 @@
#
# call with timing as:
#
-# > time ./catastrophic.py 20
+# time ./catastrophic.py 20
# counter n given on the command line
cn = sys.argv[1]
--- a/progs/collatz.while Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-write "Input a number ";
-read n;
-while n > 1 do {
- if n % 2 == 0
- then n := n/2
- else n := 3*n+1;
-};
-write "Yes";
--- a/progs/defs.fun Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,85 +0,0 @@
-
-def zero(x) = 0;
-
-def suc(x) = x + 1;
-
-def pred(x) =
- if x == 0 then x else x - 1;
-
-def add(x, y) =
- if x == 0 then y else suc(add(x - 1, y));
-
-def mult(x, y) =
- if x == 0 then 0 else add(y, mult(x - 1, y));
-
-def pow(x, y) =
- if y == 0 then 1 else mult(x, pow(x, y - 1));
-
-def fib(n) = if n == 0 then 0
- else if n == 1 then 1
- else fib(n - 1) + fib(n - 2);
-
-def fact(n) =
- if n == 0 then 1 else n * fact(n - 1);
-
-def ack(m, n) = if m == 0 then n + 1
- else if n == 0 then ack(m - 1, 1)
- else ack(m - 1, ack(m, n - 1));
-
-def stack_test(x) = x + 1 + 2 + 3 + 4 + 5;
-
-def div(x, y) = x / y; //integer division
-
-def rem(x, y) = x % y; //remainder
-
-def gcd(a, b) =
- if b == 0 then a else gcd(b, a % b);
-
-def is_prime_aux(n, i) =
- if n % i == 0 then 0
- else if (i * i) <= n then is_prime_aux(n, i + 1)
- else 1;
-
-def is_prime(n) = if n == 2 then 1 else is_prime_aux(n, 2);
-
-def primes(n) =
- if n == 0 then 0
- else if is_prime(n) == 1 then (write n; primes(n - 1))
- else primes(n - 1);
-
-def is_collatz(n) =
- if n == 1 then 1
- else if n % 2 == 0 then is_collatz(n / 2)
- else is_collatz(3 * n + 1);
-
-def collatz_aux(n, i) =
- if i > n then 0
- else if is_collatz(i) == 1 then (write i; collatz_aux(n, i + 1))
- else collatz_aux(n, i + 1);
-
-def collatz(n) = collatz_aux(n, 1);
-
-def facT(n, acc) =
- if n == 0 then acc else facT(n - 1, n * acc);
-
-
-//zero(3)
-//suc(8)
-//pred(7)
-//write(add(3, 4))
-//mult(4,5)
-//pow(2, 3)
-//fib(20)
-//(write(fact(5)) ; fact(6))
-//(write(1) ; 2)
-//write(ack(3, 12)) // for tail-rec test
-//stack_test(0)
-//(write (div(11, 3)); rem(11, 3))
-//gcd(54, 24)
-//is_prime(2)
-primes(1000)
-//primes(1000000)
-//collatz(4000)
-//collatz(5000)
-//facT(6, 1)
-
--- a/progs/detokenise.scala Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-// Detokenising the ouput of Tokeniser
-//=====================================
-//
-// call with
-//
-// scala detokenise.scala fib.tks
-//
-// scala detokenise.scala loops.tks
-
-object Detokenise {
-
-import java.io._
-import scala.util._
-
-abstract class Token extends Serializable
-case object T_SEMI extends Token
-case object T_LPAREN extends Token
-case object T_RPAREN extends Token
-case class T_ID(s: String) extends Token
-case class T_OP(s: String) extends Token
-case class T_NUM(n: Int) extends Token
-case class T_KWD(s: String) extends Token
-case class T_STR(s: String) extends Token
-
-def deserialise[T](fname: String) : Try[T] = {
- //import scala.util.Using
- //Using(new ObjectInputStream(new FileInputStream(fname))) {
- // in => in.readObject.asInstanceOf[T]
- //}
- Try(new ObjectInputStream(new FileInputStream(fname))).get {
- in => in.readObject.asInstanceOf[T]
- }
-}
-
-def main(args: Array[String]) = {
- val fname = args(0)
- val tks = deserialise[List[Token]](fname).getOrElse(Nil)
- println(s"Reading back from ${fname}:\n${tks.mkString(", ")}")
-}
-
-
-}
--- a/progs/fact.fun Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,17 +0,0 @@
-// a simple factorial program
-// (including a tail recursive version)
-
-
-def fact(n) =
- if n == 0 then 1 else n * fact(n - 1);
-
-def facT(n, acc) =
- if n == 0 then acc else facT(n - 1, n * acc);
-
-def facTi(n) = facT(n, 1);
-
-//fact(10)
-//facTi(10)
-
-write(facTi(6))
-
--- a/progs/factors.while Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-// Find all factors of a given input number
-// by J.R. Cordy August 2005
-
-write "Input n please";
-read n;
-write "The factors of n are";
-f := 2;
-while n != 1 do {
- while (n / f) * f == n do {
- write f;
- n := n / f
- };
- f := f + 1
-}
\ No newline at end of file
--- a/progs/fib.while Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,13 +0,0 @@
-write "Fib";
-read n;
-minus1 := 0;
-minus2 := 1;
-while n > 0 do {
- temp := minus2;
- minus2 := minus1 + minus2;
- minus1 := temp;
- n := n - 1
-};
-write "Result";
-write minus2
-
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/fun-tests/defs.fun Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,85 @@
+
+def zero(x) = 0;
+
+def suc(x) = x + 1;
+
+def pred(x) =
+ if x == 0 then x else x - 1;
+
+def add(x, y) =
+ if x == 0 then y else suc(add(x - 1, y));
+
+def mult(x, y) =
+ if x == 0 then 0 else add(y, mult(x - 1, y));
+
+def pow(x, y) =
+ if y == 0 then 1 else mult(x, pow(x, y - 1));
+
+def fib(n) = if n == 0 then 0
+ else if n == 1 then 1
+ else fib(n - 1) + fib(n - 2);
+
+def fact(n) =
+ if n == 0 then 1 else n * fact(n - 1);
+
+def ack(m, n) = if m == 0 then n + 1
+ else if n == 0 then ack(m - 1, 1)
+ else ack(m - 1, ack(m, n - 1));
+
+def stack_test(x) = x + 1 + 2 + 3 + 4 + 5;
+
+def div(x, y) = x / y; //integer division
+
+def rem(x, y) = x % y; //remainder
+
+def gcd(a, b) =
+ if b == 0 then a else gcd(b, a % b);
+
+def is_prime_aux(n, i) =
+ if n % i == 0 then 0
+ else if (i * i) <= n then is_prime_aux(n, i + 1)
+ else 1;
+
+def is_prime(n) = if n == 2 then 1 else is_prime_aux(n, 2);
+
+def primes(n) =
+ if n == 0 then 0
+ else if is_prime(n) == 1 then (write n; primes(n - 1))
+ else primes(n - 1);
+
+def is_collatz(n) =
+ if n == 1 then 1
+ else if n % 2 == 0 then is_collatz(n / 2)
+ else is_collatz(3 * n + 1);
+
+def collatz_aux(n, i) =
+ if i > n then 0
+ else if is_collatz(i) == 1 then (write i; collatz_aux(n, i + 1))
+ else collatz_aux(n, i + 1);
+
+def collatz(n) = collatz_aux(n, 1);
+
+def facT(n, acc) =
+ if n == 0 then acc else facT(n - 1, n * acc);
+
+
+//zero(3)
+//suc(8)
+//pred(7)
+//write(add(3, 4))
+//mult(4,5)
+//pow(2, 3)
+//fib(20)
+//(write(fact(5)) ; fact(6))
+//(write(1) ; 2)
+//write(ack(3, 12)) // for tail-rec test
+//stack_test(0)
+//(write (div(11, 3)); rem(11, 3))
+//gcd(54, 24)
+//is_prime(2)
+primes(1000)
+//primes(1000000)
+//collatz(4000)
+//collatz(5000)
+//facT(6, 1)
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/fun-tests/fact.fun Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,17 @@
+// a simple factorial program
+// (including a tail recursive version)
+
+
+def fact(n) =
+ if n == 0 then 1 else n * fact(n - 1);
+
+def facT(n, acc) =
+ if n == 0 then acc else facT(n - 1, n * acc);
+
+def facTi(n) = facT(n, 1);
+
+//fact(10)
+//facTi(10)
+
+write(facTi(6))
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/fun-tests/hanoi.fun Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,10 @@
+// towers of hanoi in Fun
+
+let rec hanoi = fun n a b c ->
+ if n != 0 then (
+ hanoi (n - 1) a c b;
+ print_endline ("Move disk from pole " ^ (show a) ^ " to pole " ^ (show b));
+ hanoi (n - 1) c b a
+ ) else ();;
+
+impure $ hanoi 4 1 2 3;;
\ No newline at end of file
--- a/progs/hanoi.fun Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-// towers of hanoi in Fun
-
-let rec hanoi = fun n a b c ->
- if n != 0 then (
- hanoi (n - 1) a c b;
- print_endline ("Move disk from pole " ^ (show a) ^ " to pole " ^ (show b));
- hanoi (n - 1) c b a
- ) else ();;
-
-impure $ hanoi 4 1 2 3;;
\ No newline at end of file
--- a/progs/i.scala Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,492 +0,0 @@
-
-// regular expressions including NOT
-abstract class Rexp
-
-case object NULL extends Rexp
-case object EMPTY extends Rexp
-case object ALLC extends Rexp // recognises any character
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
-case class NOT(r: Rexp) extends Rexp // negation of a regular expression
-
-
-// nullable function: tests whether the regular
-// expression can recognise the empty string
-def nullable (r: Rexp) : Boolean = r match {
- case NULL => false
- case EMPTY => true
- case ALLC => false
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
- case NOT(r) => !(nullable(r))
-}
-
-// tests whether a regular expression
-// cannot recognise more
-def no_more (r: Rexp) : Boolean = r match {
- case NULL => true
- case EMPTY => false
- case ALLC => false
- case CHAR(_) => false
- case ALT(r1, r2) => no_more(r1) && no_more(r2)
- case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1)
- case STAR(_) => false
- case NOT(r) => !(no_more(r))
-}
-
-
-// derivative of a regular expression w.r.t. a character
-def der (c: Char, r: Rexp) : Rexp = r match {
- case NULL => NULL
- case EMPTY => NULL
- case ALLC => EMPTY
- case CHAR(d) => if (c == d) EMPTY else NULL
- case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
- case STAR(r) => SEQ(der(c, r), STAR(r))
- case NOT(r) => NOT(der (c, r))
-}
-
-// regular expression for specifying
-// ranges of characters
-def Range(s : List[Char]) : Rexp = s match {
- case Nil => NULL
- case c::Nil => CHAR(c)
- case c::s => ALT(CHAR(c), Range(s))
-}
-def RANGE(s: String) = Range(s.toList)
-
-
-// one or more
-def PLUS(r: Rexp) = SEQ(r, STAR(r))
-
-// many alternatives
-def Alts(rs: List[Rexp]) : Rexp = rs match {
- case Nil => NULL
- case r::Nil => r
- case r::rs => ALT(r, Alts(rs))
-}
-def ALTS(rs: Rexp*) = Alts(rs.toList)
-
-// repetitions
-def Seqs(rs: List[Rexp]) : Rexp = rs match {
- case Nil => NULL
- case r::Nil => r
- case r::rs => SEQ(r, Seqs(rs))
-}
-def SEQS(rs: Rexp*) = Seqs(rs.toList)
-
-// some convenience for typing in regular expressions
-def charlist2rexp(s : List[Char]) : Rexp = s match {
- case Nil => EMPTY
- case c::Nil => CHAR(c)
- case c::s => SEQ(CHAR(c), charlist2rexp(s))
-}
-implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
-
-
-type Rule[T] = (Rexp, List[Char] => T)
-
-case class Tokenizer[T](rules: List[Rule[T]], excl: List[T] = Nil) {
-
- def munch(r: Rexp, action: List[Char] => T, s: List[Char], t: List[Char]) : Option[(List[Char], T)] =
- s match {
- case Nil if (nullable(r)) => Some(Nil, action(t))
- case Nil => None
- case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, action(t))
- case c::s if (no_more(der (c, r))) => None
- case c::s => munch(der (c, r), action, s, t ::: List(c))
- }
-
- def one_token(s: List[Char]) : Either[(List[Char], T), String] = {
- val somes = rules.map { (r) => munch(r._1, r._2, s, Nil) }.flatten
- if (somes == Nil) Right(s.mkString)
- else Left(somes sortBy (_._1.length) head)
- }
-
- def tokenize(cs: List[Char]) : List[T] = cs match {
- case Nil => Nil
- case _ => one_token(cs) match {
- case Left((rest, token)) => token :: tokenize(rest)
- case Right(s) => { println("Cannot tokenize: \"" + s + "\""); Nil }
- }
- }
-
- def fromString(s: String) : List[T] =
- tokenize(s.toList).filterNot(excl.contains(_))
-
- def fromFile(name: String) : List[T] =
- fromString(io.Source.fromFile(name).mkString)
-
-}
-
-
-// parser combinators with input type I and return type T
-
-abstract class Parser[I <% Seq[_], T] {
- def parse(ts: I): Set[(T, I)]
-
- def parse_all(ts: I) : Set[T] =
- for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head
-
- def parse_single(ts: I) : T = parse_all(ts).toList match {
- case t::Nil => t
- case _ => { println ("Parse Error") ; sys.exit(-1) }
- }
-
- def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right)
- def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f)
- def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right)
- def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2)
- def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1)
-}
-
-class SeqParser[I <% Seq[_], T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] {
- def parse(sb: I) =
- for ((head1, tail1) <- p.parse(sb);
- (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2)
-}
-
-class AltParser[I <% Seq[_], T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] {
- def parse(sb: I) = p.parse(sb) ++ q.parse(sb)
-}
-
-class FunParser[I <% Seq[_], T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] {
- def parse(sb: I) =
- for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
-}
-
-
-// A parser and evaluator for teh while language
-//
-//:load matcher.scala
-//:load parser3.scala
-
-// some regular expressions
-val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_")
-val DIGIT = RANGE("0123456789")
-val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT)))
-val NUM = PLUS(DIGIT)
-val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write")
-val SEMI: Rexp = ";"
-val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">")
-val WHITESPACE = PLUS(RANGE(" \n"))
-val RPAREN: Rexp = ")"
-val LPAREN: Rexp = "("
-val BEGIN: Rexp = "{"
-val END: Rexp = "}"
-val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/")
-
-// tokens for classifying the strings that have been recognised
-abstract class Token
-case object T_WHITESPACE extends Token
-case object T_COMMENT extends Token
-case object T_SEMI extends Token
-case object T_LPAREN extends Token
-case object T_RPAREN extends Token
-case object T_BEGIN extends Token
-case object T_END extends Token
-case class T_ID(s: String) extends Token
-case class T_OP(s: String) extends Token
-case class T_NUM(s: String) extends Token
-case class T_KWD(s: String) extends Token
-
-val lexing_rules: List[Rule[Token]] =
- List((KEYWORD, (s) => T_KWD(s.mkString)),
- (ID, (s) => T_ID(s.mkString)),
- (OP, (s) => T_OP(s.mkString)),
- (NUM, (s) => T_NUM(s.mkString)),
- (SEMI, (s) => T_SEMI),
- (LPAREN, (s) => T_LPAREN),
- (RPAREN, (s) => T_RPAREN),
- (BEGIN, (s) => T_BEGIN),
- (END, (s) => T_END),
- (WHITESPACE, (s) => T_WHITESPACE),
- (COMMENT, (s) => T_COMMENT))
-
-// the tokenizer
-val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT))
-
-// the abstract syntax trees
-abstract class Stmt
-abstract class AExp
-abstract class BExp
-type Block = List[Stmt]
-case object Skip extends Stmt
-case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt
-case class While(b: BExp, bl: Block) extends Stmt
-case class Assign(s: String, a: AExp) extends Stmt
-case class Write(s: String) extends Stmt
-
-case class Var(s: String) extends AExp
-case class Num(i: Int) extends AExp
-case class Aop(o: String, a1: AExp, a2: AExp) extends AExp
-
-case object True extends BExp
-case object False extends BExp
-case class Relop(o: String, a1: AExp, a2: AExp) extends BExp
-
-// atomic parsers
-case class TokParser(tok: Token) extends Parser[List[Token], Token] {
- def parse(ts: List[Token]) = ts match {
- case t::ts if (t == tok) => Set((t, ts))
- case _ => Set ()
- }
-}
-implicit def token2tparser(t: Token) = TokParser(t)
-
-case object NumParser extends Parser[List[Token], Int] {
- def parse(ts: List[Token]) = ts match {
- case T_NUM(s)::ts => Set((s.toInt, ts))
- case _ => Set ()
- }
-}
-
-case object IdParser extends Parser[List[Token], String] {
- def parse(ts: List[Token]) = ts match {
- case T_ID(s)::ts => Set((s, ts))
- case _ => Set ()
- }
-}
-
-
-// arithmetic expressions
-lazy val AExp: Parser[List[Token], AExp] =
- (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } ||
- (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T
-lazy val T: Parser[List[Token], AExp] =
- (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F
-lazy val F: Parser[List[Token], AExp] =
- (T_LPAREN ~> AExp <~ T_RPAREN) ||
- IdParser ==> Var ||
- NumParser ==> Num
-
-// boolean expressions
-lazy val BExp: Parser[List[Token], BExp] =
- (T_KWD("true") ==> ((_) => True: BExp)) ||
- (T_KWD("false") ==> ((_) => False: BExp)) ||
- (T_LPAREN ~> BExp <~ T_RPAREN) ||
- (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Relop("=", x, z): BExp } ||
- (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Relop("!=", x, z): BExp } ||
- (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Relop("<", x, z): BExp } ||
- (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Relop("<", z, x): BExp }
-
-lazy val Stmt: Parser[List[Token], Stmt] =
- (T_KWD("skip") ==> ((_) => Skip: Stmt)) ||
- (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } ||
- (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==>
- { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } ||
- (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } ||
- (T_KWD("write") ~ IdParser) ==> { case (x, y) => Write(y) }
-
-lazy val Stmts: Parser[List[Token], Block] =
- (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } ||
- (Stmt ==> ((s) => List(s) : Block))
-
-lazy val Block: Parser[List[Token], Block] =
- (T_BEGIN ~> Stmts <~ T_END) ||
- (Stmt ==> ((s) => List(s)))
-
-// compiler
-val beginning = """
-.class public XXX.XXX
-.super java/lang/Object
-
-.method public <init>()V
- aload_0
- invokenonvirtual java/lang/Object/<init>()V
- return
-.end method
-
-.method public static write(I)V
- .limit locals 5
- .limit stack 5
- iload 0
- getstatic java/lang/System/out Ljava/io/PrintStream;
- swap
- invokevirtual java/io/PrintStream/println(I)V
- return
-.end method
-
-
-.method public static main([Ljava/lang/String;)V
- .limit locals 200
- .limit stack 200
-
-"""
-
-val ending = """
-
- return
-
-.end method
-"""
-
-// for generating new labels
-var counter = -1
-
-def Fresh(x: String) = {
- counter += 1
- x ++ "_" ++ counter.toString()
-}
-
-type Env = Map[String, String]
-type Instrs = List[String]
-
-def compile_aexp(a: AExp, env : Env) : Instrs = a match {
- case Num(i) => List("ldc " + i.toString + "\n")
- case Var(s) => List("iload " + env(s) + "\n")
- case Aop("+", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("iadd\n")
- case Aop("-", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("isub\n")
- case Aop("*", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("imul\n")
-}
-
-def compile_bexp(b: BExp, env : Env, jmp: String) : Instrs = b match {
- case True => Nil
- case False => List("goto " + jmp + "\n")
- case Relop("=", a1, a2) =>
- compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpne " + jmp + "\n")
- case Relop("!=", a1, a2) =>
- compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpeq " + jmp + "\n")
- case Relop("<", a1, a2) =>
- compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpge " + jmp + "\n")
-}
-
-
-def compile_stmt(s: Stmt, env: Env) : (Instrs, Env) = s match {
- case Skip => (Nil, env)
- case Assign(x, a) => {
- val index = if (env.isDefinedAt(x)) env(x) else env.keys.size.toString
- (compile_aexp(a, env) ++
- List("istore " + index + "\n"), env + (x -> index))
- }
- case If(b, bl1, bl2) => {
- val if_else = Fresh("If_else")
- val if_end = Fresh("If_end")
- val (instrs1, env1) = compile_bl(bl1, env)
- val (instrs2, env2) = compile_bl(bl2, env1)
- (compile_bexp(b, env, if_else) ++
- instrs1 ++
- List("goto " + if_end + "\n") ++
- List("\n" + if_else + ":\n\n") ++
- instrs2 ++
- List("\n" + if_end + ":\n\n"), env2)
- }
- case While(b, bl) => {
- val loop_begin = Fresh("Loop_begin")
- val loop_end = Fresh("Loop_end")
- val (instrs1, env1) = compile_bl(bl, env)
- (List("\n" + loop_begin + ":\n\n") ++
- compile_bexp(b, env, loop_end) ++
- instrs1 ++
- List("goto " + loop_begin + "\n") ++
- List("\n" + loop_end + ":\n\n"), env1)
- }
- case Write(x) =>
- (List("iload " + env(x) + "\n" + "invokestatic XXX/XXX/write(I)V\n"), env)
-}
-
-def compile_bl(bl: Block, env: Env) : (Instrs, Env) = bl match {
- case Nil => (Nil, env)
- case s::bl => {
- val (instrs1, env1) = compile_stmt(s, env)
- val (instrs2, env2) = compile_bl(bl, env1)
- (instrs1 ++ instrs2, env2)
- }
-}
-
-def compile(input: String) : String = {
- val class_name = input.split('.')(0)
- val tks = Tok.fromFile(input)
- val ast = Stmts.parse_single(tks)
- val instructions = compile_bl(ast, Map.empty)._1
- (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name)
-}
-
-
-def compile_to(input: String, output: String) = {
- val fw = new java.io.FileWriter(output)
- fw.write(compile(input))
- fw.close()
-}
-
-//
-val tks = Tok.fromString("x := x + 1")
-val ast = Stmt.parse_single(tks)
-println(compile_stmt(ast, Map("x" -> "n"))._1.mkString)
-
-
-
-//examples
-
-compile_to("loops.while", "loops.j")
-//compile_to("fib.while", "fib.j")
-
-
-// testing cases for time measurements
-/*
-def time_needed[T](i: Int, code: => T) = {
- val start = System.nanoTime()
- for (j <- 1 to i) code
- val end = System.nanoTime()
- (end - start)/(i * 1.0e9)
-}
-
-// for testing
-import scala.sys.process._
-
-val test_prog = """
-start := XXX;
-x := start;
-y := start;
-z := start;
-while 0 < x do {
- while 0 < y do {
- while 0 < z do {
- z := z - 1
- };
- z := start;
- y := y - 1
- };
- y := start;
- x := x - 1
-};
-write x;
-write y;
-write z
-"""
-
-
-def compile_test(n: Int) : Unit = {
- val class_name = "LOOP"
- val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString))
- val ast = Stmts.parse_single(tks)
- val instructions = compile_bl(ast, Map.empty)._1
- val assembly = (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name)
- val fw = new java.io.FileWriter(class_name + ".j")
- fw.write(assembly)
- fw.close()
- val test = ("java -jar jvm/jasmin-2.4/jasmin.jar " + class_name + ".j").!!
- println(n + " " + time_needed(2, ("java " + class_name + "/" + class_name).!!))
-}
-
-List(1, 5000, 10000, 50000, 100000, 250000, 500000, 750000, 1000000).map(compile_test(_))
-
-
-
-// javabyte code assmbler
-//
-// java -jar jvm/jasmin-2.4/jasmin.jar loops.j
-
-*/
-
-
-
-
-
--- a/progs/lexer.scala Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,280 +0,0 @@
-// A simple lexer inspired by work of Sulzmann & Lu
-//==================================================
-
-
-import scala.language.implicitConversions
-import scala.language.reflectiveCalls
-
-// regular expressions including records
-abstract class Rexp
-case object ZERO extends Rexp
-case object ONE extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
-case class RECD(x: String, r: Rexp) extends Rexp
-
-// values
-abstract class Val
-case object Empty extends Val
-case class Chr(c: Char) extends Val
-case class Sequ(v1: Val, v2: Val) extends Val
-case class Left(v: Val) extends Val
-case class Right(v: Val) extends Val
-case class Stars(vs: List[Val]) extends Val
-case class Rec(x: String, v: Val) extends Val
-
-// some convenience for typing in regular expressions
-def charlist2rexp(s : List[Char]): Rexp = s match {
- case Nil => ONE
- case c::Nil => CHAR(c)
- case c::s => SEQ(CHAR(c), charlist2rexp(s))
-}
-implicit def string2rexp(s : String) : Rexp =
- charlist2rexp(s.toList)
-
-implicit def RexpOps(r: Rexp) = new {
- def | (s: Rexp) = ALT(r, s)
- def % = STAR(r)
- def ~ (s: Rexp) = SEQ(r, s)
-}
-
-implicit def stringOps(s: String) = new {
- def | (r: Rexp) = ALT(s, r)
- def | (r: String) = ALT(s, r)
- def % = STAR(s)
- def ~ (r: Rexp) = SEQ(s, r)
- def ~ (r: String) = SEQ(s, r)
- def $ (r: Rexp) = RECD(s, r)
-}
-
-def nullable(r: Rexp) : Boolean = r match {
- case ZERO => false
- case ONE => true
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
- case RECD(_, r1) => nullable(r1)
-}
-
-def der(c: Char, r: Rexp) : Rexp = r match {
- case ZERO => ZERO
- case ONE => ZERO
- case CHAR(d) => if (c == d) ONE else ZERO
- case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
- case STAR(r) => SEQ(der(c, r), STAR(r))
- case RECD(_, r1) => der(c, r1)
-}
-
-
-// extracts a string from value
-def flatten(v: Val) : String = v match {
- case Empty => ""
- case Chr(c) => c.toString
- case Left(v) => flatten(v)
- case Right(v) => flatten(v)
- case Sequ(v1, v2) => flatten(v1) + flatten(v2)
- case Stars(vs) => vs.map(flatten).mkString
- case Rec(_, v) => flatten(v)
-}
-
-
-// extracts an environment from a value;
-// used for tokenise a string
-def env(v: Val) : List[(String, String)] = v match {
- case Empty => Nil
- case Chr(c) => Nil
- case Left(v) => env(v)
- case Right(v) => env(v)
- case Sequ(v1, v2) => env(v1) ::: env(v2)
- case Stars(vs) => vs.flatMap(env)
- case Rec(x, v) => (x, flatten(v))::env(v)
-}
-
-// The Injection Part of the lexer
-
-def mkeps(r: Rexp) : Val = r match {
- case ONE => Empty
- case ALT(r1, r2) =>
- if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
- case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
- case STAR(r) => Stars(Nil)
- case RECD(x, r) => Rec(x, mkeps(r))
-}
-
-def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
- case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
- case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
- case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
- case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
- case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
- case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
- case (CHAR(d), Empty) => Chr(c)
- case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
-}
-
-// some "rectification" functions for simplification
-def F_ID(v: Val): Val = v
-def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
-def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
-def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
- case Right(v) => Right(f2(v))
- case Left(v) => Left(f1(v))
-}
-def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
- case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
-}
-def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) =
- (v:Val) => Sequ(f1(Empty), f2(v))
-def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) =
- (v:Val) => Sequ(f1(v), f2(Empty))
-def F_RECD(f: Val => Val) = (v:Val) => v match {
- case Rec(x, v) => Rec(x, f(v))
-}
-def F_ERROR(v: Val): Val = throw new Exception("error")
-
-def simp(r: Rexp): (Rexp, Val => Val) = r match {
- case ALT(r1, r2) => {
- val (r1s, f1s) = simp(r1)
- val (r2s, f2s) = simp(r2)
- (r1s, r2s) match {
- case (ZERO, _) => (r2s, F_RIGHT(f2s))
- case (_, ZERO) => (r1s, F_LEFT(f1s))
- case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
- else (ALT (r1s, r2s), F_ALT(f1s, f2s))
- }
- }
- case SEQ(r1, r2) => {
- val (r1s, f1s) = simp(r1)
- val (r2s, f2s) = simp(r2)
- (r1s, r2s) match {
- case (ZERO, _) => (ZERO, F_ERROR)
- case (_, ZERO) => (ZERO, F_ERROR)
- case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
- case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s))
- case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
- }
- }
- case r => (r, F_ID)
-}
-
-// lexing functions including simplification
-def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
- case Nil => if (nullable(r)) mkeps(r) else
- { throw new Exception("lexing error") }
- case c::cs => {
- val (r_simp, f_simp) = simp(der(c, r))
- inj(r, c, f_simp(lex_simp(r_simp, cs)))
- }
-}
-
-def lexing_simp(r: Rexp, s: String) =
- env(lex_simp(r, s.toList))
-
-
-// The Lexing Rules for the Fun Language
-
-def PLUS(r: Rexp) = r ~ r.%
-
-def Range(s : List[Char]) : Rexp = s match {
- case Nil => ZERO
- case c::Nil => CHAR(c)
- case c::s => ALT(CHAR(c), Range(s))
-}
-def RANGE(s: String) = Range(s.toList)
-
-val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_")
-val DIGIT = RANGE("0123456789")
-val ID = SYM ~ (SYM | DIGIT).%
-val NUM = PLUS(DIGIT)
-val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write"
-val SEMI: Rexp = ";"
-val OP: Rexp = ":=" | "=" | "-" | "+" | "*" | "!=" | "<" | ">"
-val WHITESPACE = PLUS(" " | "\n" | "\t")
-val RPAREN: Rexp = "{"
-val LPAREN: Rexp = "}"
-val STRING: Rexp = "\"" ~ SYM.% ~ "\""
-
-
-val WHILE_REGS = (("k" $ KEYWORD) |
- ("i" $ ID) |
- ("o" $ OP) |
- ("n" $ NUM) |
- ("s" $ SEMI) |
- ("str" $ STRING) |
- ("p" $ (LPAREN | RPAREN)) |
- ("w" $ WHITESPACE)).%
-
-
-// Two Simple While Tests
-//========================
-
-println("test: read n")
-
-val prog0 = """read n"""
-println(lexing_simp(WHILE_REGS, prog0))
-
-println("test: read n; write n ")
-
-val prog1 = """read n; write n"""
-println(lexing_simp(WHILE_REGS, prog1))
-
-
-// Bigger Tests
-//==============
-
-// escapes strings and prints them out as "", "\n" and so on
-def esc(raw: String): String = {
- import scala.reflect.runtime.universe._
- Literal(Constant(raw)).toString
-}
-
-def escape(tks: List[(String, String)]) =
- tks.map{ case (s1, s2) => (s1, esc(s2))}
-
-val prog2 = """
-write "Fib";
-read n;
-minus1 := 0;
-minus2 := 1;
-while n > 0 do {
- temp := minus2;
- minus2 := minus1 + minus2;
- minus1 := temp;
- n := n - 1
-};
-write "Result";
-write minus2
-"""
-
-println("lexing Fib")
-println(escape(lexing_simp(WHILE_REGS, prog2)).mkString("\n"))
-
-
-
-val prog3 = """
-start := 1000;
-x := start;
-y := start;
-z := start;
-while 0 < x do {
- while 0 < y do {
- while 0 < z do {
- z := z - 1
- };
- z := start;
- y := y - 1
- };
- y := start;
- x := x - 1
-}
-"""
-
-println("lexing Loops")
-println(escape(lexing_simp(WHILE_REGS, prog3)).mkString("\n"))
-
--- a/progs/lexer/lexer.sc Mon Jul 27 11:02:48 2020 +0100
+++ b/progs/lexer/lexer.sc Thu Jul 30 13:50:54 2020 +0100
@@ -81,7 +81,7 @@
}
-// extracts a string from value
+// extracts a string from a value
def flatten(v: Val) : String = v match {
case Empty => ""
case Chr(c) => c.toString
--- a/progs/loops.while Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-start := 1000;
-x := start;
-y := start;
-z := start;
-while 0 < x do {
- while 0 < y do {
- while 0 < z do { z := z - 1 };
- z := start;
- y := y - 1
- };
- y := start;
- x := x - 1
-}
-
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/matcher/build.sc Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,15 @@
+import mill._
+import scalalib._
+
+val baseDir = build.millSourcePath
+
+
+object matcher extends ScalaModule {
+ def scalaVersion = "2.13.2"
+ def mainClass = Some("re1 all")
+
+ //override def resources = T.sources(baseDir)
+ override def sources = T.sources(baseDir / "re1.sc")
+
+
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/matcher/build.sh Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,7 @@
+#!/bin/bash
+set -euo pipefail
+
+amm re1.sc all
+amm re2.sc all
+amm re3.sc all
+amm re4.sc all
--- a/progs/matcher/re2.sc Mon Jul 27 11:02:48 2020 +0100
+++ b/progs/matcher/re2.sc Thu Jul 30 13:50:54 2020 +0100
@@ -74,20 +74,23 @@
-// test: (a?{n}) (a{n})
@doc("Test (a?{n}) (a{n})")
@main
def test1() = {
+ println("Test (a?{n}) (a{n})")
+
for (i <- 0 to 1000 by 100) {
println(f"$i: ${time_needed(2, matcher(EVIL1(i), "a" * i))}%.5f")
}
}
-// test: (a*)* b
+
@doc("Test (a*)* b")
@main
def test2() = {
- for (i <- 0 to 20) {
+ println("Test (a*)* b")
+
+ for (i <- 0 to 20 by 2) {
println(f"$i: ${time_needed(2, matcher(EVIL2, "a" * i))}%.5f")
}
}
--- a/progs/matcher/re3.sc Mon Jul 27 11:02:48 2020 +0100
+++ b/progs/matcher/re3.sc Thu Jul 30 13:50:54 2020 +0100
@@ -96,19 +96,23 @@
}
-//test: (a?{n}) (a{n})
+//
@doc("Test (a?{n}) (a{n})")
@main
def test1() = {
+ println("Test (a?{n}) (a{n})")
+
for (i <- 0 to 8000 by 1000) {
println(f"$i: ${time_needed(3, matcher(EVIL1(i), "a" * i))}%.5f")
}
}
-//test: (a*)* b
+//
@doc("Test (a*)* b")
@main
def test2() = {
+ println("Test (a*)* b")
+
for (i <- 0 to 6000000 by 500000) {
println(f"$i: ${time_needed(3, matcher(EVIL2, "a" * i))}%.5f")
}
--- a/progs/matcher/re4.sc Mon Jul 27 11:02:48 2020 +0100
+++ b/progs/matcher/re4.sc Thu Jul 30 13:50:54 2020 +0100
@@ -3,11 +3,14 @@
//
// call the test cases with X = {1,2}
//
-// amm re3.sc testX
+// amm re4.sc testX
//
// or
//
-// amm re3.sc all
+// amm re4.sc all
+//
+// !! DO NOT USE THIS VERSION AS A STARTING POINT !!
+// !! FOR CW1 & 2! USE re3.sc INSTEAD !!
abstract class Rexp
--- a/progs/mllex.scala Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,109 +0,0 @@
-:load matcher.scala
-
-
-// some regular expressions
-val KEYWORDS = ALTS(List("#", "(", ")", ",", "->", "...", ":", ":>", ";", "=",
- "=>", "[", "]", "_", "{", "|", "}", "abstype", "and", "andalso", "as",
- "case", "datatype", "do", "else", "end", "eqtype", "exception", "fn",
- "fun", "functor", "handle", "if", "in", "include", "infix", "infixr",
- "let", "local", "nonfix", "of", "op", "open", "orelse", "raise", "rec",
- "sharing", "sig", "signature", "struct", "structure", "then", "type",
- "val", "where", "while", "with", "withtype"))
-
-val DIGITS = RANGE("0123456789")
-val NONZERODIGITS = RANGE("123456789")
-
-val POSITIVES = ALT(SEQ(NONZERODIGITS, STAR(DIGITS)), "0")
-val INTEGERS = ALT(SEQ("~", POSITIVES), POSITIVES)
-
-val ALL = ALTS(KEYWORDS, INTEGERS)
-
-val COMMENT = SEQS("/*", NOT(SEGS(STAR(ALL), "*/", STAR(ALL))), "*/")
-
-
-
-val LPAREN = CHAR('(')
-val RPAREN = CHAR(')')
-val WHITESPACE = PLUS(RANGE(" \n".toList))
-val OPS = RANGE("+-*".toList)
-
-// for classifying the strings that have been recognised
-abstract class Token
-case object T_WHITESPACE extends Token
-case object T_NUM extends Token
-case class T_OP(s: String) extends Token
-case object T_LPAREN extends Token
-case object T_RPAREN extends Token
-case class T_NT(s: String, rhs: List[Token]) extends Token
-
-def tokenizer(rs: List[Rule[Token]], s: String) : List[Token] =
- tokenize(rs, s.toList).filterNot(_ match {
- case T_WHITESPACE => true
- case _ => false
- })
-
-
-
-// lexing rules for arithmetic expressions
-val lexing_rules: List[Rule[Token]]=
- List((NUMBER, (s) => T_NUM),
- (WHITESPACE, (s) => T_WHITESPACE),
- (LPAREN, (s) => T_LPAREN),
- (RPAREN, (s) => T_RPAREN),
- (OPS, (s) => T_OP(s.mkString)))
-
-tokenize_file(Nil, "nominal_library.ML")
-
-
-
-
-type Grammar = List[(String, List[Token])]
-
-// grammar for arithmetic expressions
-val grammar =
- List ("E" -> List(T_NUM),
- "E" -> List(T_NT("E", Nil), T_OP("+"), T_NT("E", Nil)),
- "E" -> List(T_NT("E", Nil), T_OP("-"), T_NT("E", Nil)),
- "E" -> List(T_NT("E", Nil), T_OP("*"), T_NT("E", Nil)),
- "E" -> List(T_LPAREN, T_NT("E", Nil), T_RPAREN))
-
-def startsWith[A](ts1: List[A], ts2: List[A]) : Boolean = (ts1, ts2) match {
- case (_, Nil) => true
- case (T_NT(e, _)::ts1,T_NT(f, _)::ts2) => (e == f) && startsWith(ts1, ts2)
- case (t1::ts1, t2::ts2) => (t1 == t2) && startsWith(ts1, ts2)
- case _ => false
-}
-
-def chop[A](ts1: List[A], prefix: List[A], ts2: List[A]) : Option[(List[A], List[A])] =
- ts1 match {
- case Nil => None
- case t::ts =>
- if (startsWith(ts1, prefix)) Some(ts2.reverse, ts1.drop(prefix.length))
- else chop(ts, prefix, t::ts2)
- }
-
-// examples
-chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil)
-chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil)
-
-def replace[A](ts: List[A], out: List[A], in: List [A]) =
- chop(ts, out, Nil) match {
- case None => None
- case Some((before, after)) => Some(before ::: in ::: after)
- }
-
-def parse1(g: Grammar, ts: List[Token]) : Boolean = ts match {
- case List(T_NT("E", tree)) => { println(tree); true }
- case _ => {
- val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(T_NT(lhs, rhs)))
- tss.flatten.exists(parse1(g, _))
- }
-}
-
-
-println() ; parse1(grammar, tokenizer(lexing_rules, "2 + 3 * 4 + 1"))
-println() ; parse1(grammar, tokenizer(lexing_rules, "(2 + 3) * (4 + 1)"))
-println() ; parse1(grammar, tokenizer(lexing_rules, "(2 + 3) * 4 (4 + 1)"))
-
-
-
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/parser-combinators/build.sh Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,6 @@
+#!/bin/bash
+set -euo pipefail
+
+amm comb1.sc
+amm comb2.sc
+
--- a/progs/parser-combinators/comb1.sc Mon Jul 27 11:02:48 2020 +0100
+++ b/progs/parser-combinators/comb1.sc Thu Jul 30 13:50:54 2020 +0100
@@ -1,11 +1,14 @@
// Parser Combinators: Simple Version
//====================================
+//
+// Call with
+//
+// amm comb1.sc
-/*
- Note, in the lectures I did not show the implicit type constraint
- I : IsSeq, which means that the input type 'I' needs to be
- a sequence.
-*/
+
+// Note, in the lectures I did not show the implicit type constraint
+// I : IsSeq, which means that the input type 'I' needs to be
+// a sequence.
type IsSeq[A] = A => Seq[_]
@@ -33,12 +36,14 @@
def parse(in: I) = p.parse(in) ++ q.parse(in)
}
-// parser map
+// map parser
class MapParser[I : IsSeq, T, S](p: => Parser[I, T],
f: T => S) extends Parser[I, S] {
def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)
}
+
+
// an example of an atomic parser for characters
case class CharParser(c: Char) extends Parser[String, Char] {
def parse(in: String) =
@@ -59,6 +64,7 @@
val NumParser = RegexParser("[0-9]+".r)
def StrParser(s: String) = RegexParser(Regex.quote(s).r)
+
// NumParserInt transforms a "string integer" into a propper Int
// (needs "new" because MapParser is not a case class)
@@ -210,7 +216,7 @@
// a problem with the arithmetic expression parser: it
-// gets vert slow with deeply nested parentheses
+// gets very slow with deeply nested parentheses
println("Runtime problem")
println(E.parse("1"))
--- a/progs/parser-combinators/comb2.sc Mon Jul 27 11:02:48 2020 +0100
+++ b/progs/parser-combinators/comb2.sc Thu Jul 30 13:50:54 2020 +0100
@@ -1,18 +1,28 @@
-// Parser Combinators: Simple Version
+// Parser Combinators:
+// Simple Version for WHILE-language
//====================================
+//
+// with some added convenience for
+// map-parsers and grammar rules
+//
+// call with
+//
+// amm comb2.sc
-// more convenience for the map parsers later on
+
+
+// more convenience for the map parsers later on;
+// it allows writing nested patterns as
+// case x ~ y ~ z => ...
+
case class ~[+A, +B](_1: A, _2: B)
-/*
- Note, in the lectures I did not show the implicit type constraint
- I : IsSeq, which means that the input type 'I' needs to be
- a sequence.
-*/
+// constraint for the input
type IsSeq[A] = A => Seq[_]
+
abstract class Parser[I : IsSeq, T]{
def parse(in: I): Set[(T, I)]
@@ -141,7 +151,7 @@
(p"false".map[BExp]{ _ => False }) ||
(p"(" ~ BExp ~ p")").map[BExp]{ case _ ~ x ~ _ => x }
-// statement
+// a single statement
lazy val Stmt: Parser[String, Stmt] =
((p"skip".map[Stmt]{_ => Skip }) ||
(IdParser ~ p":=" ~ AExp).map[Stmt]{ case x ~ _ ~ z => Assign(x, z) } ||
@@ -223,7 +233,7 @@
def eval(bl: Block) : Env = eval_bl(bl, Map())
// parse + evaluate fib program; then lookup what is
-// stored under the variable result
+// stored under the variable "result"
println(eval(Stmts.parse_all(fib).head)("result"))
@@ -243,6 +253,7 @@
println(eval(Stmts.parse_all(factors).head))
+
// calculate all prime numbers up to a number
println("Primes")
--- a/progs/parser1.scala Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,88 +0,0 @@
-// A naive bottom-up parser with backtracking
-//
-// Needs:
-// :load matcher.scala
-
-// some regular expressions
-val DIGIT = RANGE("0123456789")
-val NONZERODIGIT = RANGE("123456789")
-
-val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
-val LPAREN = CHAR('(')
-val RPAREN = CHAR(')')
-val WHITESPACE = PLUS(RANGE(" \n"))
-val OPS = RANGE("+-*")
-
-// for classifying the strings that have been recognised
-
-abstract class Token
-case object T_WHITESPACE extends Token
-case object T_NUM extends Token
-case class T_OP(s: String) extends Token
-case object T_LPAREN extends Token
-case object T_RPAREN extends Token
-case class NT(s: String) extends Token
-
-// lexing rules for arithmetic expressions
-val lexing_rules: List[Rule[Token]]=
- List((NUMBER, (s) => T_NUM),
- (WHITESPACE, (s) => T_WHITESPACE),
- (LPAREN, (s) => T_LPAREN),
- (RPAREN, (s) => T_RPAREN),
- (OPS, (s) => T_OP(s.mkString)))
-
-// the tokenizer
-val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE))
-
-type Grammar = List[(String, List[Token])]
-
-// grammar for arithmetic expressions
-val grammar =
- List ("F" -> List(T_NUM),
- "E" -> List(T_NUM),
- "E" -> List(NT("E"), T_OP("+"), NT("E")),
- "E" -> List(NT("E"), T_OP("-"), NT("E")),
- "E" -> List(NT("E"), T_OP("*"), NT("E")),
- "E" -> List(T_LPAREN, NT("E"), T_RPAREN))
-
-
-def chop[A](ts1: List[A], prefix: List[A], ts2: List[A]) : Option[(List[A], List[A])] =
- ts1 match {
- case Nil => None
- case t::ts =>
- if (ts1.startsWith(prefix)) Some(ts2.reverse, ts1.drop(prefix.length))
- else chop(ts, prefix, t::ts2)
- }
-
-// examples for chop
-chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil)
-chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil)
-
-def replace[A](ts: List[A], out: List[A], in: List [A]) =
- chop(ts, out, Nil) match {
- case None => None
- case Some((before, after)) => Some(before ::: in ::: after)
- }
-
-def parse(g: Grammar, ts: List[Token]) : Boolean = {
- println(ts)
- if (ts == List(NT("E"))) true
- else {
- val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(NT(lhs)))
- tss.flatten.exists(parse(g, _))
- }
-}
-
-def parser(g: Grammar, s: String) = {
- println("\n")
- parse(g, Tok.fromString(s))
-}
-
-
-
-parser(grammar, "2 + 3 * 4 + 1")
-parser(grammar, "(2 + 3) * (4 + 1)")
-parser(grammar, "(2 + 3) * 4 (4 + 1)")
-
-
-
--- a/progs/parser4.scala Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,82 +0,0 @@
-
-// parser combinators with input type I and return type T
-
-case class SubString(s: String, l: Int, h: Int) {
- def low = l
- def high = h
- def length = h - l
- def substring(l: Int = l, h: Int = h) = s.slice(l, h)
- def set(low: Int = l, high: Int = h) = SubString(s, low, high)
-
-}
-
-type Ctxt = List[(String, SubString)]
-
-abstract class Parser[T] {
-
- def parse(ts: SubString, ctxt: Ctxt): Set[(T, SubString)]
-
- def parse_all(s: String) : Set[T] =
- for ((head, tail) <- parse(SubString(s, 0, s.length), Nil); if (tail.substring() == "")) yield head
-
- def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right)
- def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f)
- def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right)
-}
-
-class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] {
- def parse(sb: SubString, ctxt: Ctxt) =
- for ((head1, tail1) <- p.parse(sb, ctxt);
- (head2, tail2) <- q.parse(tail1, ctxt)) yield ((head1, head2), tail2)
-}
-
-class AltParser[T](p: => Parser[T], q: => Parser[T]) extends Parser[T] {
- def parse(sb: SubString, ctxt: Ctxt) = p.parse(sb, ctxt) ++ q.parse(sb, ctxt)
-}
-
-class FunParser[T, S](p: => Parser[T], f: T => S) extends Parser[S] {
- def parse(sb: SubString, ctxt: Ctxt) =
- for ((head, tail) <- p.parse(sb, ctxt)) yield (f(head), tail)
-}
-
-case class SubStringParser(s: String) extends Parser[SubString] {
- val n = s.length
- def parse(sb: SubString, ctxt: Ctxt) = {
- if (n <= sb.length && sb.substring(sb.low, sb.low + n) == s)
- Set((sb.set(high = sb.low + n), sb.set(low = sb.low + n)))
- else Set()
- }
-}
-
-implicit def string2parser(s: String) = SubStringParser(s) ==> (_.substring())
-
-class IgnLst[T](p: => Parser[T]) extends Parser[T] {
- def parse(sb: SubString, ctxt: Ctxt) = {
- if (sb.length == 0) Set()
- else for ((head, tail) <- p.parse(sb.set(high = sb.high - 1), ctxt))
- yield (head, tail.set(high = tail.high + 1))
- }
-}
-
-class CHECK[T](nt: String, p: => Parser[T]) extends Parser[T] {
- def parse(sb: SubString, ctxt: Ctxt) = {
- val should_trim = ctxt.contains (nt, sb)
- if (should_trim && sb.length == 0) Set()
- else if (should_trim) new IgnLst(p).parse(sb, (nt, sb)::ctxt)
- else p.parse(sb, (nt, sb)::ctxt)
- }
-}
-
-// ambigous grammar
-lazy val E: Parser[Int] =
- new CHECK("E", (E ~ "+" ~ E) ==> { case ((x, y), z) => x + z} ||
- (E ~ "*" ~ E) ==> { case ((x, y), z) => x * z} ||
- ("(" ~ E ~ ")") ==> { case ((x, y), z) => y} ||
- "0" ==> { (s) => 0 } ||
- "1" ==> { (s) => 1 } ||
- "2" ==> { (s) => 2 } ||
- "3" ==> { (s) => 3 })
-
-println(E.parse_all("1+2*3+3"))
-
-
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/pprint/README Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,7 @@
+To obtain pdf via chrome
+
+/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --headless --disable-gpu --print-to-pdf index.html
+
+To autocrop the pdf to the smallest size
+
+pdf-crop-margins -s -u -p 0 output.pdf
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/pprint/style.css Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,99 @@
+@media print {
+ @page { margin: 0; }
+ body { margin: 1.6cm; }
+}
+
+
+body {
+ font-family: Calibri, Segoe, "Segoe UI", "Gill Sans", "Gill Sans MT", sans-serif;
+}
+
+/* It's supposed to look like a tree diagram */
+.tree, .tree ul, .tree li {
+ list-style: none;
+ margin: 0;
+ padding: 0;
+ position: relative;
+}
+
+.tree {
+ margin: 0 0 1em;
+ text-align: center;
+}
+.tree, .tree ul {
+ display: table;
+}
+.tree ul {
+ width: 100%;
+}
+ .tree li {
+ display: table-cell;
+ padding: .5em 0;
+ vertical-align: top;
+ }
+ /* _________ */
+ .tree li:before {
+ outline: solid 1px #666;
+ content: "";
+ left: 0;
+ position: absolute;
+ right: 0;
+ top: 0;
+ }
+ .tree li:first-child:before {left: 50%;}
+ .tree li:last-child:before {right: 50%;}
+
+ .tree code, .tree span {
+ border: solid .1em #666;
+ border-radius: .2em;
+ display: inline-block;
+ margin: 0 .2em .5em;
+ padding: .2em .5em;
+ position: relative;
+ }
+ /* If the tree represents DOM structure */
+ .tree code {
+ font-family: monaco, Consolas, 'Lucida Console', monospace;
+ }
+
+ /* | */
+ .tree ul:before,
+ .tree code:before,
+ .tree span:before {
+ outline: solid 1px #666;
+ content: "";
+ height: .5em;
+ left: 50%;
+ position: absolute;
+ }
+ .tree ul:before {
+ top: -.5em;
+ }
+ .tree code:before,
+ .tree span:before {
+ top: -.55em;
+ }
+
+/* The root node doesn't connect upwards */
+.tree > li {margin-top: 0;}
+ .tree > li:before,
+ .tree > li:after,
+ .tree > li > code:before,
+ .tree > li > span:before {
+ outline: none;
+ }
+
+
+/* Create two equal columns that floats next to each other */
+.column {
+ float: left;
+ width: 40%;
+ padding: 10px;
+}
+
+/* Clear floats after the columns */
+.row:after {
+ content: "";
+ display: table;
+ clear: both;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/pprint/tree.sc Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,109 @@
+// scalatags
+import ammonite.ops._
+
+import $ivy.`com.lihaoyi::scalatags:0.8.2`
+import scalatags.Text.all._
+import scalatags.Text._
+
+// regular expressions including records
+abstract class Rexp
+case object ZERO extends Rexp
+case object ONE extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+case class SPECIAL(s: String) extends Rexp
+
+def nullable (r: Rexp) : Boolean = r match {
+ case ZERO => false
+ case ONE => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+}
+
+def der(c: Char, r: Rexp) : Rexp = r match {
+ case ZERO => ZERO
+ case ONE => ZERO
+ case CHAR(d) => if (c == d) ONE else ZERO
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r1) => SEQ(der(c, r1), STAR(r1))
+}
+
+def ders(s: List[Char], r: Rexp) : Rexp = s match {
+ case Nil => r
+ case c::s => ders(s, der(c, r))
+}
+
+def simp(r: Rexp) : Rexp = r match {
+ case ALT(r1, r2) => (simp(r1), simp(r2)) match {
+ case (ZERO, r2s) => r2s
+ case (r1s, ZERO) => r1s
+ case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s)
+ }
+ case SEQ(r1, r2) => (simp(r1), simp(r2)) match {
+ case (ZERO, _) => ZERO
+ case (_, ZERO) => ZERO
+ case (ONE, r2s) => r2s
+ case (r1s, ONE) => r1s
+ case (r1s, r2s) => SEQ(r1s, r2s)
+ }
+ case r => r
+}
+
+def ders_simp(s: List[Char], r: Rexp) : Rexp = s match {
+ case Nil => r
+ case c::s => ders_simp(s, simp(der(c, r)))
+}
+
+
+def pp(r: Rexp) : TypedTag[String] = r match {
+ case CHAR(c) => li(code(c.toString))
+ case ALT(r1, r2) => li(code("+"), ul(pp(r1), pp(r2)))
+ case SEQ(r1, r2) => li(code(raw("·")), ul(pp(r1), pp(r2)))
+ case STAR(r1) => li(code("*"), ul(pp(r1)))
+ case ZERO => li(code("0"))
+ case ONE => li(code("1"))
+ case SPECIAL(s) => li(code(raw(s)))
+}
+
+def mktree(r: Rexp) =
+ ul(cls := "tree")(pp(r))
+
+
+def index(rs: List[Rexp]) =
+ html(
+ head(link(rel := "stylesheet", href := "./style.css")),
+ body(rs.map(mktree))
+ )
+
+
+
+/*
+println(index(List(simple, test, evil2)))
+
+val simple = CHAR('a')
+val test = ALT(CHAR('a'), CHAR('b'))
+
+
+write.over(pwd/"t1.html",
+ index(List(evil2,
+ ders("a".toList, evil2),
+ ders("aa".toList, evil2),
+ ders("aaa".toList, evil2))))
+*/
+
+val evil2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
+val r1 = STAR(ALT(SPECIAL("<i>r<sub>1</sub></i>"), SPECIAL("<i>r<sub>2</sub></i>")))
+val r2 = ALT(SPECIAL("<i>r<sub>1</sub></i>"), STAR(SPECIAL("<i>r<sub>2</sub></i>")))
+
+@main
+def main(fname: String) = {
+ val content = index(List(r1))
+ write.over(pwd / fname, content)
+}
--- a/progs/primes.while Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,15 +0,0 @@
-// prints out prime numbers from
-// 2 to 100 (end)
-
-end := 100;
-n := 2;
-while (n < end) do {
- f := 2;
- tmp := 0;
- while ((f < n / 2 + 1) && (tmp == 0)) do {
- if ((n / f) * f == n) then { tmp := 1 } else { skip };
- f := f + 1
- };
- if (tmp == 0) then { write(n) } else { skip };
- n := n + 1
-}
\ No newline at end of file
--- a/progs/re-alt.scala Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,115 +0,0 @@
-trait RegExp {
- def nullable: Boolean
- def derive(c: Char): RegExp
-}
-
-case object Empty extends RegExp {
- def nullable = false
- def derive(c: Char) = Empty
-}
-
-case object Eps extends RegExp {
- def nullable = true
- def derive(c: Char) = Empty
-}
-
-case class Str(s: String) extends RegExp {
- def nullable = s.isEmpty
- def derive(c: Char) =
- if (s.isEmpty || s.head != c) Empty
- else Str(s.tail)
-}
-
-case class Cat(r: RegExp, s: RegExp) extends RegExp {
- def nullable = r.nullable && s.nullable
- def derive(c: Char) =
- if (r.nullable) Or(Cat(r.derive(c), s), s.derive(c))
- else Cat(r.derive(c), s)
-}
-
-case class Star(r: RegExp) extends RegExp {
- def nullable = true
- def derive(c: Char) = Cat(r.derive(c), this)
-}
-
-case class Or(r: RegExp, s: RegExp) extends RegExp {
- def nullable = r.nullable || s.nullable
- def derive(c: Char) = Or(r.derive(c), s.derive(c))
-}
-
-case class And(r: RegExp, s: RegExp) extends RegExp {
- def nullable = r.nullable && s.nullable
- def derive(c: Char) = And(r.derive(c), s.derive(c))
-}
-
-case class Not(r: RegExp) extends RegExp {
- def nullable = !r.nullable
- def derive(c: Char) = Not(r.derive(c))
-}
-
-
-
-
-object Matcher {
- def matches(r: RegExp, s: String): Boolean = {
- if (s.isEmpty) r.nullable
- else matches(r.derive(s.head), s.tail)
- }
-}
-
-
-object Pimps {
- implicit def string2RegExp(s: String) = Str(s)
-
- implicit def regExpOps(r: RegExp) = new {
- def | (s: RegExp) = Or(r, s)
- def & (s: RegExp) = And(r, s)
- def % = Star(r)
- def %(n: Int) = rep(r, n)
- def ? = Or(Eps, r)
- def ! = Not(r)
- def ++ (s: RegExp) = Cat(r, s)
- def ~ (s: String) = Matcher.matches(r, s)
- }
-
- implicit def stringOps(s: String) = new {
- def | (r: RegExp) = Or(s, r)
- def | (r: String) = Or(s, r)
- def & (r: RegExp) = And(s, r)
- def & (r: String) = And(s, r)
- def % = Star(s)
- def % (n: Int) = rep(Str(s), n)
- def ? = Or(Eps, s)
- def ! = Not(s)
- def ++ (r: RegExp) = Cat(s, r)
- def ++ (r: String) = Cat(s, r)
- def ~ (t: String) = Matcher.matches(s, t)
- }
-
- def rep(r: RegExp, n: Int): RegExp =
- if (n <= 0) Star(r)
- else Cat(r, rep(r, n - 1))
-}
-
-
-object Test {
- import Pimps._
-
- val digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
- val int = ("+" | "-").? ++ digit.%(1)
- val real = ("+" | "-").? ++ digit.%(1) ++ ("." ++ digit.%(1)).? ++ (("e" | "E") ++ ("+" | "-").? ++ digit.%(1)).?
-
- def main(args: Array[String]) {
- val ints = List("0", "-4534", "+049", "99")
- val reals = List("0.9", "-12.8", "+91.0", "9e12", "+9.21E-12", "-512E+01")
- val errs = List("", "-", "+", "+-1", "-+2", "2-")
-
- ints.foreach(s => assert(int ~ s))
- reals.foreach(s => assert(!(int ~ s)))
- errs.foreach(s => assert(!(int ~ s)))
-
- ints.foreach(s => assert(real ~ s))
- reals.foreach(s => assert(real ~ s))
- errs.foreach(s => assert(!(real ~ s)))
- }
-}
--- a/progs/re-internal.scala Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,17 +0,0 @@
-
-// measures the time a function needs
-def time_needed[T](i: Int, code: => T) = {
- val start = System.nanoTime()
- for (j <- 1 to i) code
- val end = System.nanoTime()
- (end - start)/(i * 1.0e9)
-}
-
-
-for (i <- 1 to 10001 by 300) {
- val re = ("((a?){" + i + "})(a{" + i + "})")
- println(i + " " + "%.5f".format(time_needed(1, ("a" * i).matches(re))))
-}
-
-
-
--- a/progs/re-sulzmann-partial.scala Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,136 +0,0 @@
-import scala.language.implicitConversions
-import scala.language.reflectiveCalls
-
-abstract class Rexp
-
-case object NULL extends Rexp
-case object EMPTY extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
-
-abstract class Pat
-
-case class VAR(x: String, r: Rexp) extends Pat
-case class GRP(x: String, p: Pat) extends Pat
-case class PSEQ(p1: Pat, p2: Pat) extends Pat
-case class PALT(p1: Pat, p2: Pat) extends Pat
-case class PSTAR(p: Pat) extends Pat
-
-
-def nullable (r: Rexp) : Boolean = r match {
- case NULL => false
- case EMPTY => true
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
-}
-
-def down (p: Pat) : Rexp = p match {
- case VAR(x: String, w: String, r: Rexp) => r
- case GRP(x: String, w: String, p: Pat) => down(p)
- case PSEQ(p1: Pat, p2: Pat) => SEQ(down(p1), down(p2))
- case PALT(p1: Pat, p2: Pat) => ALT(down(p1), down(p2))
- case PSTAR(p: Pat) => STAR(down(p))
-}
-
-def patnullable (p: Pat) : Boolean = p match {
- case PVar(_, r) => nullable(r)
- case PSEQ(p1, p2) => patnullable(p1) && patnullable(p2)
- case PCHOICE(p1, p2) => patnullable(p1) || patnullable(p2)
- case PSTAR(p) => true
- case PatVar(_, p) => patnullable(p)
-}
-
-//type Env = Set[List[(String, String)]]
-type Env = Map[Int, String]
-
-def update(n: Int, c: Char) (env: Env) =
- env + (n -> (env.getOrElse(n, "") + c.toString))
-
-def pderivPat (p: Pat, c: Char) : Set[(Pat, Env => Env)] = p match {
- case PVar(n: Int, r: Rexp) => {
- val pds = pderiv(r, c)
- if (pds.isEmpty) Set()
- else Set((PVar(n, toRexp(pds.toList)), update(n, c)))
- }
- case PSEQ(p1: Pat, p2: Pat) => {
- val pats : Set[(Pat, Env => Env)] =
- for ((p, f) <- pderivPat(p1, c)) yield (PSEQ(p, p2), f)
- if (nullable(strip(p1))) pats ++ pderivPat(p2, c)
- else pats
- }
- case PCHOICE(p1: Pat, p2: Pat) => pderivPat(p1, c) ++ pderivPat(p2, c)
- case PSTAR(p1: Pat) =>
- for ((p, f) <- pderivPat(p1, c)) yield (PSEQ(p, PSTAR(p1)), f)
- case PatVar(n: Int, p1: Pat) =>
- for ((p, f) <- pderivPat(p1, c)) yield (PatVar(n, p), f compose (update (n, c)))
-}
-
-
-val p2 = PSEQ(PVar(1, STAR("A")), PVar(2, STAR("A")))
-pderivPat(p2, 'A').mkString("\n")
-
-
-def greedy_aux(env: Set[(Pat, Env)], w: List[Char]) : Set[Env] = w match {
- case Nil =>
- for ((p, e) <- env if patnullable(p)) yield e
- case c::cs => {
- val env2 = for ((p, e) <- env; (p1, f) <- pderivPat(p, c)) yield (p1, f(e))
- greedy_aux(env2, cs)
- }
-}
-
-def greedy(p: Pat, w: String) = {
- val res = greedy_aux(Set((p, Map())), w.toList)
- if (res.isEmpty) None else Some(res)
-}
-
-// some convenience for typing in regular expressions
-def charlist2rexp (s : List[Char]) : Rexp = s match {
- case Nil => EMPTY
- case c::Nil => CHAR(c)
- case c::s => SEQ(CHAR(c), charlist2rexp(s))
-}
-implicit def string2rexp (s : String) : Rexp = charlist2rexp(s.toList)
-
-implicit def RexpOps (r: Rexp) = new {
- def | (s: Rexp) = ALT(r, s)
- def % = STAR(r)
- def ~ (s: Rexp) = SEQ(r, s)
-}
-
-implicit def stringOps (s: String) = new {
- def | (r: Rexp) = ALT(s, r)
- def | (r: String) = ALT(s, r)
- def % = STAR(s)
- def ~ (r: Rexp) = SEQ(s, r)
- def ~ (r: String) = SEQ(s, r)
-}
-
-implicit def PatOps (p: Pat) = new {
- def | (q: Pat) = PALT(p, q)
- def % = PSTAR(p)
- def ~ (q: Pat) = PSEQ(p, q)
-}
-
-val p3 = PSTAR(PSEQ(PVar(1, "A"), PVar(2, "B")))
-
-greedy2(Set((p3, Map())), "ABAB".toList)
-
-
-val p4 = PVar(1, "A")
-greedy2(Set((p4, Map())), "A".toList)
-
-val p5 = PSEQ(PVar(1, "A"), PVar(1, "B"))
-greedy2(Set((p5, Map())), "AB".toList)
-
-val res = pderivPat(p5, 'A')
-for ((_, f) <- res) yield f(Map())
-
-
-val p6 = PatVar(4,PSTAR(PCHOICE(PCHOICE(PVar(1, "A"), PVar(2, "AB")), PVar(3, "B"))))
-
-greedy(p6, "ABA")
--- a/progs/re-sulzmann.scala Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,164 +0,0 @@
-import scala.language.implicitConversions
-import scala.language.reflectiveCalls
-
-abstract class Rexp
-
-case object NULL extends Rexp
-case object EMPTY extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
-
-abstract class Pat
-
-case class VAR(x: String, w: String, r: Rexp) extends Pat
-case class GRP(x: String, w: String, p: Pat) extends Pat
-case class PSEQ(p1: Pat, p2: Pat) extends Pat
-case class PALT(p1: Pat, p2: Pat) extends Pat
-case class PSTAR(p: Pat) extends Pat
-
-object VAR { def apply(x: String, r: Rexp) : VAR = VAR(x, "", r) }
-object GRP { def apply(x: String, p: Pat) : GRP = GRP(x, "", p) }
-
-
-def nullable (r: Rexp) : Boolean = r match {
- case NULL => false
- case EMPTY => true
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
-}
-
-// derivative of a regular expression w.r.t. a character
-def der (c: Char, r: Rexp) : Rexp = r match {
- case NULL => NULL
- case EMPTY => NULL
- case CHAR(d) => if (c == d) EMPTY else NULL
- case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
- case STAR(r) => SEQ(der(c, r), STAR(r))
-}
-
-def down (p: Pat) : Rexp = p match {
- case VAR(x: String, w: String, r: Rexp) => r
- case GRP(x: String, w: String, p: Pat) => down(p)
- case PSEQ(p1: Pat, p2: Pat) => SEQ(down(p1), down(p2))
- case PALT(p1: Pat, p2: Pat) => ALT(down(p1), down(p2))
- case PSTAR(p: Pat) => STAR(down(p))
-}
-
-def empty (p: Pat) : Pat = p match {
- case VAR(x: String, w: String, r: Rexp) =>
- if (nullable(r)) VAR(x, w, EMPTY)
- else VAR(x, w, NULL)
- case GRP(x: String, w: String, p: Pat) => GRP(x, w, empty(p))
- case PSEQ(p1: Pat, p2: Pat) => PSEQ(empty(p1), empty(p2))
- case PALT(p1: Pat, p2: Pat) => PALT(empty(p1), empty(p2))
- case PSTAR(p: Pat) => PSTAR(empty(p))
-}
-
-def patder (c: Char, p: Pat) : Pat = p match {
- case VAR(x: String, w: String, r: Rexp) => VAR(x, w + c, der(c, r))
- case GRP(x: String, w: String, p: Pat) => GRP(x, w + c, patder(c, p))
- case PSEQ(p1: Pat, p2: Pat) =>
- if (nullable(down(p1))) PALT(PSEQ(patder(c, p1), p2), PSEQ(empty(p1), patder(c, p2)))
- else PSEQ(patder(c, p1), p2)
- case PALT(p1: Pat, p2: Pat) => PALT(patder(c, p1), patder(c, p2))
- case PSTAR(p: Pat) => PSEQ(patder(c, p), PSTAR(p))
-}
-
-def patders (s: List[Char], p: Pat) : Pat = s match {
- case Nil => p
- case c::s => patders(s, patder(c, p))
-}
-
-type Env = Set[List[(String, String)]]
-
-def special (p: Pat, env: Env) : Env =
- if (env == Set() && nullable(down(p))) Set(Nil) else env
-
-def env (p: Pat) : Env = p match {
- case VAR(x: String, w: String, r: Rexp) =>
- if (nullable(r)) Set(List((x, w))) else Set()
- case GRP(x: String, w: String, p1: Pat) =>
- special(p, for (e <- env(p1)) yield List((x, w)) ++ e)
- case PSEQ(p1: Pat, p2: Pat) =>
- special(p, for (e1 <- env(p1); e2 <- env(p2)) yield (e1 ++ e2))
- case PALT(p1: Pat, p2: Pat) => special(p, env(p1) ++ env(p2))
- case PSTAR(p1: Pat) => special(p, env(p1))
-}
-
-def matcher (p: Pat, s: String) = env(empty(patders(s.toList, p)))
-
-
-// some convenience for typing in regular expressions
-def charlist2rexp (s : List[Char]) : Rexp = s match {
- case Nil => EMPTY
- case c::Nil => CHAR(c)
- case c::s => SEQ(CHAR(c), charlist2rexp(s))
-}
-implicit def string2rexp (s : String) : Rexp = charlist2rexp(s.toList)
-
-implicit def RexpOps (r: Rexp) = new {
- def | (s: Rexp) = ALT(r, s)
- def % = STAR(r)
- def ~ (s: Rexp) = SEQ(r, s)
-}
-
-implicit def stringOps (s: String) = new {
- def | (r: Rexp) = ALT(s, r)
- def | (r: String) = ALT(s, r)
- def % = STAR(s)
- def ~ (r: Rexp) = SEQ(s, r)
- def ~ (r: String) = SEQ(s, r)
-}
-
-implicit def PatOps (p: Pat) = new {
- def | (q: Pat) = PALT(p, q)
- def % = PSTAR(p)
- def ~ (q: Pat) = PSEQ(p, q)
-}
-
-
-val p0 = VAR("x", "A")
-
-patders("A".toList, p0)
-matcher(p0, "A")
-
-val p1 = VAR("x", "A") | VAR("y", "A") | VAR("z", "B")
-
-patders("A".toList, p1)
-matcher(p1, "A")
-matcher(p1, "B")
-
-val p2 = VAR("x", "AB")
-matcher(p2, "AB")
-matcher(p2, "AA")
-
-val p3 = VAR("x", "A" ~ "B")
-matcher(p3, "AB")
-matcher(p3, "AA")
-
-val p4 = VAR("x", "A") ~ VAR("y", "B")
-matcher(p4, "AB")
-matcher(p4, "AA")
-
-val p5 = VAR("x", "A".%)
-matcher(p5, "A")
-matcher(p5, "AA")
-matcher(p5, "")
-matcher(p5, "AAA")
-
-val p6 = VAR("x", "A").%
-matcher(p6, "A")
-matcher(p6, "AA")
-matcher(p6, "")
-matcher(p6, "AAA")
-
-val p7 = (VAR("x", "A") | VAR("y", "AB") | VAR("z", "B")).%
-matcher(p7, "AB")
-
--- a/progs/re.scala Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,123 +0,0 @@
-
-// regular expressions including NOT
-abstract class Rexp
-
-case object NULL extends Rexp
-case object EMPTY extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
-case class NOT(r: Rexp) extends Rexp
-
-
-// some convenience for typing in regular expressions
-def charlist2rexp(s : List[Char]) : Rexp = s match {
- case Nil => EMPTY
- case c::Nil => CHAR(c)
- case c::s => SEQ(CHAR(c), charlist2rexp(s))
-}
-implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
-
-
-// nullable function: tests whether the regular
-// expression can recognise the empty string
-def nullable (r: Rexp) : Boolean = r match {
- case NULL => false
- case EMPTY => true
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
- case NOT(r) => !(nullable(r))
-}
-
-// tests whether a regular expression
-// cannot recognise more
-def no_more (r: Rexp) : Boolean = r match {
- case NULL => true
- case EMPTY => false
- case CHAR(_) => false
- case ALT(r1, r2) => no_more(r1) && no_more(r2)
- case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1)
- case STAR(_) => false
- case NOT(r) => !(no_more(r))
-}
-
-
-// derivative of a regular expression w.r.t. a character
-def der (c: Char, r: Rexp) : Rexp = r match {
- case NULL => NULL
- case EMPTY => NULL
- case CHAR(d) => if (c == d) EMPTY else NULL
- case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
- case STAR(r) => SEQ(der(c, r), STAR(r))
- case NOT(r) => NOT(der (c, r))
-}
-
-
-// regular expression for specifying
-// ranges of characters
-def RANGE(s : List[Char]) : Rexp = s match {
- case Nil => NULL
- case c::Nil => CHAR(c)
- case c::s => ALT(CHAR(c), RANGE(s))
-}
-
-//one or more
-def PLUS(r: Rexp) = SEQ(r, STAR(r))
-
-
-//some regular expressions
-val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList)
-val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList)
-val LETTER = ALT(LOWERCASE, UPPERCASE)
-val DIGIT = RANGE("0123456789".toList)
-val NONZERODIGIT = RANGE("123456789".toList)
-
-val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGIT)))
-val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
-val WHITESPACE = RANGE(" \n".toList)
-val WHITESPACES = PLUS(WHITESPACE)
-
-val ALL = ALT(ALT(LETTER, DIGIT), WHITESPACE)
-val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/")
-
-
-// an example list of regular expressions
-val regs: List[Rexp]= List("if", "then", "else", "+", IDENT, NUMBER, WHITESPACES, COMMENT)
-
-
-def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s)
-
-def munch(r: Rexp, s: List[Char], t: List[Char]) : Option[(List[Char], List[Char])] =
- s match {
- case Nil if (nullable(r)) => Some(Nil, t)
- case Nil => None
- case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, t)
- case c::s if (no_more(der (c, r))) => None
- case c::s => munch(der (c, r), s, t ::: List(c))
- }
-
-def one_string (regs: List[Rexp], s: List[Char]) : (List[Char], List[Char]) = {
- val somes = regs.map { munch(_, s, Nil) } .flatten
- if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head)
-}
-
-def tokenize (regs: List[Rexp], s: List[Char]) : List[String] = s match {
- case Nil => Nil
- case _ => one_string(regs, s) match {
- case (rest, s) => s.mkString :: tokenize(regs, rest)
- }
-}
-
-//examples
-println(tokenize(regs, "if true then then 42 else +".toList))
-println(tokenize(regs, "if+true+then+then+42+else +".toList))
-println(tokenize(regs, "ifff if 34 34".toList))
-println(tokenize(regs, "/*ifff if */ hhjj /*34 */".toList))
-println(tokenize(regs, "/* if true then */ then 42 else +".toList))
-//println(tokenize(regs, "ifff $ if 34".toList)) // causes an error because of the symbol $
--- a/progs/re0.scala Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,119 +0,0 @@
-import scala.annotation.tailrec
-import scala.language.implicitConversions
-
-abstract class Rexp
-
-case object NULL extends Rexp
-case object EMPTY extends Rexp
-case object ALLCHAR extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class STR(s: String) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
-case class NOT(r: Rexp) extends Rexp
-case class REP(r: Rexp, n: Int) extends Rexp
-
-// some convenience for typing in regular expressions
-implicit def string2rexp(s : String) : Rexp = STR(s)
-
-implicit def RexpOps(r: Rexp) : Rexp = new {
- def | (s: Rexp) = ALT(r, s)
- def % = STAR(r)
- def %(n: Int) = REP(r, n)
- def %%(n: Int) = SEQ(REP(r, n), STAR(r))
- def ? = ALT(EMPTY, r)
- def unary_! = NOT(r)
- def ~ (s: Rexp) = SEQ(r, s)
-}
-
-implicit def stringOps(s: String) : Rexp = new {
- def | (r: Rexp) = ALT(s, r)
- def | (r: String) = ALT(s, r)
- def % = STAR(s)
- def %(n: Int) = REP(s, n)
- def %%(n: Int) = SEQ(REP(s, n), STAR(s))
- def ? = ALT(EMPTY, s)
- def unary_! = NOT(s)
- def ~ (r: Rexp) = SEQ(s, r)
- def ~ (r: String) = SEQ(s, r)
-}
-
-
-// nullable function: tests whether the regular
-// expression can recognise the empty string
-
-def nullable (r: Rexp) : Boolean = r match {
- case NULL => false
- case EMPTY => true
- case ALLCHAR => false
- case CHAR(_) => false
- case STR(s) => s.isEmpty
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
- case NOT(r) => !(nullable(r))
- case REP(r, i) => if (i == 0) true else nullable(r)
-}
-
-// derivative of a regular expression w.r.t. a character
-def der (c: Char, r: Rexp) : Rexp = r match {
- case NULL => NULL
- case EMPTY => NULL
- case ALLCHAR => EMPTY
- case CHAR(d) => if (c == d) EMPTY else NULL
- case STR(s) => if (s.isEmpty || s.head != c) NULL else STR(s.tail)
- case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
- case STAR(r) => SEQ(der(c, r), STAR(r))
- case NOT(r) => NOT(der (c, r))
- case REP(r, i) =>
- if (i == 0) NULL else SEQ(der(c, r), REP(r, i - 1))
-}
-
-
-// derivative w.r.t. a string (iterates der)
-@tailrec
-def ders (s: List[Char], r: Rexp) : Rexp = s match {
- case Nil => r
- case c::s => ders(s, der(c, r))
-}
-
-// main matcher function
-def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
-
-//examples
-val digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
-val int = ("+" | "-").? ~ digit.%%(1)
-val real = ("+" | "-").? ~ digit.%%(1) ~ ("." ~ digit.%%(1)).? ~ (("e" | "E") ~ ("+" | "-").? ~ digit.%%(1)).?
-
-val ints = List("0", "-4534", "+049", "99")
-val reals = List("0.9", "-12.8", "+91.0", "9e12", "+9.21E-12", "-512E+01")
-val errs = List("", "-", "+", "+-1", "-+2", "2-")
-
-ints.map(s => matcher(int, s))
-reals.map(s => matcher(int, s))
-errs.map(s => matcher(int, s))
-
-ints.map(s => matcher(real, s))
-reals.map(s => matcher(real, s))
-errs.map(s => matcher(real, s))
-
-
-
-def RTEST(n: Int) = ("a".? %(n)) ~ ("a" %(n))
-
-def time_needed[T](i: Int, code: => T) = {
- val start = System.nanoTime()
- for (j <- 1 to i) code
- val end = System.nanoTime()
- (end - start)/(i * 1.0e9)
-}
-
-for (i <- 1 to 12000 by 500) {
- println(i + ": " + "%.5f".format(time_needed(1, matcher(RTEST(i), "a" * i))))
-}
-
-
--- a/progs/re1.scala Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,154 +0,0 @@
-// A simple matcher for basic regular expressions
-
-abstract class Rexp
-case object ZERO extends Rexp // matches nothing
-case object ONE extends Rexp // matches an empty string
-case class CHAR(c: Char) extends Rexp // matches a character c
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternative
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence
-case class STAR(r: Rexp) extends Rexp // star
-
-// nullable function: tests whether a regular
-// expression can recognise the empty string
-def nullable(r: Rexp) : Boolean = r match {
- case ZERO => false
- case ONE => true
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
-}
-
-// the derivative of a regular expression w.r.t. a character
-def der (c: Char, r: Rexp) : Rexp = r match {
- case ZERO => ZERO
- case ONE => ZERO
- case CHAR(d) => if (c == d) ONE else ZERO
- case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
- case STAR(r1) => SEQ(der(c, r1), STAR(r1))
-}
-
-// the derivative w.r.t. a string (iterates der)
-def ders (s: List[Char], r: Rexp) : Rexp = s match {
- case Nil => r
- case c::s => ders(s, der(c, r))
-}
-
-// the main matcher function
-def matcher(r: Rexp, s: String) : Boolean =
- nullable(ders(s.toList, r))
-
-
-// examples from the homework
-
-val r = STAR(ALT(SEQ(CHAR('a'), CHAR('b')), CHAR('b')))
-der('a', r)
-der('b', r)
-der('c', r)
-
-val r2 = SEQ(SEQ(CHAR('x'), CHAR('y')), CHAR('z'))
-der('x', r2)
-der('y', der('x', r2))
-der('z', der('y', der('x', r2)))
-
-
-// the optional regular expression (one or zero times)
-def OPT(r: Rexp) = ALT(r, ONE)
-
-// the n-times regular expression (explicitly expanded)
-def NTIMES(r: Rexp, n: Int) : Rexp = n match {
- case 0 => ONE
- case 1 => r
- case n => SEQ(r, NTIMES(r, n - 1))
-}
-
-
-// Test Cases
-
-// the evil regular expression a?{n} a{n}
-def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
-
-// the evil regular expression (a*)*b
-val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
-
-// for measuring time
-def time_needed[T](i: Int, code: => T) = {
- val start = System.nanoTime()
- for (j <- 1 to i) code
- val end = System.nanoTime()
- (end - start) / (i * 1.0e9)
-}
-
-
-// test: (a?{n}) (a{n})
-println("Test (a?{n}) (a{n})")
-
-for (i <- 0 to 20 by 2) {
- println(f"$i: ${time_needed(2, matcher(EVIL1(i), "a" * i))}%.5f")
-}
-
-// test: (a*)* b
-println("Test (a*)* b")
-
-for (i <- 0 to 20 by 2) {
- println(f"$i: ${time_needed(2, matcher(EVIL2, "a" * i))}%.5f")
-}
-
-
-// the size of a regular expressions - for testing purposes
-def size(r: Rexp) : Int = r match {
- case ZERO => 1
- case ONE => 1
- case CHAR(_) => 1
- case ALT(r1, r2) => 1 + size(r1) + size(r2)
- case SEQ(r1, r2) => 1 + size(r1) + size(r2)
- case STAR(r) => 1 + size(r)
-}
-
-// the expicit expansion in EVIL1(n) increases
-// drastically its size
-
-size(EVIL1(1)) // 5
-size(EVIL1(3)) // 17
-size(EVIL1(5)) // 29
-size(EVIL1(7)) // 41
-size(EVIL1(20)) // 119
-
-// given a regular expression and building successive
-// derivatives might result into bigger and bigger
-// regular expressions...here is an example for this:
-
-// (a+b)* o a o b o (a+b)*
-val BIG_aux = STAR(ALT(CHAR('a'), CHAR('b')))
-val BIG = SEQ(BIG_aux, SEQ(CHAR('a'),SEQ(CHAR('b'), BIG_aux)))
-
-size(ders("".toList, BIG)) // 13
-size(ders("ab".toList, BIG)) // 51
-size(ders("abab".toList, BIG)) // 112
-size(ders("ababab".toList, BIG)) // 191
-size(ders("abababab".toList, BIG)) // 288
-size(ders("ababababab".toList, BIG)) // 403
-size(ders("abababababab".toList, BIG)) // 536
-
-
-size(ders(("ab" * 200).toList, BIG)) // 366808
-
-for (i <- 0 to 200 by 10) {
- println(f"$i: ${time_needed(2, matcher(BIG, "ab" * i))}%.5f")
-}
-
-
-
-
-//////////////////////////////////////
-def concat(A: Set[String], B: Set[String]) : Set[String] =
- for (s1 <- A; s2 <- B) yield s1 ++ s2
-
-
-val A = Set("foo", "bar")
-val B = Set("a", "b")
-
-
--- a/progs/re2.scala Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,118 +0,0 @@
-// A Version with an explicit n-times regular expression;
-// this keeps the size of the regular expression in the
-// EVIL1 test-case quite small
-
-abstract class Rexp
-case object ZERO extends Rexp
-case object ONE extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
-case class NTIMES(r: Rexp, n: Int) extends Rexp //explicit constructor for n-times
-
-
-def nullable (r: Rexp) : Boolean = r match {
- case ZERO => false
- case ONE => true
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
- case NTIMES(r, i) => if (i == 0) true else nullable(r)
-}
-
-
-def der (c: Char, r: Rexp) : Rexp = r match {
- case ZERO => ZERO
- case ONE => ZERO
- case CHAR(d) => if (c == d) ONE else ZERO
- case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
- case STAR(r1) => SEQ(der(c, r1), STAR(r1))
- case NTIMES(r1, i) =>
- if (i == 0) ZERO else SEQ(der(c, r1), NTIMES(r1, i - 1))
-}
-
-def ders (s: List[Char], r: Rexp) : Rexp = s match {
- case Nil => r
- case c::s => ders(s, der(c, r))
-}
-
-def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
-
-
-// the optional regular expression: one or zero times
-// this regular expression is still defined in terms of ALT
-def OPT(r: Rexp) = ALT(r, ONE)
-
-
-// Test Cases
-
-// evil regular expressions
-def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
-val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
-
-def time_needed[T](i: Int, code: => T) = {
- val start = System.nanoTime()
- for (j <- 1 to i) code
- val end = System.nanoTime()
- (end - start) / (i * 1.0e9)
-}
-
-
-
-// test: (a?{n}) (a{n})
-for (i <- 0 to 1000 by 100) {
- println(f"$i: ${time_needed(2, matcher(EVIL1(i), "a" * i))}%.5f")
-}
-
-// test: (a*)* b
-for (i <- 0 to 20) {
- println(f"$i: ${time_needed(2, matcher(EVIL2, "a" * i))}%.5f")
-}
-
-
-// the size of a regular expressions - for testing purposes
-def size(r: Rexp) : Int = r match {
- case ZERO => 1
- case ONE => 1
- case CHAR(_) => 1
- case ALT(r1, r2) => 1 + size(r1) + size(r2)
- case SEQ(r1, r2) => 1 + size(r1) + size(r2)
- case STAR(r) => 1 + size(r)
- case NTIMES(r, _) => 1 + size(r)
-}
-
-// EVIL1(n) has now a constant size, no matter
-// what n is; also the derivative only grows
-// moderately
-
-size(EVIL1(1)) // 7
-size(EVIL1(3)) // 7
-size(EVIL1(5)) // 7
-size(EVIL1(7)) // 7
-size(EVIL1(20)) // 7
-
-size(ders("".toList, EVIL1(5))) // 7
-size(ders("a".toList, EVIL1(5))) // 16
-size(ders("aa".toList, EVIL1(5))) // 35
-size(ders("aaa".toList, EVIL1(5))) // 59
-size(ders("aaaa".toList, EVIL1(5))) // 88
-size(ders("aaaaa".toList, EVIL1(5))) // 122
-size(ders("aaaaaa".toList, EVIL1(5))) // 151
-
-// but the size of the derivatives can still grow
-// quite dramatically in case of EVIL2
-
-size(ders("".toList, EVIL2)) // 5
-size(ders("a".toList, EVIL2)) // 12
-size(ders("aa".toList, EVIL2)) // 28
-size(ders("aaa".toList, EVIL2)) // 58
-size(ders("aaaa".toList, EVIL2)) // 116
-size(ders("aaaaa".toList, EVIL2)) // 230
-size(ders("aaaaaa".toList, EVIL2)) // 456
-
-size(ders(("a" * 20).toList, EVIL2)) // 7340068
\ No newline at end of file
--- a/progs/re3.scala Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,127 +0,0 @@
-// A version with simplification of derivatives;
-// this keeps the regular expressions small, which
-// is good for the run-time
-
-
-abstract class Rexp
-case object ZERO extends Rexp
-case object ONE extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
-case class NTIMES(r: Rexp, n: Int) extends Rexp
-
-
-
-// the nullable function: tests whether the regular
-// expression can recognise the empty string
-def nullable (r: Rexp) : Boolean = r match {
- case ZERO => false
- case ONE => true
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
- case NTIMES(r, i) => if (i == 0) true else nullable(r)
-}
-
-// the derivative of a regular expression w.r.t. a character
-def der (c: Char, r: Rexp) : Rexp = r match {
- case ZERO => ZERO
- case ONE => ZERO
- case CHAR(d) => if (c == d) ONE else ZERO
- case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
- case STAR(r1) => SEQ(der(c, r1), STAR(r1))
- case NTIMES(r, i) =>
- if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1))
-}
-
-def simp(r: Rexp) : Rexp = r match {
- case ALT(r1, r2) => (simp(r1), simp(r2)) match {
- case (ZERO, r2s) => r2s
- case (r1s, ZERO) => r1s
- case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s)
- }
- case SEQ(r1, r2) => (simp(r1), simp(r2)) match {
- case (ZERO, _) => ZERO
- case (_, ZERO) => ZERO
- case (ONE, r2s) => r2s
- case (r1s, ONE) => r1s
- case (r1s, r2s) => SEQ(r1s, r2s)
- }
- case r => r
-}
-
-
-// the derivative w.r.t. a string (iterates der)
-def ders(s: List[Char], r: Rexp) : Rexp = s match {
- case Nil => r
- case c::s => ders(s, simp(der(c, r)))
-}
-
-
-// the main matcher function
-def matcher(r: Rexp, s: String) : Boolean =
- nullable(ders(s.toList, r))
-
-
-// one or zero
-def OPT(r: Rexp) = ALT(r, ONE)
-
-
-// Test Cases
-
-// evil regular expressions: (a?){n} a{n} and (a*)* b
-def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
-val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
-
-
-def time_needed[T](i: Int, code: => T) = {
- val start = System.nanoTime()
- for (j <- 1 to i) code
- val end = System.nanoTime()
- (end - start)/(i * 1.0e9)
-}
-
-
-//test: (a?{n}) (a{n})
-for (i <- 0 to 8000 by 1000) {
- println(f"$i: ${time_needed(3, matcher(EVIL1(i), "a" * i))}%.5f")
-}
-
-//test: (a*)* b
-for (i <- 0 to 6000000 by 500000) {
- println(f"$i: ${time_needed(3, matcher(EVIL2, "a" * i))}%.5f")
-}
-
-
-// size of a regular expressions - for testing purposes
-def size(r: Rexp) : Int = r match {
- case ZERO => 1
- case ONE => 1
- case CHAR(_) => 1
- case ALT(r1, r2) => 1 + size(r1) + size(r2)
- case SEQ(r1, r2) => 1 + size(r1) + size(r2)
- case STAR(r) => 1 + size(r)
- case NTIMES(r, _) => 1 + size(r)
-}
-
-
-// now the size of the derivatives grows
-// much, much slower
-
-size(ders("".toList, EVIL2)) // 5
-size(ders("a".toList, EVIL2)) // 8
-size(ders("aa".toList, EVIL2)) // 8
-size(ders("aaa".toList, EVIL2)) // 8
-size(ders("aaaa".toList, EVIL2)) // 8
-size(ders("aaaaa".toList, EVIL2)) // 8
-
-
-
-
-
--- a/progs/re3a.scala Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,112 +0,0 @@
-
-abstract class Rexp
-case object ZERO extends Rexp
-case object ONE extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
-case class NTIMES(r: Rexp, n: Int) extends Rexp
-case class UPNTIMES(r: Rexp, n: Int) extends Rexp
-
-// the nullable function: tests whether the regular
-// expression can recognise the empty string
-def nullable (r: Rexp) : Boolean = r match {
- case ZERO => false
- case ONE => true
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
- case NTIMES(r, i) => if (i == 0) true else nullable(r)
- case UPNTIMES(r: Rexp, n: Int) => true
-}
-
-// the derivative of a regular expression w.r.t. a character
-def der (c: Char, r: Rexp) : Rexp = r match {
- case ZERO => ZERO
- case ONE => ZERO
- case CHAR(d) => if (c == d) ONE else ZERO
- case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
- case STAR(r1) => SEQ(der(c, r1), STAR(r1))
- case NTIMES(r1, i) =>
- if (i == 0) ZERO else
- if (nullable(r1)) SEQ(der(c, r1), UPNTIMES(r1, i - 1))
- else SEQ(der(c, r1), NTIMES(r1, i - 1))
- case UPNTIMES(r1, i) =>
- if (i == 0) ZERO
- else SEQ(der(c, r1), UPNTIMES(r1, i - 1))
-}
-
-def simp(r: Rexp) : Rexp = r match {
- case ALT(r1, r2) => (simp(r1), simp(r2)) match {
- case (ZERO, r2s) => r2s
- case (r1s, ZERO) => r1s
- case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s)
- }
- case SEQ(r1, r2) => (simp(r1), simp(r2)) match {
- case (ZERO, _) => ZERO
- case (_, ZERO) => ZERO
- case (ONE, r2s) => r2s
- case (r1s, ONE) => r1s
- case (r1s, r2s) => SEQ(r1s, r2s)
- }
- case r => r
-}
-
-
-// the derivative w.r.t. a string (iterates der)
-def ders (s: List[Char], r: Rexp) : Rexp = s match {
- case Nil => r
- case c::s => ders(s, simp(der(c, r)))
-}
-
-
-// the main matcher function
-def matches(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
-
-// one or zero
-def OPT(r: Rexp) = ALT(r, ONE)
-
-// evil regular expressions
-def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
-val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
-val EVIL3 = SEQ(STAR(ALT(CHAR('a'), SEQ(CHAR('a'),CHAR('a')))), CHAR('b'))
-
-def time_needed[T](i: Int, code: => T) = {
- val start = System.nanoTime()
- for (j <- 1 to i) code
- val end = System.nanoTime()
- "%.5f".format((end - start) / (i * 1.0e9))
-}
-
-
-// test: (a?{n}) (a{n})
-for (i <- 0 to 8000 by 1000) {
- println(s"$i: ${time_needed(2, matches(EVIL1(i), "a" * i))}")
-}
-
-// test: (a*)* b
-for (i <- 0 to 6000000 by 500000) {
- println(s"$i: ${time_needed(2, matches(EVIL2, "a" * i))}")
-}
-
-
-val r0 = simp(der('a', EVIL3))
-val r1 = simp(der('a', r0))
-val r2 = simp(der('a', r1))
-val r3 = simp(der('a', r2))
-val r4 = simp(der('a', r3))
-val r5 = simp(der('a', r4))
-val r6 = simp(der('a', r5))
-
-//test: (a|aa)* b
-/*
-for (i <- 0 to 100 by 10) {
- println(s"$i: ${time_needed(2, matches(EVIL3, "a" * i ++ "c"))}")
-}
- */
-
--- a/progs/re3ext.scala Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,151 +0,0 @@
-abstract class Rexp
-case object ZERO extends Rexp
-case object ONE extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
-case class NTIMES(r: Rexp, n: Int) extends Rexp
-
-def simp(r: Rexp): Rexp = r match {
- case ALT(r1, r2) => {
- (simp(r1), simp(r2)) match {
- case (ZERO, r2s) => r2s
- case (r1s, ZERO) => r1s
- case (r1s, r2s) => if (r1s == r2s) r1s else ALT(r1s, r2s)
- }
- }
- case SEQ(r1, r2) => {
- (simp(r1), simp(r2)) match {
- case (ZERO, _) => ZERO
- case (_, ZERO) => ZERO
- case (ONE, r2s) => r2s
- case (r1s, ONE) => r1s
- case (r1s, r2s) => SEQ(r1s, r2s)
- }
- }
- case NTIMES(r, n) => NTIMES(simp(r), n)
- case r => r
-}
-
-def nullable (r: Rexp) : Boolean = r match {
- case ZERO => false
- case ONE => true
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
- case NTIMES(r, i) => if (i == 0) true else nullable(r)
-}
-
-def der (c: Char, r: Rexp) : Rexp = r match {
- case ZERO => ZERO
- case ONE => ZERO
- case CHAR(d) => if (c == d) ONE else ZERO
- case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
- case STAR(r) => SEQ(der(c, r), STAR(r))
- case NTIMES(r, i) =>
- if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1))
-}
-
-def ders (s: List[Char], r: Rexp) : Rexp = s match {
- case Nil => r
- case c::s => ders(s, simp(der(c, r)))
-}
-
-def matches(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
-
-
-//one or zero
-def OPT(r: Rexp) = ALT(r, ONE)
-
-def EVIL(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
-
-def time_needed[T](i: Int, code: => T) = {
- val start = System.nanoTime()
- for (j <- 1 to i) code
- val end = System.nanoTime()
- (end - start)/(i * 1.0e9)
-}
-
-/*
-for (i <- 1 to 9001 by 1000) {
- println(i + " " + "%.5f".format(time_needed(1, matches(EVIL(i), "a" * i))))
-}
-*/
-
-// some convenience for typing in regular expressions
-import scala.language.implicitConversions
-import scala.language.reflectiveCalls
-
-def charlist2rexp(s : List[Char]) : Rexp = s match {
- case Nil => ONE
- case c::Nil => CHAR(c)
- case c::s => SEQ(CHAR(c), charlist2rexp(s))
-}
-implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
-
-implicit def RexpOps (r: Rexp) = new {
- def | (s: Rexp) = ALT(r, s)
- def % = STAR(r)
- def ~ (s: Rexp) = SEQ(r, s)
-}
-
-implicit def stringOps (s: String) = new {
- def | (r: Rexp) = ALT(s, r)
- def | (r: String) = ALT(s, r)
- def % = STAR(s)
- def ~ (r: Rexp) = SEQ(s, r)
- def ~ (r: String) = SEQ(s, r)
-}
-
-
-def PLUS(r: Rexp) = SEQ(r, STAR(r))
-def RANGE(s: List[Char]) : Rexp = s match {
- case Nil => ZERO
- case c::s => ALT(CHAR(c), RANGE(s))
-}
-def NMTIMES(r: Rexp, n: Int, m: Int) : Rexp =
- if (n >= m) NTIMES(r, n) else ALT(NTIMES(r, m), NMTIMES(r, n, m - 1))
-
-
-println("Coursework:")
-val REG1 = PLUS(PLUS("a" ~ "a" ~ "a"))
-val REG2 = PLUS(PLUS("aaaaaaaaaaaaaaaaaaa" ~ OPT("a"))) // 19 as ~ a?
-
-
-//40 * aaa matches
-//43 * aaa + aa does not
-//45 * aaa + a
-
-println("EVIL1:")
-println(matches(REG1, "aaa" * 40))
-println(matches(REG1, "aaa" * 43 + "aa"))
-println(matches(REG1, "aaa" * 45 + "a"))
-println("EVIL2:")
-println(matches(REG2, "aaa" * 40))
-println(matches(REG2, "aaa" * 43 + "aa"))
-println(matches(REG2, "aaa" * 45 + "a"))
-
-println("EMAIL:")
-val LOWERCASE = "abcdefghijklmnopqrstuvwxyz"
-val DIGITS = "0123456789"
-val EMAIL = PLUS(RANGE((LOWERCASE + DIGITS + "_" + "." + "-").toList)) ~ "@" ~
- PLUS(RANGE((LOWERCASE + DIGITS + "." + "-").toList)) ~ "." ~
- NMTIMES(RANGE((LOWERCASE + ".").toList), 2, 6)
-
-val my_email = "christian.urban@kcl.ac.uk"
-
-println(matches(EMAIL, my_email))
-
-println(matches(NTIMES("a", 2), "a"))
-println(matches(NTIMES("a", 2), "aa"))
-println(matches(NTIMES("a", 2), "aaa"))
-
-println(matches(NMTIMES("a", 2, 3), "a"))
-println(matches(NMTIMES("a", 2, 3), "aa"))
-println(matches(NMTIMES("a", 2, 3), "aaa"))
-println(matches(NMTIMES("a", 2, 3), "aaaa"))
--- a/progs/re4.scala Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,156 +0,0 @@
-// A version which attempts to move whole strings, not
-// just characters, under derivatives whenever possible
-
-abstract class Rexp
-case object ZERO extends Rexp
-case object ONE extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
-case class NTIMES(r: Rexp, n: Int) extends Rexp
-
-// the nullable function: tests whether the regular
-// expression can recognise the empty string
-def nullable (r: Rexp) : Boolean = r match {
- case ZERO => false
- case ONE => true
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
- case NTIMES(r, i) => if (i == 0) true else nullable(r)
-}
-
-// the derivative of a regular expression w.r.t. a character
-def der (c: Char, r: Rexp) : Rexp = r match {
- case ZERO => ZERO
- case ONE => ZERO
- case CHAR(d) => if (c == d) ONE else ZERO
- case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
- case STAR(r1) => SEQ(der(c, r1), STAR(r1))
- case NTIMES(r1, i) =>
- if (i == 0) ZERO else SEQ(der(c, r1), NTIMES(r1, i - 1))
-}
-
-def simp(r: Rexp) : Rexp = r match {
- case ALT(r1, r2) => (simp(r1), simp(r2)) match {
- case (ZERO, r2s) => r2s
- case (r1s, ZERO) => r1s
- case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s)
- }
- case SEQ(r1, r2) => (simp(r1), simp(r2)) match {
- case (ZERO, _) => ZERO
- case (_, ZERO) => ZERO
- case (ONE, r2s) => r2s
- case (r1s, ONE) => r1s
- case (r1s, r2s) => SEQ(r1s, r2s)
- }
- case r => r
-}
-
-// an example
-val r = SEQ(SEQ(CHAR('x'), CHAR('y')), CHAR('z'))
-der('x', r)
-der('y', der('x', r))
-der('z', der('y', der('x', r)))
-simp(der('z', der('y', der('x', r))))
-
-// *new*
-// the derivative w.r.t. a string (iterates der)
-def ders2(s: List[Char], r: Rexp) : Rexp = (s, r) match {
- case (Nil, r) => r
- case (s, ZERO) => ZERO
- case (s, ONE) => if (s == Nil) ONE else ZERO
- case (s, CHAR(c)) => if (s == List(c)) ONE else
- if (s == Nil) CHAR(c) else ZERO
- case (s, ALT(r1, r2)) => ALT(ders2(s, r1), ders2(s, r2))
- case (c::s, r) => ders2(s, simp(der(c, r)))
-}
-
-// the main matcher function
-def matcher(r: Rexp, s: String) : Boolean =
- nullable(ders2(s.toList, r))
-
-
-// one or zero
-def OPT(r: Rexp) = ALT(r, ONE)
-
-
-// Test Cases
-
-def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
-val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
-
-// for measuring time
-def time_needed[T](i: Int, code: => T) = {
- val start = System.nanoTime()
- for (j <- 1 to i) code
- val end = System.nanoTime()
- (end - start) / (i * 1.0e9)
-}
-
-
-// test: (a?{n}) (a{n})
-for (i <- 0 to 11000 by 1000) {
- println(f"$i: ${time_needed(2, matcher(EVIL1(i), "a" * i))}%.5f")
-}
-
-
-// test: (a*)* b
-for (i <- 0 to 7000000 by 500000) {
- println(f"$i: ${time_needed(2, matcher(EVIL2, "a" * i))}%.5f")
-}
-
-
-
-// the size of a regular expressions - for testing purposes
-def size(r: Rexp) : Int = r match {
- case ZERO => 1
- case ONE => 1
- case CHAR(_) => 1
- case ALT(r1, r2) => 1 + size(r1) + size(r2)
- case SEQ(r1, r2) => 1 + size(r1) + size(r2)
- case STAR(r) => 1 + size(r)
- case NTIMES(r, _) => 1 + size(r)
-}
-
-
-// string of a regular expressions - for testing purposes
-def string(r: Rexp) : String = r match {
- case ZERO => "0"
- case ONE => "1"
- case CHAR(c) => c.toString
- case ALT(r1, r2) => s"(${string(r1)} + ${string(r2)})"
- case SEQ(CHAR(c), CHAR(d)) => s"${c}${d}"
- case SEQ(r1, r2) => s"(${string(r1)} ~ ${string(r2)})"
- case STAR(r) => s"(${string(r)})*"
- case NTIMES(r, n) => s"(${string(r)}){${n}}"
-}
-
-
-// test: ("a" | "aa")*
-val EVIL3 = STAR(ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a'))))
-
-// test: ("" | "a" | "aa")*
-val EVIL4 = STAR(ALT(ONE, ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a')))))
-
-val t1 = ders2("a".toList, EVIL3)
-val t1s = simp(t1)
-val t2 = ders2("aa".toList, t1s)
-val t2s = simp(t2)
-val t3 = ders2("aaa".toList, t2s)
-val t3s = simp(t3)
-val t4 = ders2("aaaa".toList, t3s)
-val t4s = simp(t4)
-println(string(t1) + " " + size(t1))
-println("s " + string(t1s) + " " + size(t1s))
-println(string(t2) + " " + size(t2))
-println("s " + string(t2s) + " " + size(t2s))
-println(string(t3) + " " + size(t3))
-println("s " + string(t3s) + " " + size(t3s))
-println(string(t4) + " " + size(t4))
-println("s " + string(t4s) + " " + size(t4s))
--- a/progs/regexp2.scala Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,123 +0,0 @@
-
-// regular expressions including NOT
-abstract class Rexp
-
-case object NULL extends Rexp
-case object EMPTY extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
-case class NOT(r: Rexp) extends Rexp
-
-
-// some convenience for typing in regular expressions
-def charlist2rexp(s : List[Char]) : Rexp = s match {
- case Nil => EMPTY
- case c::Nil => CHAR(c)
- case c::s => SEQ(CHAR(c), charlist2rexp(s))
-}
-implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
-
-
-// nullable function: tests whether the regular
-// expression can recognise the empty string
-def nullable (r: Rexp) : Boolean = r match {
- case NULL => false
- case EMPTY => true
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
- case NOT(r) => !(nullable(r))
-}
-
-// tests whether a regular expression
-// cannot recognise more
-def no_more (r: Rexp) : Boolean = r match {
- case NULL => true
- case EMPTY => false
- case CHAR(_) => false
- case ALT(r1, r2) => no_more(r1) && no_more(r2)
- case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1)
- case STAR(_) => false
- case NOT(r) => !(no_more(r))
-}
-
-
-// derivative of a regular expression w.r.t. a character
-def der (c: Char, r: Rexp) : Rexp = r match {
- case NULL => NULL
- case EMPTY => NULL
- case CHAR(d) => if (c == d) EMPTY else NULL
- case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
- case STAR(r) => SEQ(der(c, r), STAR(r))
- case NOT(r) => NOT(der (c, r))
-}
-
-
-// regular expression for specifying
-// ranges of characters
-def RANGE(s : List[Char]) : Rexp = s match {
- case Nil => NULL
- case c::Nil => CHAR(c)
- case c::s => ALT(CHAR(c), RANGE(s))
-}
-
-//one or more
-def PLUS(r: Rexp) = SEQ(r, STAR(r))
-
-
-//some regular expressions
-val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList)
-val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList)
-val LETTER = ALT(LOWERCASE, UPPERCASE)
-val DIGIT = RANGE("0123456789".toList)
-val NONZERODIGIT = RANGE("123456789".toList)
-
-val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGIT)))
-val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
-val WHITESPACE = RANGE(" \n".toList)
-val WHITESPACES = PLUS(WHITESPACE)
-
-val ALL = ALT(ALT(LETTER, DIGIT), WHITESPACE)
-val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/")
-
-
-// an example list of regular expressions
-val regs: List[Rexp]= List("if", "then", "else", "+", IDENT, NUMBER, WHITESPACES, COMMENT)
-
-
-def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s)
-
-def munch(r: Rexp, s: List[Char], t: List[Char]) : Option[(List[Char], List[Char])] =
- s match {
- case Nil if (nullable(r)) => Some(Nil, t)
- case Nil => None
- case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, t)
- case c::s if (no_more(der (c, r))) => None
- case c::s => munch(der (c, r), s, t ::: List(c))
- }
-
-def one_string (regs: List[Rexp], s: List[Char]) : (List[Char], List[Char]) = {
- val somes = regs.map { munch(_, s, Nil) } .flatten
- if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head)
-}
-
-def tokenize (regs: List[Rexp], s: List[Char]) : List[String] = s match {
- case Nil => Nil
- case _ => one_string(regs, s) match {
- case (rest, s) => s.mkString :: tokenize(regs, rest)
- }
-}
-
-//examples
-println(tokenize(regs, "if true then then 42 else +".toList))
-println(tokenize(regs, "if+true+then+then+42+else +".toList))
-println(tokenize(regs, "ifff if 34 34".toList))
-println(tokenize(regs, "/*ifff if */ hhjj /*34 */".toList))
-println(tokenize(regs, "/* if true then */ then 42 else +".toList))
-//println(tokenize(regs, "ifff $ if 34".toList)) // causes an error because of the symbol $
--- a/progs/rev.scala Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-def rev(r: Rexp) : Rexp = r match {
- case ZERO => ZERO
- case ONE => ONE
- case CHAR(c) => CHAR(c)
- case ALT(r1, r2) => ALT(rev(r1), rev(r2))
- case SEQ(r1, r2) => SEQ(rev(r2), rev(r1))
- case STAR(r) => STAR(rev(r))
-}
--- a/progs/token-bak.scala Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,145 +0,0 @@
-import scala.language.implicitConversions
-import scala.language.reflectiveCalls
-import scala.util._
-import scala.annotation.tailrec
-
-sealed abstract class Rexp
-
-case object NULL extends Rexp
-case object EMPTY extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
-
-def charlist2rexp(s : List[Char]) : Rexp = s match {
- case Nil => EMPTY
- case c::Nil => CHAR(c)
- case c::s => SEQ(CHAR(c), charlist2rexp(s))
-}
-implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
-
-
-implicit def RexpOps(r: Rexp) = new {
- def | (s: Rexp) = ALT(r, s)
- def % = STAR(r)
- def ~ (s: Rexp) = SEQ(r, s)
-}
-
-implicit def stringOps(s: String) = new {
- def | (r: Rexp) = ALT(s, r)
- def | (r: String) = ALT(s, r)
- def % = STAR(s)
- def ~ (r: Rexp) = SEQ(s, r)
- def ~ (r: String) = SEQ(s, r)
-}
-
-def Range(s : List[Char]) : Rexp = s match {
- case Nil => NULL
- case c::Nil => CHAR(c)
- case c::s => ALT(CHAR(c), Range(s))
-}
-def RANGE(s: String) = Range(s.toList)
-
-def PLUS(r: Rexp) = SEQ(r, STAR(r))
-
-val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_")
-val DIGIT = RANGE("0123456789")
-val ID = SYM ~ (SYM | DIGIT).%
-val NUM = PLUS(DIGIT)
-val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write"
-val SEMI: Rexp = ";"
-val OP: Rexp = ":=" | "=" | "-" | "+" | "*" | "!=" | "<" | ">"
-val WHITESPACE = PLUS(RANGE(" \n"))
-val RPAREN: Rexp = ")"
-val LPAREN: Rexp = "("
-val BEGIN: Rexp = "{"
-val END: Rexp = "}"
-
-//regular expressions ranked by position in the list
-val regs: List[Rexp] =
- List(KEYWORD, ID, OP, NUM, SEMI, LPAREN, RPAREN, BEGIN, END, WHITESPACE)
-
-def nullable (r: Rexp) : Boolean = r match {
- case NULL => false
- case EMPTY => true
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
-}
-
-def zeroable (r: Rexp) : Boolean = r match {
- case NULL => true
- case EMPTY => false
- case CHAR(_) => false
- case ALT(r1, r2) => zeroable(r1) && zeroable(r2)
- case SEQ(r1, r2) => zeroable(r1) || zeroable(r2)
- case STAR(_) => false
-}
-
-def der (c: Char, r: Rexp) : Rexp = r match {
- case NULL => NULL
- case EMPTY => NULL
- case CHAR(d) => if (c == d) EMPTY else NULL
- case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
- case STAR(r) => SEQ(der(c, r), STAR(r))
-}
-
-
-// calculates derivatives until all of them are zeroable
-@tailrec
-def munch(s: List[Char],
- pos: Int,
- rs: List[Rexp],
- last: Option[Int]): Option[Int] = rs match {
- case Nil => last
- case rs if (s.length <= pos) => last
- case rs => {
- val ders = rs.map(der(s(pos), _))
- val rs_nzero = ders.filterNot(zeroable(_))
- val rs_nulls = ders.filter(nullable(_))
- val new_last = if (rs_nulls != Nil) Some(pos) else last
- munch(s, 1 + pos, rs_nzero, new_last)
- }
-}
-
-// iterates the munching function and prints
-// out the component strings
-@tailrec
-def tokenize(s: String, rs: List[Rexp]) : Unit = munch(s.toList, 0, rs, None) match {
- case None if (s == "") => println("EOF")
- case None => println(s"Lexing error: $s")
- case Some(n) => {
- val (head, tail) = s.splitAt(n + 1)
- print(s"|${head.replaceAll("\n","RET")}|")
- tokenize(tail, rs)
- }
-}
-
-
-val test_prog = """
-start := XXX;
-x := start;
-y := start;
-z := start;
-while 0 < x do {
- while 0 < y do {
- while 0 < z do {
- z := z - 1
- };
- z := start;
- y := y - 1
- };
- y := start;
- x := x - 1
-};
-write x;
-write y;
-write z
-"""
-
-tokenize(test_prog, regs)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/while-tests/collatz.while Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,8 @@
+write "Input a number ";
+read n;
+while n > 1 do {
+ if n % 2 == 0
+ then n := n/2
+ else n := 3*n+1;
+};
+write "Yes";
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/while-tests/factors.while Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,14 @@
+// Find all factors of a given input number
+// by J.R. Cordy August 2005
+
+write "Input n please";
+read n;
+write "The factors of n are";
+f := 2;
+while n != 1 do {
+ while (n / f) * f == n do {
+ write f;
+ n := n / f
+ };
+ f := f + 1
+}
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/while-tests/fib.while Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,13 @@
+write "Fib";
+read n;
+minus1 := 0;
+minus2 := 1;
+while n > 0 do {
+ temp := minus2;
+ minus2 := minus1 + minus2;
+ minus1 := temp;
+ n := n - 1
+};
+write "Result";
+write minus2
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/while-tests/loops.while Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,14 @@
+start := 1000;
+x := start;
+y := start;
+z := start;
+while 0 < x do {
+ while 0 < y do {
+ while 0 < z do { z := z - 1 };
+ z := start;
+ y := y - 1
+ };
+ y := start;
+ x := x - 1
+}
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/while-tests/primes.while Thu Jul 30 13:50:54 2020 +0100
@@ -0,0 +1,15 @@
+// prints out prime numbers from
+// 2 to 100 (end)
+
+end := 100;
+n := 2;
+while (n < end) do {
+ f := 2;
+ tmp := 0;
+ while ((f < n / 2 + 1) && (tmp == 0)) do {
+ if ((n / f) * f == n) then { tmp := 1 } else { skip };
+ f := f + 1
+ };
+ if (tmp == 0) then { write(n) } else { skip };
+ n := n + 1
+}
\ No newline at end of file
--- a/progs/while.scala Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,244 +0,0 @@
-// A parser and evaluator for the while language
-//
-import matcher._
-import parser._
-
-
-// some regular expressions
-val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_")
-val DIGIT = RANGE("0123456789")
-val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT)))
-val NUM = PLUS(DIGIT)
-val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false")
-val SEMI: Rexp = ";"
-val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">")
-val WHITESPACE = PLUS(RANGE(" \n"))
-val RPAREN: Rexp = ")"
-val LPAREN: Rexp = "("
-val BEGIN: Rexp = "{"
-val END: Rexp = "}"
-
-// tokens for classifying the strings that have been recognised
-abstract class Token
-case object T_WHITESPACE extends Token
-case object T_SEMI extends Token
-case object T_LPAREN extends Token
-case object T_RPAREN extends Token
-case object T_BEGIN extends Token
-case object T_END extends Token
-case class T_ID(s: String) extends Token
-case class T_OP(s: String) extends Token
-case class T_NUM(s: String) extends Token
-case class T_KWD(s: String) extends Token
-
-val lexing_rules: List[(Rexp, List[Char] => Token)] =
- List((KEYWORD, (s) => T_KWD(s.mkString)),
- (ID, (s) => T_ID(s.mkString)),
- (OP, (s) => T_OP(s.mkString)),
- (NUM, (s) => T_NUM(s.mkString)),
- (SEMI, (s) => T_SEMI),
- (LPAREN, (s) => T_LPAREN),
- (RPAREN, (s) => T_RPAREN),
- (BEGIN, (s) => T_BEGIN),
- (END, (s) => T_END),
- (WHITESPACE, (s) => T_WHITESPACE))
-
-// the tokenizer
-val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE))
-
-// the abstract syntax trees
-abstract class Stmt
-abstract class AExp
-abstract class BExp
-type Block = List[Stmt]
-case object Skip extends Stmt
-case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt
-case class While(b: BExp, bl: Block) extends Stmt
-case class Assign(s: String, a: AExp) extends Stmt
-
-case class Var(s: String) extends AExp
-case class Num(i: Int) extends AExp
-case class Aop(o: String, a1: AExp, a2: AExp) extends AExp
-
-case object True extends BExp
-case object False extends BExp
-case class Bop(o: String, a1: AExp, a2: AExp) extends BExp
-
-// atomic parsers
-case class TokParser(tok: Token) extends Parser[List[Token], Token] {
- def parse(ts: List[Token]) = ts match {
- case t::ts if (t == tok) => Set((t, ts))
- case _ => Set ()
- }
-}
-implicit def token2tparser(t: Token) = TokParser(t)
-
-case object NumParser extends Parser[List[Token], Int] {
- def parse(ts: List[Token]) = ts match {
- case T_NUM(s)::ts => Set((s.toInt, ts))
- case _ => Set ()
- }
-}
-
-case object IdParser extends Parser[List[Token], String] {
- def parse(ts: List[Token]) = ts match {
- case T_ID(s)::ts => Set((s, ts))
- case _ => Set ()
- }
-}
-
-def len(xs: List[(Int, Int)]): Int = xs match {
- case Nil => 0
- case (1, x)::xs => len(xs) + 1
- case (_, x)::xs => len(xs)
-}
-
-def fst(p: (Int, Int)): Int = p match {
- case Nil => 0
- case (1, x)::xs => len(xs) + 1
- case (_, x)::xs => len(xs)
-}
-
-
-
-// arithmetic expressions
-lazy val AExp: Parser[List[Token], AExp] =
- (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } ||
- (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T
-lazy val T: Parser[List[Token], AExp] =
- (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F
-lazy val F: Parser[List[Token], AExp] =
- (T_LPAREN ~> AExp <~ T_RPAREN) ||
- IdParser ==> Var ||
- NumParser ==> Num
-
-// boolean expressions
-lazy val BExp: Parser[List[Token], BExp] =
- (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Bop("=", x, z): BExp } ||
- (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Bop("!=", x, z): BExp } ||
- (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Bop("<", x, z): BExp } ||
- (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Bop(">", x, z): BExp } ||
- (T_KWD("true") ==> ((_) => True)) ||
- (T_KWD("false") ==> ((_) => False: BExp))
-
-lazy val Stmt: Parser[List[Token], Stmt] =
- (T_KWD("skip") ==> ((_) => Skip: Stmt)) ||
- (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } ||
- (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==>
- { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } ||
- (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) }
-
-lazy val Stmts: Parser[List[Token], Block] =
- (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } ||
- (Stmt ==> ((s) => List(s) : Block))
-
-lazy val Block: Parser[List[Token], Block] =
- (T_BEGIN ~> Stmts <~ T_END) ||
- (Stmt ==> ((s) => List(s)))
-
-
-// examples
-val p1 = "x := 5"
-val p1_toks = Tok.fromString(p1)
-val p1_ast = Block.parse_all(p1_toks)
-println(p1_toks)
-println(p1_ast)
-
-val p1a = "{ x := 5; y := 8}"
-val p1a_toks = Tok.fromString(p1a)
-val p1a_ast = Block.parse_all(p1a_toks)
-println(p1a_ast)
-
-val p2 = "5 = 6"
-val p2_toks = Tok.fromString(p2)
-val p2_ast = BExp.parse_all(p2_toks)
-println(p2_ast)
-
-val p2a = "true"
-val p2a_toks = Tok.fromString(p2a)
-val p2a_ast = BExp.parse_all(p2a_toks)
-println(p2a_ast)
-
-val p3 = "if true then skip else skip"
-val p3_toks = Tok.fromString(p3)
-val p3_ast = Stmt.parse_all(p3_toks)
-println(p3_ast)
-
-val p3a = "if true then x := 5 else x := 10"
-val p3a_toks = Tok.fromString(p3a)
-val p3a_ast = Stmt.parse_all(p3a_toks)
-println(p3a_ast)
-
-val p3b = "if false then x := 5 else x := 10"
-val p3b_toks = Tok.fromString(p3b)
-val p3b_ast = Stmt.parse_all(p3b_toks)
-println(p3b_ast)
-
-// multiplication
-val p4 = """{ x := 5;
- y := 4;
- r := 0;
- while y > 0 do {
- r := r + x;
- y := y - 1
- }
- }"""
-val p4_toks = Tok.fromString(p4)
-val p4_ast = Block.parse_all(p4_toks)
-println(p4_ast)
-
-val p5 = """
- n := 9;
- minus1 := 0;
- minus2 := 1;
- temp := 0;
- while n > 0 do {
- temp := minus2;
- minus2 := minus1 + minus2;
- minus1 := temp;
- n := n - 1
- };
- fib_res := minus2
-"""
-val p5_toks = Tok.fromString(p5)
-val p5_ast = Stmts.parse_all(p5_toks)
-
-// interpreter
-type Env = Map[String, Int]
-
-def eval_bexp(b: BExp, env: Env) : Boolean = b match {
- case True => true
- case False => false
- case Bop("=", a1, a2) => eval_aexp(a1, env) == eval_aexp(a2, env)
- case Bop("!=", a1, a2) => !(eval_aexp(a1, env) == eval_aexp(a2, env))
- case Bop(">", a1, a2) => eval_aexp(a1, env) > eval_aexp(a2, env)
- case Bop("<", a1, a2) => eval_aexp(a1, env) < eval_aexp(a2, env)
-}
-
-def eval_aexp(a: AExp, env : Env) : Int = a match {
- case Num(i) => i
- case Var(s) => env(s)
- case Aop("+", a1, a2) => eval_aexp(a1, env) + eval_aexp(a2, env)
- case Aop("-", a1, a2) => eval_aexp(a1, env) - eval_aexp(a2, env)
- case Aop("*", a1, a2) => eval_aexp(a1, env) * eval_aexp(a2, env)
-}
-
-def eval_stmt(s: Stmt, env: Env) : Env = s match {
- case Skip => env
- case Assign(x, a) => env + (x -> eval_aexp(a, env))
- case If(b, bl1, bl2) => if (eval_bexp(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env)
- case While(b, bl) =>
- if (eval_bexp(b, env)) eval_stmt(While(b, bl), eval_bl(bl, env))
- else env
-}
-
-def eval_bl(bl: Block, env: Env) : Env = bl match {
- case Nil => env
- case s::bl => eval_bl(bl, eval_stmt(s, env))
-}
-
-//examples
-println(eval_stmt(p3a_ast.head, Map.empty))
-println(eval_stmt(p3b_ast.head, Map.empty))
-println(eval_bl(p4_ast.head, Map.empty))
-println(eval_bl(p5_ast.head, Map.empty))
--- a/progs/while1.scala Mon Jul 27 11:02:48 2020 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,221 +0,0 @@
-// A parser and evaluator for the WHILE language
-//
-
-
-
-
-
-// some regular expressions
-val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_")
-val DIGIT = RANGE("0123456789")
-val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT)))
-val NUM = PLUS(DIGIT)
-val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write")
-val SEMI: Rexp = ";"
-val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">")
-val WHITESPACE = PLUS(RANGE(" \n"))
-val RPAREN: Rexp = ")"
-val LPAREN: Rexp = "("
-val BEGIN: Rexp = "{"
-val END: Rexp = "}"
-val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/")
-
-// tokens for classifying the strings that have been recognised
-abstract class Token
-case object T_WHITESPACE extends Token
-case object T_COMMENT extends Token
-case object T_SEMI extends Token
-case object T_LPAREN extends Token
-case object T_RPAREN extends Token
-case object T_BEGIN extends Token
-case object T_END extends Token
-case class T_ID(s: String) extends Token
-case class T_OP(s: String) extends Token
-case class T_NUM(s: String) extends Token
-case class T_KWD(s: String) extends Token
-
-val lexing_rules: List[(Rexp, List[Char] => Token)] =
- List((KEYWORD, (s) => T_KWD(s.mkString)),
- (ID, (s) => T_ID(s.mkString)),
- (OP, (s) => T_OP(s.mkString)),
- (NUM, (s) => T_NUM(s.mkString)),
- (SEMI, (s) => T_SEMI),
- (LPAREN, (s) => T_LPAREN),
- (RPAREN, (s) => T_RPAREN),
- (BEGIN, (s) => T_BEGIN),
- (END, (s) => T_END),
- (WHITESPACE, (s) => T_WHITESPACE),
- (COMMENT, (s) => T_COMMENT))
-
-// the tokenizer
-val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT))
-
-// the abstract syntax trees
-abstract class Stmt
-abstract class AExp
-abstract class BExp
-type Block = List[Stmt]
-case object Skip extends Stmt
-case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt
-case class While(b: BExp, bl: Block) extends Stmt
-case class Assign(s: String, a: AExp) extends Stmt
-case class Write(s: String) extends Stmt
-
-case class Var(s: String) extends AExp
-case class Num(i: Int) extends AExp
-case class Aop(o: String, a1: AExp, a2: AExp) extends AExp
-
-case object True extends BExp
-case object False extends BExp
-case class Bop(o: String, a1: AExp, a2: AExp) extends BExp
-
-// atomic parsers
-case class TokParser(tok: Token) extends Parser[List[Token], Token] {
- def parse(ts: List[Token]) = ts match {
- case t::ts if (t == tok) => Set((t, ts))
- case _ => Set ()
- }
-}
-implicit def token2tparser(t: Token) = TokParser(t)
-
-case object NumParser extends Parser[List[Token], Int] {
- def parse(ts: List[Token]) = ts match {
- case T_NUM(s)::ts => Set((s.toInt, ts))
- case _ => Set ()
- }
-}
-
-case object IdParser extends Parser[List[Token], String] {
- def parse(ts: List[Token]) = ts match {
- case T_ID(s)::ts => Set((s, ts))
- case _ => Set ()
- }
-}
-
-
-// arithmetic expressions
-lazy val AExp: Parser[List[Token], AExp] =
- (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } ||
- (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T
-lazy val T: Parser[List[Token], AExp] =
- (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F
-lazy val F: Parser[List[Token], AExp] =
- (T_LPAREN ~> AExp <~ T_RPAREN) ||
- IdParser ==> Var ||
- NumParser ==> Num
-
-// boolean expressions
-lazy val BExp: Parser[List[Token], BExp] =
- (T_KWD("true") ==> ((_) => True: BExp)) ||
- (T_KWD("false") ==> ((_) => False: BExp)) ||
- (T_LPAREN ~> BExp <~ T_RPAREN) ||
- (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Bop("=", x, z): BExp } ||
- (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Bop("!=", x, z): BExp } ||
- (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Bop("<", x, z): BExp } ||
- (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Bop("<", z, x): BExp }
-
-lazy val Stmt: Parser[List[Token], Stmt] =
- (T_KWD("skip") ==> ((_) => Skip: Stmt)) ||
- (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } ||
- (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==>
- { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } ||
- (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } ||
- (T_KWD("write") ~ IdParser) ==> { case (x, y) => Write(y) }
-
-lazy val Stmts: Parser[List[Token], Block] =
- (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } ||
- (Stmt ==> ((s) => List(s) : Block))
-
-lazy val Block: Parser[List[Token], Block] =
- (T_BEGIN ~> Stmts <~ T_END) ||
- (Stmt ==> ((s) => List(s)))
-
-// interpreter
-type Env = Map[String, Int]
-
-def eval_bexp(b: BExp, env: Env) : Boolean = b match {
- case True => true
- case False => false
- case Bop("=", a1, a2) => eval_aexp(a1, env) == eval_aexp(a2, env)
- case Bop("!=", a1, a2) => !(eval_aexp(a1, env) == eval_aexp(a2, env))
- case Bop("<", a1, a2) => eval_aexp(a1, env) < eval_aexp(a2, env)
-}
-
-def eval_aexp(a: AExp, env : Env) : Int = a match {
- case Num(i) => i
- case Var(s) => env(s)
- case Aop("+", a1, a2) => eval_aexp(a1, env) + eval_aexp(a2, env)
- case Aop("-", a1, a2) => eval_aexp(a1, env) - eval_aexp(a2, env)
- case Aop("*", a1, a2) => eval_aexp(a1, env) * eval_aexp(a2, env)
-}
-
-def eval_stmt(s: Stmt, env: Env) : Env = s match {
- case Skip => env
- case Assign(x, a) => env + (x -> eval_aexp(a, env))
- case If(b, bl1, bl2) => if (eval_bexp(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env)
- case While(b, bl) =>
- if (eval_bexp(b, env)) eval_stmt(While(b, bl), eval_bl(bl, env))
- else env
- case Write(x) => { println(env(x)); env }
-}
-
-def eval_bl(bl: Block, env: Env) : Env = bl match {
- case Nil => env
- case s::bl => eval_bl(bl, eval_stmt(s, env))
-}
-
-def eval_prog(name: String) : Env = {
- val tks = Tok.fromFile(name)
- val ast = Stmts.parse_single(tks)
- eval_bl(ast, Map.empty)
-}
-
-
-//examples
-
-//eval_prog("loops.while")
-eval_prog("fib.while")
-
-
-def time_needed[T](i: Int, code: => T) = {
- val start = System.nanoTime()
- for (j <- 1 to i) code
- val end = System.nanoTime()
- (end - start)/(i * 1.0e9)
-}
-
-
-val test_prog = """
-start := XXX;
-x := start;
-y := start;
-z := start;
-while 0 < x do {
- while 0 < y do {
- while 0 < z do {
- z := z - 1
- };
- z := start;
- y := y - 1
- };
- y := start;
- x := x - 1
-}
-"""
-
-
-
-def eval_test(n: Int) : Unit = {
- val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString))
- val ast = Stmts.parse_single(tks)
- println(n + " " + time_needed(2, eval_bl(ast, Map.empty)))
-}
-
-List(1, 200, 400, 600, 800, 1000, 1200, 1400, 1600).map(eval_test(_))
-
-
-
-
-
-
-