# HG changeset patch # User Christian Urban # Date 1596113454 -3600 # Node ID b5b5583a3a08f8c7deb76bc85aacdb3c44676788 # Parent e66bd5c563eb362e051efd0e32287d58528a1ee4 updated diff -r e66bd5c563eb -r b5b5583a3a08 Attic/detokenise.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Attic/detokenise.scala Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,42 @@ +// Detokenising the ouput of Tokeniser +//===================================== +// +// call with +// +// scala detokenise.scala fib.tks +// +// scala detokenise.scala loops.tks + +object Detokenise { + +import java.io._ +import scala.util._ + +abstract class Token extends Serializable +case object T_SEMI extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token +case class T_ID(s: String) extends Token +case class T_OP(s: String) extends Token +case class T_NUM(n: Int) extends Token +case class T_KWD(s: String) extends Token +case class T_STR(s: String) extends Token + +def deserialise[T](fname: String) : Try[T] = { + //import scala.util.Using + //Using(new ObjectInputStream(new FileInputStream(fname))) { + // in => in.readObject.asInstanceOf[T] + //} + Try(new ObjectInputStream(new FileInputStream(fname))).get { + in => in.readObject.asInstanceOf[T] + } +} + +def main(args: Array[String]) = { + val fname = args(0) + val tks = deserialise[List[Token]](fname).getOrElse(Nil) + println(s"Reading back from ${fname}:\n${tks.mkString(", ")}") +} + + +} diff -r e66bd5c563eb -r b5b5583a3a08 Attic/i.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Attic/i.scala Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,492 @@ + +// regular expressions including NOT +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case object ALLC extends Rexp // recognises any character +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class NOT(r: Rexp) extends Rexp // negation of a regular expression + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case ALLC => false + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NOT(r) => !(nullable(r)) +} + +// tests whether a regular expression +// cannot recognise more +def no_more (r: Rexp) : Boolean = r match { + case NULL => true + case EMPTY => false + case ALLC => false + case CHAR(_) => false + case ALT(r1, r2) => no_more(r1) && no_more(r2) + case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1) + case STAR(_) => false + case NOT(r) => !(no_more(r)) +} + + +// derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case ALLC => EMPTY + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case NOT(r) => NOT(der (c, r)) +} + +// regular expression for specifying +// ranges of characters +def Range(s : List[Char]) : Rexp = s match { + case Nil => NULL + case c::Nil => CHAR(c) + case c::s => ALT(CHAR(c), Range(s)) +} +def RANGE(s: String) = Range(s.toList) + + +// one or more +def PLUS(r: Rexp) = SEQ(r, STAR(r)) + +// many alternatives +def Alts(rs: List[Rexp]) : Rexp = rs match { + case Nil => NULL + case r::Nil => r + case r::rs => ALT(r, Alts(rs)) +} +def ALTS(rs: Rexp*) = Alts(rs.toList) + +// repetitions +def Seqs(rs: List[Rexp]) : Rexp = rs match { + case Nil => NULL + case r::Nil => r + case r::rs => SEQ(r, Seqs(rs)) +} +def SEQS(rs: Rexp*) = Seqs(rs.toList) + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + + +type Rule[T] = (Rexp, List[Char] => T) + +case class Tokenizer[T](rules: List[Rule[T]], excl: List[T] = Nil) { + + def munch(r: Rexp, action: List[Char] => T, s: List[Char], t: List[Char]) : Option[(List[Char], T)] = + s match { + case Nil if (nullable(r)) => Some(Nil, action(t)) + case Nil => None + case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, action(t)) + case c::s if (no_more(der (c, r))) => None + case c::s => munch(der (c, r), action, s, t ::: List(c)) + } + + def one_token(s: List[Char]) : Either[(List[Char], T), String] = { + val somes = rules.map { (r) => munch(r._1, r._2, s, Nil) }.flatten + if (somes == Nil) Right(s.mkString) + else Left(somes sortBy (_._1.length) head) + } + + def tokenize(cs: List[Char]) : List[T] = cs match { + case Nil => Nil + case _ => one_token(cs) match { + case Left((rest, token)) => token :: tokenize(rest) + case Right(s) => { println("Cannot tokenize: \"" + s + "\""); Nil } + } + } + + def fromString(s: String) : List[T] = + tokenize(s.toList).filterNot(excl.contains(_)) + + def fromFile(name: String) : List[T] = + fromString(io.Source.fromFile(name).mkString) + +} + + +// parser combinators with input type I and return type T + +abstract class Parser[I <% Seq[_], T] { + def parse(ts: I): Set[(T, I)] + + def parse_all(ts: I) : Set[T] = + for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head + + def parse_single(ts: I) : T = parse_all(ts).toList match { + case t::Nil => t + case _ => { println ("Parse Error") ; sys.exit(-1) } + } + + def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right) + def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f) + def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right) + def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2) + def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1) +} + +class SeqParser[I <% Seq[_], T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] { + def parse(sb: I) = + for ((head1, tail1) <- p.parse(sb); + (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2) +} + +class AltParser[I <% Seq[_], T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] { + def parse(sb: I) = p.parse(sb) ++ q.parse(sb) +} + +class FunParser[I <% Seq[_], T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] { + def parse(sb: I) = + for ((head, tail) <- p.parse(sb)) yield (f(head), tail) +} + + +// A parser and evaluator for teh while language +// +//:load matcher.scala +//:load parser3.scala + +// some regular expressions +val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") +val DIGIT = RANGE("0123456789") +val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT))) +val NUM = PLUS(DIGIT) +val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write") +val SEMI: Rexp = ";" +val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">") +val WHITESPACE = PLUS(RANGE(" \n")) +val RPAREN: Rexp = ")" +val LPAREN: Rexp = "(" +val BEGIN: Rexp = "{" +val END: Rexp = "}" +val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/") + +// tokens for classifying the strings that have been recognised +abstract class Token +case object T_WHITESPACE extends Token +case object T_COMMENT extends Token +case object T_SEMI extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token +case object T_BEGIN extends Token +case object T_END extends Token +case class T_ID(s: String) extends Token +case class T_OP(s: String) extends Token +case class T_NUM(s: String) extends Token +case class T_KWD(s: String) extends Token + +val lexing_rules: List[Rule[Token]] = + List((KEYWORD, (s) => T_KWD(s.mkString)), + (ID, (s) => T_ID(s.mkString)), + (OP, (s) => T_OP(s.mkString)), + (NUM, (s) => T_NUM(s.mkString)), + (SEMI, (s) => T_SEMI), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) => T_RPAREN), + (BEGIN, (s) => T_BEGIN), + (END, (s) => T_END), + (WHITESPACE, (s) => T_WHITESPACE), + (COMMENT, (s) => T_COMMENT)) + +// the tokenizer +val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT)) + +// the abstract syntax trees +abstract class Stmt +abstract class AExp +abstract class BExp +type Block = List[Stmt] +case object Skip extends Stmt +case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt +case class While(b: BExp, bl: Block) extends Stmt +case class Assign(s: String, a: AExp) extends Stmt +case class Write(s: String) extends Stmt + +case class Var(s: String) extends AExp +case class Num(i: Int) extends AExp +case class Aop(o: String, a1: AExp, a2: AExp) extends AExp + +case object True extends BExp +case object False extends BExp +case class Relop(o: String, a1: AExp, a2: AExp) extends BExp + +// atomic parsers +case class TokParser(tok: Token) extends Parser[List[Token], Token] { + def parse(ts: List[Token]) = ts match { + case t::ts if (t == tok) => Set((t, ts)) + case _ => Set () + } +} +implicit def token2tparser(t: Token) = TokParser(t) + +case object NumParser extends Parser[List[Token], Int] { + def parse(ts: List[Token]) = ts match { + case T_NUM(s)::ts => Set((s.toInt, ts)) + case _ => Set () + } +} + +case object IdParser extends Parser[List[Token], String] { + def parse(ts: List[Token]) = ts match { + case T_ID(s)::ts => Set((s, ts)) + case _ => Set () + } +} + + +// arithmetic expressions +lazy val AExp: Parser[List[Token], AExp] = + (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } || + (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T +lazy val T: Parser[List[Token], AExp] = + (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F +lazy val F: Parser[List[Token], AExp] = + (T_LPAREN ~> AExp <~ T_RPAREN) || + IdParser ==> Var || + NumParser ==> Num + +// boolean expressions +lazy val BExp: Parser[List[Token], BExp] = + (T_KWD("true") ==> ((_) => True: BExp)) || + (T_KWD("false") ==> ((_) => False: BExp)) || + (T_LPAREN ~> BExp <~ T_RPAREN) || + (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Relop("=", x, z): BExp } || + (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Relop("!=", x, z): BExp } || + (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Relop("<", x, z): BExp } || + (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Relop("<", z, x): BExp } + +lazy val Stmt: Parser[List[Token], Stmt] = + (T_KWD("skip") ==> ((_) => Skip: Stmt)) || + (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } || + (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==> + { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } || + (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } || + (T_KWD("write") ~ IdParser) ==> { case (x, y) => Write(y) } + +lazy val Stmts: Parser[List[Token], Block] = + (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } || + (Stmt ==> ((s) => List(s) : Block)) + +lazy val Block: Parser[List[Token], Block] = + (T_BEGIN ~> Stmts <~ T_END) || + (Stmt ==> ((s) => List(s))) + +// compiler +val beginning = """ +.class public XXX.XXX +.super java/lang/Object + +.method public ()V + aload_0 + invokenonvirtual java/lang/Object/()V + return +.end method + +.method public static write(I)V + .limit locals 5 + .limit stack 5 + iload 0 + getstatic java/lang/System/out Ljava/io/PrintStream; + swap + invokevirtual java/io/PrintStream/println(I)V + return +.end method + + +.method public static main([Ljava/lang/String;)V + .limit locals 200 + .limit stack 200 + +""" + +val ending = """ + + return + +.end method +""" + +// for generating new labels +var counter = -1 + +def Fresh(x: String) = { + counter += 1 + x ++ "_" ++ counter.toString() +} + +type Env = Map[String, String] +type Instrs = List[String] + +def compile_aexp(a: AExp, env : Env) : Instrs = a match { + case Num(i) => List("ldc " + i.toString + "\n") + case Var(s) => List("iload " + env(s) + "\n") + case Aop("+", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("iadd\n") + case Aop("-", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("isub\n") + case Aop("*", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("imul\n") +} + +def compile_bexp(b: BExp, env : Env, jmp: String) : Instrs = b match { + case True => Nil + case False => List("goto " + jmp + "\n") + case Relop("=", a1, a2) => + compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpne " + jmp + "\n") + case Relop("!=", a1, a2) => + compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpeq " + jmp + "\n") + case Relop("<", a1, a2) => + compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpge " + jmp + "\n") +} + + +def compile_stmt(s: Stmt, env: Env) : (Instrs, Env) = s match { + case Skip => (Nil, env) + case Assign(x, a) => { + val index = if (env.isDefinedAt(x)) env(x) else env.keys.size.toString + (compile_aexp(a, env) ++ + List("istore " + index + "\n"), env + (x -> index)) + } + case If(b, bl1, bl2) => { + val if_else = Fresh("If_else") + val if_end = Fresh("If_end") + val (instrs1, env1) = compile_bl(bl1, env) + val (instrs2, env2) = compile_bl(bl2, env1) + (compile_bexp(b, env, if_else) ++ + instrs1 ++ + List("goto " + if_end + "\n") ++ + List("\n" + if_else + ":\n\n") ++ + instrs2 ++ + List("\n" + if_end + ":\n\n"), env2) + } + case While(b, bl) => { + val loop_begin = Fresh("Loop_begin") + val loop_end = Fresh("Loop_end") + val (instrs1, env1) = compile_bl(bl, env) + (List("\n" + loop_begin + ":\n\n") ++ + compile_bexp(b, env, loop_end) ++ + instrs1 ++ + List("goto " + loop_begin + "\n") ++ + List("\n" + loop_end + ":\n\n"), env1) + } + case Write(x) => + (List("iload " + env(x) + "\n" + "invokestatic XXX/XXX/write(I)V\n"), env) +} + +def compile_bl(bl: Block, env: Env) : (Instrs, Env) = bl match { + case Nil => (Nil, env) + case s::bl => { + val (instrs1, env1) = compile_stmt(s, env) + val (instrs2, env2) = compile_bl(bl, env1) + (instrs1 ++ instrs2, env2) + } +} + +def compile(input: String) : String = { + val class_name = input.split('.')(0) + val tks = Tok.fromFile(input) + val ast = Stmts.parse_single(tks) + val instructions = compile_bl(ast, Map.empty)._1 + (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name) +} + + +def compile_to(input: String, output: String) = { + val fw = new java.io.FileWriter(output) + fw.write(compile(input)) + fw.close() +} + +// +val tks = Tok.fromString("x := x + 1") +val ast = Stmt.parse_single(tks) +println(compile_stmt(ast, Map("x" -> "n"))._1.mkString) + + + +//examples + +compile_to("loops.while", "loops.j") +//compile_to("fib.while", "fib.j") + + +// testing cases for time measurements +/* +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + +// for testing +import scala.sys.process._ + +val test_prog = """ +start := XXX; +x := start; +y := start; +z := start; +while 0 < x do { + while 0 < y do { + while 0 < z do { + z := z - 1 + }; + z := start; + y := y - 1 + }; + y := start; + x := x - 1 +}; +write x; +write y; +write z +""" + + +def compile_test(n: Int) : Unit = { + val class_name = "LOOP" + val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString)) + val ast = Stmts.parse_single(tks) + val instructions = compile_bl(ast, Map.empty)._1 + val assembly = (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name) + val fw = new java.io.FileWriter(class_name + ".j") + fw.write(assembly) + fw.close() + val test = ("java -jar jvm/jasmin-2.4/jasmin.jar " + class_name + ".j").!! + println(n + " " + time_needed(2, ("java " + class_name + "/" + class_name).!!)) +} + +List(1, 5000, 10000, 50000, 100000, 250000, 500000, 750000, 1000000).map(compile_test(_)) + + + +// javabyte code assmbler +// +// java -jar jvm/jasmin-2.4/jasmin.jar loops.j + +*/ + + + + + diff -r e66bd5c563eb -r b5b5583a3a08 Attic/lexer.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Attic/lexer.scala Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,280 @@ +// A simple lexer inspired by work of Sulzmann & Lu +//================================================== + + +import scala.language.implicitConversions +import scala.language.reflectiveCalls + +// regular expressions including records +abstract class Rexp +case object ZERO extends Rexp +case object ONE extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class RECD(x: String, r: Rexp) extends Rexp + +// values +abstract class Val +case object Empty extends Val +case class Chr(c: Char) extends Val +case class Sequ(v1: Val, v2: Val) extends Val +case class Left(v: Val) extends Val +case class Right(v: Val) extends Val +case class Stars(vs: List[Val]) extends Val +case class Rec(x: String, v: Val) extends Val + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]): Rexp = s match { + case Nil => ONE + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = + charlist2rexp(s.toList) + +implicit def RexpOps(r: Rexp) = new { + def | (s: Rexp) = ALT(r, s) + def % = STAR(r) + def ~ (s: Rexp) = SEQ(r, s) +} + +implicit def stringOps(s: String) = new { + def | (r: Rexp) = ALT(s, r) + def | (r: String) = ALT(s, r) + def % = STAR(s) + def ~ (r: Rexp) = SEQ(s, r) + def ~ (r: String) = SEQ(s, r) + def $ (r: Rexp) = RECD(s, r) +} + +def nullable(r: Rexp) : Boolean = r match { + case ZERO => false + case ONE => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case RECD(_, r1) => nullable(r1) +} + +def der(c: Char, r: Rexp) : Rexp = r match { + case ZERO => ZERO + case ONE => ZERO + case CHAR(d) => if (c == d) ONE else ZERO + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case RECD(_, r1) => der(c, r1) +} + + +// extracts a string from value +def flatten(v: Val) : String = v match { + case Empty => "" + case Chr(c) => c.toString + case Left(v) => flatten(v) + case Right(v) => flatten(v) + case Sequ(v1, v2) => flatten(v1) + flatten(v2) + case Stars(vs) => vs.map(flatten).mkString + case Rec(_, v) => flatten(v) +} + + +// extracts an environment from a value; +// used for tokenise a string +def env(v: Val) : List[(String, String)] = v match { + case Empty => Nil + case Chr(c) => Nil + case Left(v) => env(v) + case Right(v) => env(v) + case Sequ(v1, v2) => env(v1) ::: env(v2) + case Stars(vs) => vs.flatMap(env) + case Rec(x, v) => (x, flatten(v))::env(v) +} + +// The Injection Part of the lexer + +def mkeps(r: Rexp) : Val = r match { + case ONE => Empty + case ALT(r1, r2) => + if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2)) + case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) + case STAR(r) => Stars(Nil) + case RECD(x, r) => Rec(x, mkeps(r)) +} + +def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { + case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) + case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) + case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) + case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) + case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1)) + case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2)) + case (CHAR(d), Empty) => Chr(c) + case (RECD(x, r1), _) => Rec(x, inj(r1, c, v)) +} + +// some "rectification" functions for simplification +def F_ID(v: Val): Val = v +def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v)) +def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v)) +def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match { + case Right(v) => Right(f2(v)) + case Left(v) => Left(f1(v)) +} +def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match { + case Sequ(v1, v2) => Sequ(f1(v1), f2(v2)) +} +def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = + (v:Val) => Sequ(f1(Empty), f2(v)) +def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = + (v:Val) => Sequ(f1(v), f2(Empty)) +def F_RECD(f: Val => Val) = (v:Val) => v match { + case Rec(x, v) => Rec(x, f(v)) +} +def F_ERROR(v: Val): Val = throw new Exception("error") + +def simp(r: Rexp): (Rexp, Val => Val) = r match { + case ALT(r1, r2) => { + val (r1s, f1s) = simp(r1) + val (r2s, f2s) = simp(r2) + (r1s, r2s) match { + case (ZERO, _) => (r2s, F_RIGHT(f2s)) + case (_, ZERO) => (r1s, F_LEFT(f1s)) + case _ => if (r1s == r2s) (r1s, F_LEFT(f1s)) + else (ALT (r1s, r2s), F_ALT(f1s, f2s)) + } + } + case SEQ(r1, r2) => { + val (r1s, f1s) = simp(r1) + val (r2s, f2s) = simp(r2) + (r1s, r2s) match { + case (ZERO, _) => (ZERO, F_ERROR) + case (_, ZERO) => (ZERO, F_ERROR) + case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s)) + case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s)) + case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s)) + } + } + case r => (r, F_ID) +} + +// lexing functions including simplification +def lex_simp(r: Rexp, s: List[Char]) : Val = s match { + case Nil => if (nullable(r)) mkeps(r) else + { throw new Exception("lexing error") } + case c::cs => { + val (r_simp, f_simp) = simp(der(c, r)) + inj(r, c, f_simp(lex_simp(r_simp, cs))) + } +} + +def lexing_simp(r: Rexp, s: String) = + env(lex_simp(r, s.toList)) + + +// The Lexing Rules for the Fun Language + +def PLUS(r: Rexp) = r ~ r.% + +def Range(s : List[Char]) : Rexp = s match { + case Nil => ZERO + case c::Nil => CHAR(c) + case c::s => ALT(CHAR(c), Range(s)) +} +def RANGE(s: String) = Range(s.toList) + +val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") +val DIGIT = RANGE("0123456789") +val ID = SYM ~ (SYM | DIGIT).% +val NUM = PLUS(DIGIT) +val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" +val SEMI: Rexp = ";" +val OP: Rexp = ":=" | "=" | "-" | "+" | "*" | "!=" | "<" | ">" +val WHITESPACE = PLUS(" " | "\n" | "\t") +val RPAREN: Rexp = "{" +val LPAREN: Rexp = "}" +val STRING: Rexp = "\"" ~ SYM.% ~ "\"" + + +val WHILE_REGS = (("k" $ KEYWORD) | + ("i" $ ID) | + ("o" $ OP) | + ("n" $ NUM) | + ("s" $ SEMI) | + ("str" $ STRING) | + ("p" $ (LPAREN | RPAREN)) | + ("w" $ WHITESPACE)).% + + +// Two Simple While Tests +//======================== + +println("test: read n") + +val prog0 = """read n""" +println(lexing_simp(WHILE_REGS, prog0)) + +println("test: read n; write n ") + +val prog1 = """read n; write n""" +println(lexing_simp(WHILE_REGS, prog1)) + + +// Bigger Tests +//============== + +// escapes strings and prints them out as "", "\n" and so on +def esc(raw: String): String = { + import scala.reflect.runtime.universe._ + Literal(Constant(raw)).toString +} + +def escape(tks: List[(String, String)]) = + tks.map{ case (s1, s2) => (s1, esc(s2))} + +val prog2 = """ +write "Fib"; +read n; +minus1 := 0; +minus2 := 1; +while n > 0 do { + temp := minus2; + minus2 := minus1 + minus2; + minus1 := temp; + n := n - 1 +}; +write "Result"; +write minus2 +""" + +println("lexing Fib") +println(escape(lexing_simp(WHILE_REGS, prog2)).mkString("\n")) + + + +val prog3 = """ +start := 1000; +x := start; +y := start; +z := start; +while 0 < x do { + while 0 < y do { + while 0 < z do { + z := z - 1 + }; + z := start; + y := y - 1 + }; + y := start; + x := x - 1 +} +""" + +println("lexing Loops") +println(escape(lexing_simp(WHILE_REGS, prog3)).mkString("\n")) + diff -r e66bd5c563eb -r b5b5583a3a08 Attic/mllex.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Attic/mllex.scala Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,109 @@ +:load matcher.scala + + +// some regular expressions +val KEYWORDS = ALTS(List("#", "(", ")", ",", "->", "...", ":", ":>", ";", "=", + "=>", "[", "]", "_", "{", "|", "}", "abstype", "and", "andalso", "as", + "case", "datatype", "do", "else", "end", "eqtype", "exception", "fn", + "fun", "functor", "handle", "if", "in", "include", "infix", "infixr", + "let", "local", "nonfix", "of", "op", "open", "orelse", "raise", "rec", + "sharing", "sig", "signature", "struct", "structure", "then", "type", + "val", "where", "while", "with", "withtype")) + +val DIGITS = RANGE("0123456789") +val NONZERODIGITS = RANGE("123456789") + +val POSITIVES = ALT(SEQ(NONZERODIGITS, STAR(DIGITS)), "0") +val INTEGERS = ALT(SEQ("~", POSITIVES), POSITIVES) + +val ALL = ALTS(KEYWORDS, INTEGERS) + +val COMMENT = SEQS("/*", NOT(SEGS(STAR(ALL), "*/", STAR(ALL))), "*/") + + + +val LPAREN = CHAR('(') +val RPAREN = CHAR(')') +val WHITESPACE = PLUS(RANGE(" \n".toList)) +val OPS = RANGE("+-*".toList) + +// for classifying the strings that have been recognised +abstract class Token +case object T_WHITESPACE extends Token +case object T_NUM extends Token +case class T_OP(s: String) extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token +case class T_NT(s: String, rhs: List[Token]) extends Token + +def tokenizer(rs: List[Rule[Token]], s: String) : List[Token] = + tokenize(rs, s.toList).filterNot(_ match { + case T_WHITESPACE => true + case _ => false + }) + + + +// lexing rules for arithmetic expressions +val lexing_rules: List[Rule[Token]]= + List((NUMBER, (s) => T_NUM), + (WHITESPACE, (s) => T_WHITESPACE), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) => T_RPAREN), + (OPS, (s) => T_OP(s.mkString))) + +tokenize_file(Nil, "nominal_library.ML") + + + + +type Grammar = List[(String, List[Token])] + +// grammar for arithmetic expressions +val grammar = + List ("E" -> List(T_NUM), + "E" -> List(T_NT("E", Nil), T_OP("+"), T_NT("E", Nil)), + "E" -> List(T_NT("E", Nil), T_OP("-"), T_NT("E", Nil)), + "E" -> List(T_NT("E", Nil), T_OP("*"), T_NT("E", Nil)), + "E" -> List(T_LPAREN, T_NT("E", Nil), T_RPAREN)) + +def startsWith[A](ts1: List[A], ts2: List[A]) : Boolean = (ts1, ts2) match { + case (_, Nil) => true + case (T_NT(e, _)::ts1,T_NT(f, _)::ts2) => (e == f) && startsWith(ts1, ts2) + case (t1::ts1, t2::ts2) => (t1 == t2) && startsWith(ts1, ts2) + case _ => false +} + +def chop[A](ts1: List[A], prefix: List[A], ts2: List[A]) : Option[(List[A], List[A])] = + ts1 match { + case Nil => None + case t::ts => + if (startsWith(ts1, prefix)) Some(ts2.reverse, ts1.drop(prefix.length)) + else chop(ts, prefix, t::ts2) + } + +// examples +chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil) +chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil) + +def replace[A](ts: List[A], out: List[A], in: List [A]) = + chop(ts, out, Nil) match { + case None => None + case Some((before, after)) => Some(before ::: in ::: after) + } + +def parse1(g: Grammar, ts: List[Token]) : Boolean = ts match { + case List(T_NT("E", tree)) => { println(tree); true } + case _ => { + val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(T_NT(lhs, rhs))) + tss.flatten.exists(parse1(g, _)) + } +} + + +println() ; parse1(grammar, tokenizer(lexing_rules, "2 + 3 * 4 + 1")) +println() ; parse1(grammar, tokenizer(lexing_rules, "(2 + 3) * (4 + 1)")) +println() ; parse1(grammar, tokenizer(lexing_rules, "(2 + 3) * 4 (4 + 1)")) + + + diff -r e66bd5c563eb -r b5b5583a3a08 Attic/parser1.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Attic/parser1.scala Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,88 @@ +// A naive bottom-up parser with backtracking +// +// Needs: +// :load matcher.scala + +// some regular expressions +val DIGIT = RANGE("0123456789") +val NONZERODIGIT = RANGE("123456789") + +val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") +val LPAREN = CHAR('(') +val RPAREN = CHAR(')') +val WHITESPACE = PLUS(RANGE(" \n")) +val OPS = RANGE("+-*") + +// for classifying the strings that have been recognised + +abstract class Token +case object T_WHITESPACE extends Token +case object T_NUM extends Token +case class T_OP(s: String) extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token +case class NT(s: String) extends Token + +// lexing rules for arithmetic expressions +val lexing_rules: List[Rule[Token]]= + List((NUMBER, (s) => T_NUM), + (WHITESPACE, (s) => T_WHITESPACE), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) => T_RPAREN), + (OPS, (s) => T_OP(s.mkString))) + +// the tokenizer +val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE)) + +type Grammar = List[(String, List[Token])] + +// grammar for arithmetic expressions +val grammar = + List ("F" -> List(T_NUM), + "E" -> List(T_NUM), + "E" -> List(NT("E"), T_OP("+"), NT("E")), + "E" -> List(NT("E"), T_OP("-"), NT("E")), + "E" -> List(NT("E"), T_OP("*"), NT("E")), + "E" -> List(T_LPAREN, NT("E"), T_RPAREN)) + + +def chop[A](ts1: List[A], prefix: List[A], ts2: List[A]) : Option[(List[A], List[A])] = + ts1 match { + case Nil => None + case t::ts => + if (ts1.startsWith(prefix)) Some(ts2.reverse, ts1.drop(prefix.length)) + else chop(ts, prefix, t::ts2) + } + +// examples for chop +chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil) +chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil) + +def replace[A](ts: List[A], out: List[A], in: List [A]) = + chop(ts, out, Nil) match { + case None => None + case Some((before, after)) => Some(before ::: in ::: after) + } + +def parse(g: Grammar, ts: List[Token]) : Boolean = { + println(ts) + if (ts == List(NT("E"))) true + else { + val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(NT(lhs))) + tss.flatten.exists(parse(g, _)) + } +} + +def parser(g: Grammar, s: String) = { + println("\n") + parse(g, Tok.fromString(s)) +} + + + +parser(grammar, "2 + 3 * 4 + 1") +parser(grammar, "(2 + 3) * (4 + 1)") +parser(grammar, "(2 + 3) * 4 (4 + 1)") + + + diff -r e66bd5c563eb -r b5b5583a3a08 Attic/parser4.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Attic/parser4.scala Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,82 @@ + +// parser combinators with input type I and return type T + +case class SubString(s: String, l: Int, h: Int) { + def low = l + def high = h + def length = h - l + def substring(l: Int = l, h: Int = h) = s.slice(l, h) + def set(low: Int = l, high: Int = h) = SubString(s, low, high) + +} + +type Ctxt = List[(String, SubString)] + +abstract class Parser[T] { + + def parse(ts: SubString, ctxt: Ctxt): Set[(T, SubString)] + + def parse_all(s: String) : Set[T] = + for ((head, tail) <- parse(SubString(s, 0, s.length), Nil); if (tail.substring() == "")) yield head + + def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right) + def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f) + def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right) +} + +class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] { + def parse(sb: SubString, ctxt: Ctxt) = + for ((head1, tail1) <- p.parse(sb, ctxt); + (head2, tail2) <- q.parse(tail1, ctxt)) yield ((head1, head2), tail2) +} + +class AltParser[T](p: => Parser[T], q: => Parser[T]) extends Parser[T] { + def parse(sb: SubString, ctxt: Ctxt) = p.parse(sb, ctxt) ++ q.parse(sb, ctxt) +} + +class FunParser[T, S](p: => Parser[T], f: T => S) extends Parser[S] { + def parse(sb: SubString, ctxt: Ctxt) = + for ((head, tail) <- p.parse(sb, ctxt)) yield (f(head), tail) +} + +case class SubStringParser(s: String) extends Parser[SubString] { + val n = s.length + def parse(sb: SubString, ctxt: Ctxt) = { + if (n <= sb.length && sb.substring(sb.low, sb.low + n) == s) + Set((sb.set(high = sb.low + n), sb.set(low = sb.low + n))) + else Set() + } +} + +implicit def string2parser(s: String) = SubStringParser(s) ==> (_.substring()) + +class IgnLst[T](p: => Parser[T]) extends Parser[T] { + def parse(sb: SubString, ctxt: Ctxt) = { + if (sb.length == 0) Set() + else for ((head, tail) <- p.parse(sb.set(high = sb.high - 1), ctxt)) + yield (head, tail.set(high = tail.high + 1)) + } +} + +class CHECK[T](nt: String, p: => Parser[T]) extends Parser[T] { + def parse(sb: SubString, ctxt: Ctxt) = { + val should_trim = ctxt.contains (nt, sb) + if (should_trim && sb.length == 0) Set() + else if (should_trim) new IgnLst(p).parse(sb, (nt, sb)::ctxt) + else p.parse(sb, (nt, sb)::ctxt) + } +} + +// ambigous grammar +lazy val E: Parser[Int] = + new CHECK("E", (E ~ "+" ~ E) ==> { case ((x, y), z) => x + z} || + (E ~ "*" ~ E) ==> { case ((x, y), z) => x * z} || + ("(" ~ E ~ ")") ==> { case ((x, y), z) => y} || + "0" ==> { (s) => 0 } || + "1" ==> { (s) => 1 } || + "2" ==> { (s) => 2 } || + "3" ==> { (s) => 3 }) + +println(E.parse_all("1+2*3+3")) + + diff -r e66bd5c563eb -r b5b5583a3a08 Attic/re-alt.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Attic/re-alt.scala Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,115 @@ +trait RegExp { + def nullable: Boolean + def derive(c: Char): RegExp +} + +case object Empty extends RegExp { + def nullable = false + def derive(c: Char) = Empty +} + +case object Eps extends RegExp { + def nullable = true + def derive(c: Char) = Empty +} + +case class Str(s: String) extends RegExp { + def nullable = s.isEmpty + def derive(c: Char) = + if (s.isEmpty || s.head != c) Empty + else Str(s.tail) +} + +case class Cat(r: RegExp, s: RegExp) extends RegExp { + def nullable = r.nullable && s.nullable + def derive(c: Char) = + if (r.nullable) Or(Cat(r.derive(c), s), s.derive(c)) + else Cat(r.derive(c), s) +} + +case class Star(r: RegExp) extends RegExp { + def nullable = true + def derive(c: Char) = Cat(r.derive(c), this) +} + +case class Or(r: RegExp, s: RegExp) extends RegExp { + def nullable = r.nullable || s.nullable + def derive(c: Char) = Or(r.derive(c), s.derive(c)) +} + +case class And(r: RegExp, s: RegExp) extends RegExp { + def nullable = r.nullable && s.nullable + def derive(c: Char) = And(r.derive(c), s.derive(c)) +} + +case class Not(r: RegExp) extends RegExp { + def nullable = !r.nullable + def derive(c: Char) = Not(r.derive(c)) +} + + + + +object Matcher { + def matches(r: RegExp, s: String): Boolean = { + if (s.isEmpty) r.nullable + else matches(r.derive(s.head), s.tail) + } +} + + +object Pimps { + implicit def string2RegExp(s: String) = Str(s) + + implicit def regExpOps(r: RegExp) = new { + def | (s: RegExp) = Or(r, s) + def & (s: RegExp) = And(r, s) + def % = Star(r) + def %(n: Int) = rep(r, n) + def ? = Or(Eps, r) + def ! = Not(r) + def ++ (s: RegExp) = Cat(r, s) + def ~ (s: String) = Matcher.matches(r, s) + } + + implicit def stringOps(s: String) = new { + def | (r: RegExp) = Or(s, r) + def | (r: String) = Or(s, r) + def & (r: RegExp) = And(s, r) + def & (r: String) = And(s, r) + def % = Star(s) + def % (n: Int) = rep(Str(s), n) + def ? = Or(Eps, s) + def ! = Not(s) + def ++ (r: RegExp) = Cat(s, r) + def ++ (r: String) = Cat(s, r) + def ~ (t: String) = Matcher.matches(s, t) + } + + def rep(r: RegExp, n: Int): RegExp = + if (n <= 0) Star(r) + else Cat(r, rep(r, n - 1)) +} + + +object Test { + import Pimps._ + + val digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" + val int = ("+" | "-").? ++ digit.%(1) + val real = ("+" | "-").? ++ digit.%(1) ++ ("." ++ digit.%(1)).? ++ (("e" | "E") ++ ("+" | "-").? ++ digit.%(1)).? + + def main(args: Array[String]) { + val ints = List("0", "-4534", "+049", "99") + val reals = List("0.9", "-12.8", "+91.0", "9e12", "+9.21E-12", "-512E+01") + val errs = List("", "-", "+", "+-1", "-+2", "2-") + + ints.foreach(s => assert(int ~ s)) + reals.foreach(s => assert(!(int ~ s))) + errs.foreach(s => assert(!(int ~ s))) + + ints.foreach(s => assert(real ~ s)) + reals.foreach(s => assert(real ~ s)) + errs.foreach(s => assert(!(real ~ s))) + } +} diff -r e66bd5c563eb -r b5b5583a3a08 Attic/re-internal.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Attic/re-internal.scala Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,17 @@ + +// measures the time a function needs +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + + +for (i <- 1 to 10001 by 300) { + val re = ("((a?){" + i + "})(a{" + i + "})") + println(i + " " + "%.5f".format(time_needed(1, ("a" * i).matches(re)))) +} + + + diff -r e66bd5c563eb -r b5b5583a3a08 Attic/re-sulzmann-partial.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Attic/re-sulzmann-partial.scala Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,136 @@ +import scala.language.implicitConversions +import scala.language.reflectiveCalls + +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp + +abstract class Pat + +case class VAR(x: String, r: Rexp) extends Pat +case class GRP(x: String, p: Pat) extends Pat +case class PSEQ(p1: Pat, p2: Pat) extends Pat +case class PALT(p1: Pat, p2: Pat) extends Pat +case class PSTAR(p: Pat) extends Pat + + +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true +} + +def down (p: Pat) : Rexp = p match { + case VAR(x: String, w: String, r: Rexp) => r + case GRP(x: String, w: String, p: Pat) => down(p) + case PSEQ(p1: Pat, p2: Pat) => SEQ(down(p1), down(p2)) + case PALT(p1: Pat, p2: Pat) => ALT(down(p1), down(p2)) + case PSTAR(p: Pat) => STAR(down(p)) +} + +def patnullable (p: Pat) : Boolean = p match { + case PVar(_, r) => nullable(r) + case PSEQ(p1, p2) => patnullable(p1) && patnullable(p2) + case PCHOICE(p1, p2) => patnullable(p1) || patnullable(p2) + case PSTAR(p) => true + case PatVar(_, p) => patnullable(p) +} + +//type Env = Set[List[(String, String)]] +type Env = Map[Int, String] + +def update(n: Int, c: Char) (env: Env) = + env + (n -> (env.getOrElse(n, "") + c.toString)) + +def pderivPat (p: Pat, c: Char) : Set[(Pat, Env => Env)] = p match { + case PVar(n: Int, r: Rexp) => { + val pds = pderiv(r, c) + if (pds.isEmpty) Set() + else Set((PVar(n, toRexp(pds.toList)), update(n, c))) + } + case PSEQ(p1: Pat, p2: Pat) => { + val pats : Set[(Pat, Env => Env)] = + for ((p, f) <- pderivPat(p1, c)) yield (PSEQ(p, p2), f) + if (nullable(strip(p1))) pats ++ pderivPat(p2, c) + else pats + } + case PCHOICE(p1: Pat, p2: Pat) => pderivPat(p1, c) ++ pderivPat(p2, c) + case PSTAR(p1: Pat) => + for ((p, f) <- pderivPat(p1, c)) yield (PSEQ(p, PSTAR(p1)), f) + case PatVar(n: Int, p1: Pat) => + for ((p, f) <- pderivPat(p1, c)) yield (PatVar(n, p), f compose (update (n, c))) +} + + +val p2 = PSEQ(PVar(1, STAR("A")), PVar(2, STAR("A"))) +pderivPat(p2, 'A').mkString("\n") + + +def greedy_aux(env: Set[(Pat, Env)], w: List[Char]) : Set[Env] = w match { + case Nil => + for ((p, e) <- env if patnullable(p)) yield e + case c::cs => { + val env2 = for ((p, e) <- env; (p1, f) <- pderivPat(p, c)) yield (p1, f(e)) + greedy_aux(env2, cs) + } +} + +def greedy(p: Pat, w: String) = { + val res = greedy_aux(Set((p, Map())), w.toList) + if (res.isEmpty) None else Some(res) +} + +// some convenience for typing in regular expressions +def charlist2rexp (s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp (s : String) : Rexp = charlist2rexp(s.toList) + +implicit def RexpOps (r: Rexp) = new { + def | (s: Rexp) = ALT(r, s) + def % = STAR(r) + def ~ (s: Rexp) = SEQ(r, s) +} + +implicit def stringOps (s: String) = new { + def | (r: Rexp) = ALT(s, r) + def | (r: String) = ALT(s, r) + def % = STAR(s) + def ~ (r: Rexp) = SEQ(s, r) + def ~ (r: String) = SEQ(s, r) +} + +implicit def PatOps (p: Pat) = new { + def | (q: Pat) = PALT(p, q) + def % = PSTAR(p) + def ~ (q: Pat) = PSEQ(p, q) +} + +val p3 = PSTAR(PSEQ(PVar(1, "A"), PVar(2, "B"))) + +greedy2(Set((p3, Map())), "ABAB".toList) + + +val p4 = PVar(1, "A") +greedy2(Set((p4, Map())), "A".toList) + +val p5 = PSEQ(PVar(1, "A"), PVar(1, "B")) +greedy2(Set((p5, Map())), "AB".toList) + +val res = pderivPat(p5, 'A') +for ((_, f) <- res) yield f(Map()) + + +val p6 = PatVar(4,PSTAR(PCHOICE(PCHOICE(PVar(1, "A"), PVar(2, "AB")), PVar(3, "B")))) + +greedy(p6, "ABA") diff -r e66bd5c563eb -r b5b5583a3a08 Attic/re-sulzmann.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Attic/re-sulzmann.scala Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,164 @@ +import scala.language.implicitConversions +import scala.language.reflectiveCalls + +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp + +abstract class Pat + +case class VAR(x: String, w: String, r: Rexp) extends Pat +case class GRP(x: String, w: String, p: Pat) extends Pat +case class PSEQ(p1: Pat, p2: Pat) extends Pat +case class PALT(p1: Pat, p2: Pat) extends Pat +case class PSTAR(p: Pat) extends Pat + +object VAR { def apply(x: String, r: Rexp) : VAR = VAR(x, "", r) } +object GRP { def apply(x: String, p: Pat) : GRP = GRP(x, "", p) } + + +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true +} + +// derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) +} + +def down (p: Pat) : Rexp = p match { + case VAR(x: String, w: String, r: Rexp) => r + case GRP(x: String, w: String, p: Pat) => down(p) + case PSEQ(p1: Pat, p2: Pat) => SEQ(down(p1), down(p2)) + case PALT(p1: Pat, p2: Pat) => ALT(down(p1), down(p2)) + case PSTAR(p: Pat) => STAR(down(p)) +} + +def empty (p: Pat) : Pat = p match { + case VAR(x: String, w: String, r: Rexp) => + if (nullable(r)) VAR(x, w, EMPTY) + else VAR(x, w, NULL) + case GRP(x: String, w: String, p: Pat) => GRP(x, w, empty(p)) + case PSEQ(p1: Pat, p2: Pat) => PSEQ(empty(p1), empty(p2)) + case PALT(p1: Pat, p2: Pat) => PALT(empty(p1), empty(p2)) + case PSTAR(p: Pat) => PSTAR(empty(p)) +} + +def patder (c: Char, p: Pat) : Pat = p match { + case VAR(x: String, w: String, r: Rexp) => VAR(x, w + c, der(c, r)) + case GRP(x: String, w: String, p: Pat) => GRP(x, w + c, patder(c, p)) + case PSEQ(p1: Pat, p2: Pat) => + if (nullable(down(p1))) PALT(PSEQ(patder(c, p1), p2), PSEQ(empty(p1), patder(c, p2))) + else PSEQ(patder(c, p1), p2) + case PALT(p1: Pat, p2: Pat) => PALT(patder(c, p1), patder(c, p2)) + case PSTAR(p: Pat) => PSEQ(patder(c, p), PSTAR(p)) +} + +def patders (s: List[Char], p: Pat) : Pat = s match { + case Nil => p + case c::s => patders(s, patder(c, p)) +} + +type Env = Set[List[(String, String)]] + +def special (p: Pat, env: Env) : Env = + if (env == Set() && nullable(down(p))) Set(Nil) else env + +def env (p: Pat) : Env = p match { + case VAR(x: String, w: String, r: Rexp) => + if (nullable(r)) Set(List((x, w))) else Set() + case GRP(x: String, w: String, p1: Pat) => + special(p, for (e <- env(p1)) yield List((x, w)) ++ e) + case PSEQ(p1: Pat, p2: Pat) => + special(p, for (e1 <- env(p1); e2 <- env(p2)) yield (e1 ++ e2)) + case PALT(p1: Pat, p2: Pat) => special(p, env(p1) ++ env(p2)) + case PSTAR(p1: Pat) => special(p, env(p1)) +} + +def matcher (p: Pat, s: String) = env(empty(patders(s.toList, p))) + + +// some convenience for typing in regular expressions +def charlist2rexp (s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp (s : String) : Rexp = charlist2rexp(s.toList) + +implicit def RexpOps (r: Rexp) = new { + def | (s: Rexp) = ALT(r, s) + def % = STAR(r) + def ~ (s: Rexp) = SEQ(r, s) +} + +implicit def stringOps (s: String) = new { + def | (r: Rexp) = ALT(s, r) + def | (r: String) = ALT(s, r) + def % = STAR(s) + def ~ (r: Rexp) = SEQ(s, r) + def ~ (r: String) = SEQ(s, r) +} + +implicit def PatOps (p: Pat) = new { + def | (q: Pat) = PALT(p, q) + def % = PSTAR(p) + def ~ (q: Pat) = PSEQ(p, q) +} + + +val p0 = VAR("x", "A") + +patders("A".toList, p0) +matcher(p0, "A") + +val p1 = VAR("x", "A") | VAR("y", "A") | VAR("z", "B") + +patders("A".toList, p1) +matcher(p1, "A") +matcher(p1, "B") + +val p2 = VAR("x", "AB") +matcher(p2, "AB") +matcher(p2, "AA") + +val p3 = VAR("x", "A" ~ "B") +matcher(p3, "AB") +matcher(p3, "AA") + +val p4 = VAR("x", "A") ~ VAR("y", "B") +matcher(p4, "AB") +matcher(p4, "AA") + +val p5 = VAR("x", "A".%) +matcher(p5, "A") +matcher(p5, "AA") +matcher(p5, "") +matcher(p5, "AAA") + +val p6 = VAR("x", "A").% +matcher(p6, "A") +matcher(p6, "AA") +matcher(p6, "") +matcher(p6, "AAA") + +val p7 = (VAR("x", "A") | VAR("y", "AB") | VAR("z", "B")).% +matcher(p7, "AB") + diff -r e66bd5c563eb -r b5b5583a3a08 Attic/re.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Attic/re.scala Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,123 @@ + +// regular expressions including NOT +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class NOT(r: Rexp) extends Rexp + + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NOT(r) => !(nullable(r)) +} + +// tests whether a regular expression +// cannot recognise more +def no_more (r: Rexp) : Boolean = r match { + case NULL => true + case EMPTY => false + case CHAR(_) => false + case ALT(r1, r2) => no_more(r1) && no_more(r2) + case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1) + case STAR(_) => false + case NOT(r) => !(no_more(r)) +} + + +// derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case NOT(r) => NOT(der (c, r)) +} + + +// regular expression for specifying +// ranges of characters +def RANGE(s : List[Char]) : Rexp = s match { + case Nil => NULL + case c::Nil => CHAR(c) + case c::s => ALT(CHAR(c), RANGE(s)) +} + +//one or more +def PLUS(r: Rexp) = SEQ(r, STAR(r)) + + +//some regular expressions +val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList) +val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList) +val LETTER = ALT(LOWERCASE, UPPERCASE) +val DIGIT = RANGE("0123456789".toList) +val NONZERODIGIT = RANGE("123456789".toList) + +val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGIT))) +val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") +val WHITESPACE = RANGE(" \n".toList) +val WHITESPACES = PLUS(WHITESPACE) + +val ALL = ALT(ALT(LETTER, DIGIT), WHITESPACE) +val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/") + + +// an example list of regular expressions +val regs: List[Rexp]= List("if", "then", "else", "+", IDENT, NUMBER, WHITESPACES, COMMENT) + + +def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s) + +def munch(r: Rexp, s: List[Char], t: List[Char]) : Option[(List[Char], List[Char])] = + s match { + case Nil if (nullable(r)) => Some(Nil, t) + case Nil => None + case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, t) + case c::s if (no_more(der (c, r))) => None + case c::s => munch(der (c, r), s, t ::: List(c)) + } + +def one_string (regs: List[Rexp], s: List[Char]) : (List[Char], List[Char]) = { + val somes = regs.map { munch(_, s, Nil) } .flatten + if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head) +} + +def tokenize (regs: List[Rexp], s: List[Char]) : List[String] = s match { + case Nil => Nil + case _ => one_string(regs, s) match { + case (rest, s) => s.mkString :: tokenize(regs, rest) + } +} + +//examples +println(tokenize(regs, "if true then then 42 else +".toList)) +println(tokenize(regs, "if+true+then+then+42+else +".toList)) +println(tokenize(regs, "ifff if 34 34".toList)) +println(tokenize(regs, "/*ifff if */ hhjj /*34 */".toList)) +println(tokenize(regs, "/* if true then */ then 42 else +".toList)) +//println(tokenize(regs, "ifff $ if 34".toList)) // causes an error because of the symbol $ diff -r e66bd5c563eb -r b5b5583a3a08 Attic/re0.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Attic/re0.scala Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,119 @@ +import scala.annotation.tailrec +import scala.language.implicitConversions + +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case object ALLCHAR extends Rexp +case class CHAR(c: Char) extends Rexp +case class STR(s: String) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class NOT(r: Rexp) extends Rexp +case class REP(r: Rexp, n: Int) extends Rexp + +// some convenience for typing in regular expressions +implicit def string2rexp(s : String) : Rexp = STR(s) + +implicit def RexpOps(r: Rexp) : Rexp = new { + def | (s: Rexp) = ALT(r, s) + def % = STAR(r) + def %(n: Int) = REP(r, n) + def %%(n: Int) = SEQ(REP(r, n), STAR(r)) + def ? = ALT(EMPTY, r) + def unary_! = NOT(r) + def ~ (s: Rexp) = SEQ(r, s) +} + +implicit def stringOps(s: String) : Rexp = new { + def | (r: Rexp) = ALT(s, r) + def | (r: String) = ALT(s, r) + def % = STAR(s) + def %(n: Int) = REP(s, n) + def %%(n: Int) = SEQ(REP(s, n), STAR(s)) + def ? = ALT(EMPTY, s) + def unary_! = NOT(s) + def ~ (r: Rexp) = SEQ(s, r) + def ~ (r: String) = SEQ(s, r) +} + + +// nullable function: tests whether the regular +// expression can recognise the empty string + +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case ALLCHAR => false + case CHAR(_) => false + case STR(s) => s.isEmpty + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NOT(r) => !(nullable(r)) + case REP(r, i) => if (i == 0) true else nullable(r) +} + +// derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case ALLCHAR => EMPTY + case CHAR(d) => if (c == d) EMPTY else NULL + case STR(s) => if (s.isEmpty || s.head != c) NULL else STR(s.tail) + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case NOT(r) => NOT(der (c, r)) + case REP(r, i) => + if (i == 0) NULL else SEQ(der(c, r), REP(r, i - 1)) +} + + +// derivative w.r.t. a string (iterates der) +@tailrec +def ders (s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders(s, der(c, r)) +} + +// main matcher function +def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) + +//examples +val digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" +val int = ("+" | "-").? ~ digit.%%(1) +val real = ("+" | "-").? ~ digit.%%(1) ~ ("." ~ digit.%%(1)).? ~ (("e" | "E") ~ ("+" | "-").? ~ digit.%%(1)).? + +val ints = List("0", "-4534", "+049", "99") +val reals = List("0.9", "-12.8", "+91.0", "9e12", "+9.21E-12", "-512E+01") +val errs = List("", "-", "+", "+-1", "-+2", "2-") + +ints.map(s => matcher(int, s)) +reals.map(s => matcher(int, s)) +errs.map(s => matcher(int, s)) + +ints.map(s => matcher(real, s)) +reals.map(s => matcher(real, s)) +errs.map(s => matcher(real, s)) + + + +def RTEST(n: Int) = ("a".? %(n)) ~ ("a" %(n)) + +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + +for (i <- 1 to 12000 by 500) { + println(i + ": " + "%.5f".format(time_needed(1, matcher(RTEST(i), "a" * i)))) +} + + diff -r e66bd5c563eb -r b5b5583a3a08 Attic/re1.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Attic/re1.scala Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,154 @@ +// A simple matcher for basic regular expressions + +abstract class Rexp +case object ZERO extends Rexp // matches nothing +case object ONE extends Rexp // matches an empty string +case class CHAR(c: Char) extends Rexp // matches a character c +case class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternative +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence +case class STAR(r: Rexp) extends Rexp // star + +// nullable function: tests whether a regular +// expression can recognise the empty string +def nullable(r: Rexp) : Boolean = r match { + case ZERO => false + case ONE => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true +} + +// the derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case ZERO => ZERO + case ONE => ZERO + case CHAR(d) => if (c == d) ONE else ZERO + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r1) => SEQ(der(c, r1), STAR(r1)) +} + +// the derivative w.r.t. a string (iterates der) +def ders (s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders(s, der(c, r)) +} + +// the main matcher function +def matcher(r: Rexp, s: String) : Boolean = + nullable(ders(s.toList, r)) + + +// examples from the homework + +val r = STAR(ALT(SEQ(CHAR('a'), CHAR('b')), CHAR('b'))) +der('a', r) +der('b', r) +der('c', r) + +val r2 = SEQ(SEQ(CHAR('x'), CHAR('y')), CHAR('z')) +der('x', r2) +der('y', der('x', r2)) +der('z', der('y', der('x', r2))) + + +// the optional regular expression (one or zero times) +def OPT(r: Rexp) = ALT(r, ONE) + +// the n-times regular expression (explicitly expanded) +def NTIMES(r: Rexp, n: Int) : Rexp = n match { + case 0 => ONE + case 1 => r + case n => SEQ(r, NTIMES(r, n - 1)) +} + + +// Test Cases + +// the evil regular expression a?{n} a{n} +def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n)) + +// the evil regular expression (a*)*b +val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) + +// for measuring time +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start) / (i * 1.0e9) +} + + +// test: (a?{n}) (a{n}) +println("Test (a?{n}) (a{n})") + +for (i <- 0 to 20 by 2) { + println(f"$i: ${time_needed(2, matcher(EVIL1(i), "a" * i))}%.5f") +} + +// test: (a*)* b +println("Test (a*)* b") + +for (i <- 0 to 20 by 2) { + println(f"$i: ${time_needed(2, matcher(EVIL2, "a" * i))}%.5f") +} + + +// the size of a regular expressions - for testing purposes +def size(r: Rexp) : Int = r match { + case ZERO => 1 + case ONE => 1 + case CHAR(_) => 1 + case ALT(r1, r2) => 1 + size(r1) + size(r2) + case SEQ(r1, r2) => 1 + size(r1) + size(r2) + case STAR(r) => 1 + size(r) +} + +// the expicit expansion in EVIL1(n) increases +// drastically its size + +size(EVIL1(1)) // 5 +size(EVIL1(3)) // 17 +size(EVIL1(5)) // 29 +size(EVIL1(7)) // 41 +size(EVIL1(20)) // 119 + +// given a regular expression and building successive +// derivatives might result into bigger and bigger +// regular expressions...here is an example for this: + +// (a+b)* o a o b o (a+b)* +val BIG_aux = STAR(ALT(CHAR('a'), CHAR('b'))) +val BIG = SEQ(BIG_aux, SEQ(CHAR('a'),SEQ(CHAR('b'), BIG_aux))) + +size(ders("".toList, BIG)) // 13 +size(ders("ab".toList, BIG)) // 51 +size(ders("abab".toList, BIG)) // 112 +size(ders("ababab".toList, BIG)) // 191 +size(ders("abababab".toList, BIG)) // 288 +size(ders("ababababab".toList, BIG)) // 403 +size(ders("abababababab".toList, BIG)) // 536 + + +size(ders(("ab" * 200).toList, BIG)) // 366808 + +for (i <- 0 to 200 by 10) { + println(f"$i: ${time_needed(2, matcher(BIG, "ab" * i))}%.5f") +} + + + + +////////////////////////////////////// +def concat(A: Set[String], B: Set[String]) : Set[String] = + for (s1 <- A; s2 <- B) yield s1 ++ s2 + + +val A = Set("foo", "bar") +val B = Set("a", "b") + + diff -r e66bd5c563eb -r b5b5583a3a08 Attic/re2.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Attic/re2.scala Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,118 @@ +// A Version with an explicit n-times regular expression; +// this keeps the size of the regular expression in the +// EVIL1 test-case quite small + +abstract class Rexp +case object ZERO extends Rexp +case object ONE extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class NTIMES(r: Rexp, n: Int) extends Rexp //explicit constructor for n-times + + +def nullable (r: Rexp) : Boolean = r match { + case ZERO => false + case ONE => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NTIMES(r, i) => if (i == 0) true else nullable(r) +} + + +def der (c: Char, r: Rexp) : Rexp = r match { + case ZERO => ZERO + case ONE => ZERO + case CHAR(d) => if (c == d) ONE else ZERO + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r1) => SEQ(der(c, r1), STAR(r1)) + case NTIMES(r1, i) => + if (i == 0) ZERO else SEQ(der(c, r1), NTIMES(r1, i - 1)) +} + +def ders (s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders(s, der(c, r)) +} + +def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) + + +// the optional regular expression: one or zero times +// this regular expression is still defined in terms of ALT +def OPT(r: Rexp) = ALT(r, ONE) + + +// Test Cases + +// evil regular expressions +def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n)) +val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) + +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start) / (i * 1.0e9) +} + + + +// test: (a?{n}) (a{n}) +for (i <- 0 to 1000 by 100) { + println(f"$i: ${time_needed(2, matcher(EVIL1(i), "a" * i))}%.5f") +} + +// test: (a*)* b +for (i <- 0 to 20) { + println(f"$i: ${time_needed(2, matcher(EVIL2, "a" * i))}%.5f") +} + + +// the size of a regular expressions - for testing purposes +def size(r: Rexp) : Int = r match { + case ZERO => 1 + case ONE => 1 + case CHAR(_) => 1 + case ALT(r1, r2) => 1 + size(r1) + size(r2) + case SEQ(r1, r2) => 1 + size(r1) + size(r2) + case STAR(r) => 1 + size(r) + case NTIMES(r, _) => 1 + size(r) +} + +// EVIL1(n) has now a constant size, no matter +// what n is; also the derivative only grows +// moderately + +size(EVIL1(1)) // 7 +size(EVIL1(3)) // 7 +size(EVIL1(5)) // 7 +size(EVIL1(7)) // 7 +size(EVIL1(20)) // 7 + +size(ders("".toList, EVIL1(5))) // 7 +size(ders("a".toList, EVIL1(5))) // 16 +size(ders("aa".toList, EVIL1(5))) // 35 +size(ders("aaa".toList, EVIL1(5))) // 59 +size(ders("aaaa".toList, EVIL1(5))) // 88 +size(ders("aaaaa".toList, EVIL1(5))) // 122 +size(ders("aaaaaa".toList, EVIL1(5))) // 151 + +// but the size of the derivatives can still grow +// quite dramatically in case of EVIL2 + +size(ders("".toList, EVIL2)) // 5 +size(ders("a".toList, EVIL2)) // 12 +size(ders("aa".toList, EVIL2)) // 28 +size(ders("aaa".toList, EVIL2)) // 58 +size(ders("aaaa".toList, EVIL2)) // 116 +size(ders("aaaaa".toList, EVIL2)) // 230 +size(ders("aaaaaa".toList, EVIL2)) // 456 + +size(ders(("a" * 20).toList, EVIL2)) // 7340068 \ No newline at end of file diff -r e66bd5c563eb -r b5b5583a3a08 Attic/re3.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Attic/re3.scala Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,127 @@ +// A version with simplification of derivatives; +// this keeps the regular expressions small, which +// is good for the run-time + + +abstract class Rexp +case object ZERO extends Rexp +case object ONE extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class NTIMES(r: Rexp, n: Int) extends Rexp + + + +// the nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case ZERO => false + case ONE => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NTIMES(r, i) => if (i == 0) true else nullable(r) +} + +// the derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case ZERO => ZERO + case ONE => ZERO + case CHAR(d) => if (c == d) ONE else ZERO + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r1) => SEQ(der(c, r1), STAR(r1)) + case NTIMES(r, i) => + if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1)) +} + +def simp(r: Rexp) : Rexp = r match { + case ALT(r1, r2) => (simp(r1), simp(r2)) match { + case (ZERO, r2s) => r2s + case (r1s, ZERO) => r1s + case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s) + } + case SEQ(r1, r2) => (simp(r1), simp(r2)) match { + case (ZERO, _) => ZERO + case (_, ZERO) => ZERO + case (ONE, r2s) => r2s + case (r1s, ONE) => r1s + case (r1s, r2s) => SEQ(r1s, r2s) + } + case r => r +} + + +// the derivative w.r.t. a string (iterates der) +def ders(s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders(s, simp(der(c, r))) +} + + +// the main matcher function +def matcher(r: Rexp, s: String) : Boolean = + nullable(ders(s.toList, r)) + + +// one or zero +def OPT(r: Rexp) = ALT(r, ONE) + + +// Test Cases + +// evil regular expressions: (a?){n} a{n} and (a*)* b +def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n)) +val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) + + +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + + +//test: (a?{n}) (a{n}) +for (i <- 0 to 8000 by 1000) { + println(f"$i: ${time_needed(3, matcher(EVIL1(i), "a" * i))}%.5f") +} + +//test: (a*)* b +for (i <- 0 to 6000000 by 500000) { + println(f"$i: ${time_needed(3, matcher(EVIL2, "a" * i))}%.5f") +} + + +// size of a regular expressions - for testing purposes +def size(r: Rexp) : Int = r match { + case ZERO => 1 + case ONE => 1 + case CHAR(_) => 1 + case ALT(r1, r2) => 1 + size(r1) + size(r2) + case SEQ(r1, r2) => 1 + size(r1) + size(r2) + case STAR(r) => 1 + size(r) + case NTIMES(r, _) => 1 + size(r) +} + + +// now the size of the derivatives grows +// much, much slower + +size(ders("".toList, EVIL2)) // 5 +size(ders("a".toList, EVIL2)) // 8 +size(ders("aa".toList, EVIL2)) // 8 +size(ders("aaa".toList, EVIL2)) // 8 +size(ders("aaaa".toList, EVIL2)) // 8 +size(ders("aaaaa".toList, EVIL2)) // 8 + + + + + diff -r e66bd5c563eb -r b5b5583a3a08 Attic/re3a.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Attic/re3a.scala Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,112 @@ + +abstract class Rexp +case object ZERO extends Rexp +case object ONE extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class NTIMES(r: Rexp, n: Int) extends Rexp +case class UPNTIMES(r: Rexp, n: Int) extends Rexp + +// the nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case ZERO => false + case ONE => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NTIMES(r, i) => if (i == 0) true else nullable(r) + case UPNTIMES(r: Rexp, n: Int) => true +} + +// the derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case ZERO => ZERO + case ONE => ZERO + case CHAR(d) => if (c == d) ONE else ZERO + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r1) => SEQ(der(c, r1), STAR(r1)) + case NTIMES(r1, i) => + if (i == 0) ZERO else + if (nullable(r1)) SEQ(der(c, r1), UPNTIMES(r1, i - 1)) + else SEQ(der(c, r1), NTIMES(r1, i - 1)) + case UPNTIMES(r1, i) => + if (i == 0) ZERO + else SEQ(der(c, r1), UPNTIMES(r1, i - 1)) +} + +def simp(r: Rexp) : Rexp = r match { + case ALT(r1, r2) => (simp(r1), simp(r2)) match { + case (ZERO, r2s) => r2s + case (r1s, ZERO) => r1s + case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s) + } + case SEQ(r1, r2) => (simp(r1), simp(r2)) match { + case (ZERO, _) => ZERO + case (_, ZERO) => ZERO + case (ONE, r2s) => r2s + case (r1s, ONE) => r1s + case (r1s, r2s) => SEQ(r1s, r2s) + } + case r => r +} + + +// the derivative w.r.t. a string (iterates der) +def ders (s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders(s, simp(der(c, r))) +} + + +// the main matcher function +def matches(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) + +// one or zero +def OPT(r: Rexp) = ALT(r, ONE) + +// evil regular expressions +def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n)) +val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) +val EVIL3 = SEQ(STAR(ALT(CHAR('a'), SEQ(CHAR('a'),CHAR('a')))), CHAR('b')) + +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + "%.5f".format((end - start) / (i * 1.0e9)) +} + + +// test: (a?{n}) (a{n}) +for (i <- 0 to 8000 by 1000) { + println(s"$i: ${time_needed(2, matches(EVIL1(i), "a" * i))}") +} + +// test: (a*)* b +for (i <- 0 to 6000000 by 500000) { + println(s"$i: ${time_needed(2, matches(EVIL2, "a" * i))}") +} + + +val r0 = simp(der('a', EVIL3)) +val r1 = simp(der('a', r0)) +val r2 = simp(der('a', r1)) +val r3 = simp(der('a', r2)) +val r4 = simp(der('a', r3)) +val r5 = simp(der('a', r4)) +val r6 = simp(der('a', r5)) + +//test: (a|aa)* b +/* +for (i <- 0 to 100 by 10) { + println(s"$i: ${time_needed(2, matches(EVIL3, "a" * i ++ "c"))}") +} + */ + diff -r e66bd5c563eb -r b5b5583a3a08 Attic/re3ext.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Attic/re3ext.scala Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,151 @@ +abstract class Rexp +case object ZERO extends Rexp +case object ONE extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class NTIMES(r: Rexp, n: Int) extends Rexp + +def simp(r: Rexp): Rexp = r match { + case ALT(r1, r2) => { + (simp(r1), simp(r2)) match { + case (ZERO, r2s) => r2s + case (r1s, ZERO) => r1s + case (r1s, r2s) => if (r1s == r2s) r1s else ALT(r1s, r2s) + } + } + case SEQ(r1, r2) => { + (simp(r1), simp(r2)) match { + case (ZERO, _) => ZERO + case (_, ZERO) => ZERO + case (ONE, r2s) => r2s + case (r1s, ONE) => r1s + case (r1s, r2s) => SEQ(r1s, r2s) + } + } + case NTIMES(r, n) => NTIMES(simp(r), n) + case r => r +} + +def nullable (r: Rexp) : Boolean = r match { + case ZERO => false + case ONE => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NTIMES(r, i) => if (i == 0) true else nullable(r) +} + +def der (c: Char, r: Rexp) : Rexp = r match { + case ZERO => ZERO + case ONE => ZERO + case CHAR(d) => if (c == d) ONE else ZERO + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case NTIMES(r, i) => + if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1)) +} + +def ders (s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders(s, simp(der(c, r))) +} + +def matches(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) + + +//one or zero +def OPT(r: Rexp) = ALT(r, ONE) + +def EVIL(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n)) + +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + +/* +for (i <- 1 to 9001 by 1000) { + println(i + " " + "%.5f".format(time_needed(1, matches(EVIL(i), "a" * i)))) +} +*/ + +// some convenience for typing in regular expressions +import scala.language.implicitConversions +import scala.language.reflectiveCalls + +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => ONE + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + +implicit def RexpOps (r: Rexp) = new { + def | (s: Rexp) = ALT(r, s) + def % = STAR(r) + def ~ (s: Rexp) = SEQ(r, s) +} + +implicit def stringOps (s: String) = new { + def | (r: Rexp) = ALT(s, r) + def | (r: String) = ALT(s, r) + def % = STAR(s) + def ~ (r: Rexp) = SEQ(s, r) + def ~ (r: String) = SEQ(s, r) +} + + +def PLUS(r: Rexp) = SEQ(r, STAR(r)) +def RANGE(s: List[Char]) : Rexp = s match { + case Nil => ZERO + case c::s => ALT(CHAR(c), RANGE(s)) +} +def NMTIMES(r: Rexp, n: Int, m: Int) : Rexp = + if (n >= m) NTIMES(r, n) else ALT(NTIMES(r, m), NMTIMES(r, n, m - 1)) + + +println("Coursework:") +val REG1 = PLUS(PLUS("a" ~ "a" ~ "a")) +val REG2 = PLUS(PLUS("aaaaaaaaaaaaaaaaaaa" ~ OPT("a"))) // 19 as ~ a? + + +//40 * aaa matches +//43 * aaa + aa does not +//45 * aaa + a + +println("EVIL1:") +println(matches(REG1, "aaa" * 40)) +println(matches(REG1, "aaa" * 43 + "aa")) +println(matches(REG1, "aaa" * 45 + "a")) +println("EVIL2:") +println(matches(REG2, "aaa" * 40)) +println(matches(REG2, "aaa" * 43 + "aa")) +println(matches(REG2, "aaa" * 45 + "a")) + +println("EMAIL:") +val LOWERCASE = "abcdefghijklmnopqrstuvwxyz" +val DIGITS = "0123456789" +val EMAIL = PLUS(RANGE((LOWERCASE + DIGITS + "_" + "." + "-").toList)) ~ "@" ~ + PLUS(RANGE((LOWERCASE + DIGITS + "." + "-").toList)) ~ "." ~ + NMTIMES(RANGE((LOWERCASE + ".").toList), 2, 6) + +val my_email = "christian.urban@kcl.ac.uk" + +println(matches(EMAIL, my_email)) + +println(matches(NTIMES("a", 2), "a")) +println(matches(NTIMES("a", 2), "aa")) +println(matches(NTIMES("a", 2), "aaa")) + +println(matches(NMTIMES("a", 2, 3), "a")) +println(matches(NMTIMES("a", 2, 3), "aa")) +println(matches(NMTIMES("a", 2, 3), "aaa")) +println(matches(NMTIMES("a", 2, 3), "aaaa")) diff -r e66bd5c563eb -r b5b5583a3a08 Attic/re4.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Attic/re4.scala Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,156 @@ +// A version which attempts to move whole strings, not +// just characters, under derivatives whenever possible + +abstract class Rexp +case object ZERO extends Rexp +case object ONE extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class NTIMES(r: Rexp, n: Int) extends Rexp + +// the nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case ZERO => false + case ONE => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NTIMES(r, i) => if (i == 0) true else nullable(r) +} + +// the derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case ZERO => ZERO + case ONE => ZERO + case CHAR(d) => if (c == d) ONE else ZERO + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r1) => SEQ(der(c, r1), STAR(r1)) + case NTIMES(r1, i) => + if (i == 0) ZERO else SEQ(der(c, r1), NTIMES(r1, i - 1)) +} + +def simp(r: Rexp) : Rexp = r match { + case ALT(r1, r2) => (simp(r1), simp(r2)) match { + case (ZERO, r2s) => r2s + case (r1s, ZERO) => r1s + case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s) + } + case SEQ(r1, r2) => (simp(r1), simp(r2)) match { + case (ZERO, _) => ZERO + case (_, ZERO) => ZERO + case (ONE, r2s) => r2s + case (r1s, ONE) => r1s + case (r1s, r2s) => SEQ(r1s, r2s) + } + case r => r +} + +// an example +val r = SEQ(SEQ(CHAR('x'), CHAR('y')), CHAR('z')) +der('x', r) +der('y', der('x', r)) +der('z', der('y', der('x', r))) +simp(der('z', der('y', der('x', r)))) + +// *new* +// the derivative w.r.t. a string (iterates der) +def ders2(s: List[Char], r: Rexp) : Rexp = (s, r) match { + case (Nil, r) => r + case (s, ZERO) => ZERO + case (s, ONE) => if (s == Nil) ONE else ZERO + case (s, CHAR(c)) => if (s == List(c)) ONE else + if (s == Nil) CHAR(c) else ZERO + case (s, ALT(r1, r2)) => ALT(ders2(s, r1), ders2(s, r2)) + case (c::s, r) => ders2(s, simp(der(c, r))) +} + +// the main matcher function +def matcher(r: Rexp, s: String) : Boolean = + nullable(ders2(s.toList, r)) + + +// one or zero +def OPT(r: Rexp) = ALT(r, ONE) + + +// Test Cases + +def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n)) +val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) + +// for measuring time +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start) / (i * 1.0e9) +} + + +// test: (a?{n}) (a{n}) +for (i <- 0 to 11000 by 1000) { + println(f"$i: ${time_needed(2, matcher(EVIL1(i), "a" * i))}%.5f") +} + + +// test: (a*)* b +for (i <- 0 to 7000000 by 500000) { + println(f"$i: ${time_needed(2, matcher(EVIL2, "a" * i))}%.5f") +} + + + +// the size of a regular expressions - for testing purposes +def size(r: Rexp) : Int = r match { + case ZERO => 1 + case ONE => 1 + case CHAR(_) => 1 + case ALT(r1, r2) => 1 + size(r1) + size(r2) + case SEQ(r1, r2) => 1 + size(r1) + size(r2) + case STAR(r) => 1 + size(r) + case NTIMES(r, _) => 1 + size(r) +} + + +// string of a regular expressions - for testing purposes +def string(r: Rexp) : String = r match { + case ZERO => "0" + case ONE => "1" + case CHAR(c) => c.toString + case ALT(r1, r2) => s"(${string(r1)} + ${string(r2)})" + case SEQ(CHAR(c), CHAR(d)) => s"${c}${d}" + case SEQ(r1, r2) => s"(${string(r1)} ~ ${string(r2)})" + case STAR(r) => s"(${string(r)})*" + case NTIMES(r, n) => s"(${string(r)}){${n}}" +} + + +// test: ("a" | "aa")* +val EVIL3 = STAR(ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a')))) + +// test: ("" | "a" | "aa")* +val EVIL4 = STAR(ALT(ONE, ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a'))))) + +val t1 = ders2("a".toList, EVIL3) +val t1s = simp(t1) +val t2 = ders2("aa".toList, t1s) +val t2s = simp(t2) +val t3 = ders2("aaa".toList, t2s) +val t3s = simp(t3) +val t4 = ders2("aaaa".toList, t3s) +val t4s = simp(t4) +println(string(t1) + " " + size(t1)) +println("s " + string(t1s) + " " + size(t1s)) +println(string(t2) + " " + size(t2)) +println("s " + string(t2s) + " " + size(t2s)) +println(string(t3) + " " + size(t3)) +println("s " + string(t3s) + " " + size(t3s)) +println(string(t4) + " " + size(t4)) +println("s " + string(t4s) + " " + size(t4s)) diff -r e66bd5c563eb -r b5b5583a3a08 Attic/regexp2.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Attic/regexp2.scala Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,123 @@ + +// regular expressions including NOT +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class NOT(r: Rexp) extends Rexp + + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NOT(r) => !(nullable(r)) +} + +// tests whether a regular expression +// cannot recognise more +def no_more (r: Rexp) : Boolean = r match { + case NULL => true + case EMPTY => false + case CHAR(_) => false + case ALT(r1, r2) => no_more(r1) && no_more(r2) + case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1) + case STAR(_) => false + case NOT(r) => !(no_more(r)) +} + + +// derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case NOT(r) => NOT(der (c, r)) +} + + +// regular expression for specifying +// ranges of characters +def RANGE(s : List[Char]) : Rexp = s match { + case Nil => NULL + case c::Nil => CHAR(c) + case c::s => ALT(CHAR(c), RANGE(s)) +} + +//one or more +def PLUS(r: Rexp) = SEQ(r, STAR(r)) + + +//some regular expressions +val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList) +val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList) +val LETTER = ALT(LOWERCASE, UPPERCASE) +val DIGIT = RANGE("0123456789".toList) +val NONZERODIGIT = RANGE("123456789".toList) + +val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGIT))) +val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") +val WHITESPACE = RANGE(" \n".toList) +val WHITESPACES = PLUS(WHITESPACE) + +val ALL = ALT(ALT(LETTER, DIGIT), WHITESPACE) +val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/") + + +// an example list of regular expressions +val regs: List[Rexp]= List("if", "then", "else", "+", IDENT, NUMBER, WHITESPACES, COMMENT) + + +def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s) + +def munch(r: Rexp, s: List[Char], t: List[Char]) : Option[(List[Char], List[Char])] = + s match { + case Nil if (nullable(r)) => Some(Nil, t) + case Nil => None + case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, t) + case c::s if (no_more(der (c, r))) => None + case c::s => munch(der (c, r), s, t ::: List(c)) + } + +def one_string (regs: List[Rexp], s: List[Char]) : (List[Char], List[Char]) = { + val somes = regs.map { munch(_, s, Nil) } .flatten + if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head) +} + +def tokenize (regs: List[Rexp], s: List[Char]) : List[String] = s match { + case Nil => Nil + case _ => one_string(regs, s) match { + case (rest, s) => s.mkString :: tokenize(regs, rest) + } +} + +//examples +println(tokenize(regs, "if true then then 42 else +".toList)) +println(tokenize(regs, "if+true+then+then+42+else +".toList)) +println(tokenize(regs, "ifff if 34 34".toList)) +println(tokenize(regs, "/*ifff if */ hhjj /*34 */".toList)) +println(tokenize(regs, "/* if true then */ then 42 else +".toList)) +//println(tokenize(regs, "ifff $ if 34".toList)) // causes an error because of the symbol $ diff -r e66bd5c563eb -r b5b5583a3a08 Attic/rev.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Attic/rev.scala Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,8 @@ +def rev(r: Rexp) : Rexp = r match { + case ZERO => ZERO + case ONE => ONE + case CHAR(c) => CHAR(c) + case ALT(r1, r2) => ALT(rev(r1), rev(r2)) + case SEQ(r1, r2) => SEQ(rev(r2), rev(r1)) + case STAR(r) => STAR(rev(r)) +} diff -r e66bd5c563eb -r b5b5583a3a08 Attic/token-bak.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Attic/token-bak.scala Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,145 @@ +import scala.language.implicitConversions +import scala.language.reflectiveCalls +import scala.util._ +import scala.annotation.tailrec + +sealed abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp + +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + + +implicit def RexpOps(r: Rexp) = new { + def | (s: Rexp) = ALT(r, s) + def % = STAR(r) + def ~ (s: Rexp) = SEQ(r, s) +} + +implicit def stringOps(s: String) = new { + def | (r: Rexp) = ALT(s, r) + def | (r: String) = ALT(s, r) + def % = STAR(s) + def ~ (r: Rexp) = SEQ(s, r) + def ~ (r: String) = SEQ(s, r) +} + +def Range(s : List[Char]) : Rexp = s match { + case Nil => NULL + case c::Nil => CHAR(c) + case c::s => ALT(CHAR(c), Range(s)) +} +def RANGE(s: String) = Range(s.toList) + +def PLUS(r: Rexp) = SEQ(r, STAR(r)) + +val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") +val DIGIT = RANGE("0123456789") +val ID = SYM ~ (SYM | DIGIT).% +val NUM = PLUS(DIGIT) +val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" +val SEMI: Rexp = ";" +val OP: Rexp = ":=" | "=" | "-" | "+" | "*" | "!=" | "<" | ">" +val WHITESPACE = PLUS(RANGE(" \n")) +val RPAREN: Rexp = ")" +val LPAREN: Rexp = "(" +val BEGIN: Rexp = "{" +val END: Rexp = "}" + +//regular expressions ranked by position in the list +val regs: List[Rexp] = + List(KEYWORD, ID, OP, NUM, SEMI, LPAREN, RPAREN, BEGIN, END, WHITESPACE) + +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true +} + +def zeroable (r: Rexp) : Boolean = r match { + case NULL => true + case EMPTY => false + case CHAR(_) => false + case ALT(r1, r2) => zeroable(r1) && zeroable(r2) + case SEQ(r1, r2) => zeroable(r1) || zeroable(r2) + case STAR(_) => false +} + +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) +} + + +// calculates derivatives until all of them are zeroable +@tailrec +def munch(s: List[Char], + pos: Int, + rs: List[Rexp], + last: Option[Int]): Option[Int] = rs match { + case Nil => last + case rs if (s.length <= pos) => last + case rs => { + val ders = rs.map(der(s(pos), _)) + val rs_nzero = ders.filterNot(zeroable(_)) + val rs_nulls = ders.filter(nullable(_)) + val new_last = if (rs_nulls != Nil) Some(pos) else last + munch(s, 1 + pos, rs_nzero, new_last) + } +} + +// iterates the munching function and prints +// out the component strings +@tailrec +def tokenize(s: String, rs: List[Rexp]) : Unit = munch(s.toList, 0, rs, None) match { + case None if (s == "") => println("EOF") + case None => println(s"Lexing error: $s") + case Some(n) => { + val (head, tail) = s.splitAt(n + 1) + print(s"|${head.replaceAll("\n","RET")}|") + tokenize(tail, rs) + } +} + + +val test_prog = """ +start := XXX; +x := start; +y := start; +z := start; +while 0 < x do { + while 0 < y do { + while 0 < z do { + z := z - 1 + }; + z := start; + y := y - 1 + }; + y := start; + x := x - 1 +}; +write x; +write y; +write z +""" + +tokenize(test_prog, regs) diff -r e66bd5c563eb -r b5b5583a3a08 Attic/while.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Attic/while.scala Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,244 @@ +// A parser and evaluator for the while language +// +import matcher._ +import parser._ + + +// some regular expressions +val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") +val DIGIT = RANGE("0123456789") +val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT))) +val NUM = PLUS(DIGIT) +val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false") +val SEMI: Rexp = ";" +val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">") +val WHITESPACE = PLUS(RANGE(" \n")) +val RPAREN: Rexp = ")" +val LPAREN: Rexp = "(" +val BEGIN: Rexp = "{" +val END: Rexp = "}" + +// tokens for classifying the strings that have been recognised +abstract class Token +case object T_WHITESPACE extends Token +case object T_SEMI extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token +case object T_BEGIN extends Token +case object T_END extends Token +case class T_ID(s: String) extends Token +case class T_OP(s: String) extends Token +case class T_NUM(s: String) extends Token +case class T_KWD(s: String) extends Token + +val lexing_rules: List[(Rexp, List[Char] => Token)] = + List((KEYWORD, (s) => T_KWD(s.mkString)), + (ID, (s) => T_ID(s.mkString)), + (OP, (s) => T_OP(s.mkString)), + (NUM, (s) => T_NUM(s.mkString)), + (SEMI, (s) => T_SEMI), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) => T_RPAREN), + (BEGIN, (s) => T_BEGIN), + (END, (s) => T_END), + (WHITESPACE, (s) => T_WHITESPACE)) + +// the tokenizer +val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE)) + +// the abstract syntax trees +abstract class Stmt +abstract class AExp +abstract class BExp +type Block = List[Stmt] +case object Skip extends Stmt +case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt +case class While(b: BExp, bl: Block) extends Stmt +case class Assign(s: String, a: AExp) extends Stmt + +case class Var(s: String) extends AExp +case class Num(i: Int) extends AExp +case class Aop(o: String, a1: AExp, a2: AExp) extends AExp + +case object True extends BExp +case object False extends BExp +case class Bop(o: String, a1: AExp, a2: AExp) extends BExp + +// atomic parsers +case class TokParser(tok: Token) extends Parser[List[Token], Token] { + def parse(ts: List[Token]) = ts match { + case t::ts if (t == tok) => Set((t, ts)) + case _ => Set () + } +} +implicit def token2tparser(t: Token) = TokParser(t) + +case object NumParser extends Parser[List[Token], Int] { + def parse(ts: List[Token]) = ts match { + case T_NUM(s)::ts => Set((s.toInt, ts)) + case _ => Set () + } +} + +case object IdParser extends Parser[List[Token], String] { + def parse(ts: List[Token]) = ts match { + case T_ID(s)::ts => Set((s, ts)) + case _ => Set () + } +} + +def len(xs: List[(Int, Int)]): Int = xs match { + case Nil => 0 + case (1, x)::xs => len(xs) + 1 + case (_, x)::xs => len(xs) +} + +def fst(p: (Int, Int)): Int = p match { + case Nil => 0 + case (1, x)::xs => len(xs) + 1 + case (_, x)::xs => len(xs) +} + + + +// arithmetic expressions +lazy val AExp: Parser[List[Token], AExp] = + (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } || + (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T +lazy val T: Parser[List[Token], AExp] = + (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F +lazy val F: Parser[List[Token], AExp] = + (T_LPAREN ~> AExp <~ T_RPAREN) || + IdParser ==> Var || + NumParser ==> Num + +// boolean expressions +lazy val BExp: Parser[List[Token], BExp] = + (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Bop("=", x, z): BExp } || + (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Bop("!=", x, z): BExp } || + (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Bop("<", x, z): BExp } || + (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Bop(">", x, z): BExp } || + (T_KWD("true") ==> ((_) => True)) || + (T_KWD("false") ==> ((_) => False: BExp)) + +lazy val Stmt: Parser[List[Token], Stmt] = + (T_KWD("skip") ==> ((_) => Skip: Stmt)) || + (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } || + (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==> + { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } || + (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } + +lazy val Stmts: Parser[List[Token], Block] = + (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } || + (Stmt ==> ((s) => List(s) : Block)) + +lazy val Block: Parser[List[Token], Block] = + (T_BEGIN ~> Stmts <~ T_END) || + (Stmt ==> ((s) => List(s))) + + +// examples +val p1 = "x := 5" +val p1_toks = Tok.fromString(p1) +val p1_ast = Block.parse_all(p1_toks) +println(p1_toks) +println(p1_ast) + +val p1a = "{ x := 5; y := 8}" +val p1a_toks = Tok.fromString(p1a) +val p1a_ast = Block.parse_all(p1a_toks) +println(p1a_ast) + +val p2 = "5 = 6" +val p2_toks = Tok.fromString(p2) +val p2_ast = BExp.parse_all(p2_toks) +println(p2_ast) + +val p2a = "true" +val p2a_toks = Tok.fromString(p2a) +val p2a_ast = BExp.parse_all(p2a_toks) +println(p2a_ast) + +val p3 = "if true then skip else skip" +val p3_toks = Tok.fromString(p3) +val p3_ast = Stmt.parse_all(p3_toks) +println(p3_ast) + +val p3a = "if true then x := 5 else x := 10" +val p3a_toks = Tok.fromString(p3a) +val p3a_ast = Stmt.parse_all(p3a_toks) +println(p3a_ast) + +val p3b = "if false then x := 5 else x := 10" +val p3b_toks = Tok.fromString(p3b) +val p3b_ast = Stmt.parse_all(p3b_toks) +println(p3b_ast) + +// multiplication +val p4 = """{ x := 5; + y := 4; + r := 0; + while y > 0 do { + r := r + x; + y := y - 1 + } + }""" +val p4_toks = Tok.fromString(p4) +val p4_ast = Block.parse_all(p4_toks) +println(p4_ast) + +val p5 = """ + n := 9; + minus1 := 0; + minus2 := 1; + temp := 0; + while n > 0 do { + temp := minus2; + minus2 := minus1 + minus2; + minus1 := temp; + n := n - 1 + }; + fib_res := minus2 +""" +val p5_toks = Tok.fromString(p5) +val p5_ast = Stmts.parse_all(p5_toks) + +// interpreter +type Env = Map[String, Int] + +def eval_bexp(b: BExp, env: Env) : Boolean = b match { + case True => true + case False => false + case Bop("=", a1, a2) => eval_aexp(a1, env) == eval_aexp(a2, env) + case Bop("!=", a1, a2) => !(eval_aexp(a1, env) == eval_aexp(a2, env)) + case Bop(">", a1, a2) => eval_aexp(a1, env) > eval_aexp(a2, env) + case Bop("<", a1, a2) => eval_aexp(a1, env) < eval_aexp(a2, env) +} + +def eval_aexp(a: AExp, env : Env) : Int = a match { + case Num(i) => i + case Var(s) => env(s) + case Aop("+", a1, a2) => eval_aexp(a1, env) + eval_aexp(a2, env) + case Aop("-", a1, a2) => eval_aexp(a1, env) - eval_aexp(a2, env) + case Aop("*", a1, a2) => eval_aexp(a1, env) * eval_aexp(a2, env) +} + +def eval_stmt(s: Stmt, env: Env) : Env = s match { + case Skip => env + case Assign(x, a) => env + (x -> eval_aexp(a, env)) + case If(b, bl1, bl2) => if (eval_bexp(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env) + case While(b, bl) => + if (eval_bexp(b, env)) eval_stmt(While(b, bl), eval_bl(bl, env)) + else env +} + +def eval_bl(bl: Block, env: Env) : Env = bl match { + case Nil => env + case s::bl => eval_bl(bl, eval_stmt(s, env)) +} + +//examples +println(eval_stmt(p3a_ast.head, Map.empty)) +println(eval_stmt(p3b_ast.head, Map.empty)) +println(eval_bl(p4_ast.head, Map.empty)) +println(eval_bl(p5_ast.head, Map.empty)) diff -r e66bd5c563eb -r b5b5583a3a08 Attic/while1.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Attic/while1.scala Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,221 @@ +// A parser and evaluator for the WHILE language +// + + + + + +// some regular expressions +val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") +val DIGIT = RANGE("0123456789") +val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT))) +val NUM = PLUS(DIGIT) +val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write") +val SEMI: Rexp = ";" +val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">") +val WHITESPACE = PLUS(RANGE(" \n")) +val RPAREN: Rexp = ")" +val LPAREN: Rexp = "(" +val BEGIN: Rexp = "{" +val END: Rexp = "}" +val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/") + +// tokens for classifying the strings that have been recognised +abstract class Token +case object T_WHITESPACE extends Token +case object T_COMMENT extends Token +case object T_SEMI extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token +case object T_BEGIN extends Token +case object T_END extends Token +case class T_ID(s: String) extends Token +case class T_OP(s: String) extends Token +case class T_NUM(s: String) extends Token +case class T_KWD(s: String) extends Token + +val lexing_rules: List[(Rexp, List[Char] => Token)] = + List((KEYWORD, (s) => T_KWD(s.mkString)), + (ID, (s) => T_ID(s.mkString)), + (OP, (s) => T_OP(s.mkString)), + (NUM, (s) => T_NUM(s.mkString)), + (SEMI, (s) => T_SEMI), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) => T_RPAREN), + (BEGIN, (s) => T_BEGIN), + (END, (s) => T_END), + (WHITESPACE, (s) => T_WHITESPACE), + (COMMENT, (s) => T_COMMENT)) + +// the tokenizer +val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT)) + +// the abstract syntax trees +abstract class Stmt +abstract class AExp +abstract class BExp +type Block = List[Stmt] +case object Skip extends Stmt +case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt +case class While(b: BExp, bl: Block) extends Stmt +case class Assign(s: String, a: AExp) extends Stmt +case class Write(s: String) extends Stmt + +case class Var(s: String) extends AExp +case class Num(i: Int) extends AExp +case class Aop(o: String, a1: AExp, a2: AExp) extends AExp + +case object True extends BExp +case object False extends BExp +case class Bop(o: String, a1: AExp, a2: AExp) extends BExp + +// atomic parsers +case class TokParser(tok: Token) extends Parser[List[Token], Token] { + def parse(ts: List[Token]) = ts match { + case t::ts if (t == tok) => Set((t, ts)) + case _ => Set () + } +} +implicit def token2tparser(t: Token) = TokParser(t) + +case object NumParser extends Parser[List[Token], Int] { + def parse(ts: List[Token]) = ts match { + case T_NUM(s)::ts => Set((s.toInt, ts)) + case _ => Set () + } +} + +case object IdParser extends Parser[List[Token], String] { + def parse(ts: List[Token]) = ts match { + case T_ID(s)::ts => Set((s, ts)) + case _ => Set () + } +} + + +// arithmetic expressions +lazy val AExp: Parser[List[Token], AExp] = + (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } || + (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T +lazy val T: Parser[List[Token], AExp] = + (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F +lazy val F: Parser[List[Token], AExp] = + (T_LPAREN ~> AExp <~ T_RPAREN) || + IdParser ==> Var || + NumParser ==> Num + +// boolean expressions +lazy val BExp: Parser[List[Token], BExp] = + (T_KWD("true") ==> ((_) => True: BExp)) || + (T_KWD("false") ==> ((_) => False: BExp)) || + (T_LPAREN ~> BExp <~ T_RPAREN) || + (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Bop("=", x, z): BExp } || + (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Bop("!=", x, z): BExp } || + (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Bop("<", x, z): BExp } || + (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Bop("<", z, x): BExp } + +lazy val Stmt: Parser[List[Token], Stmt] = + (T_KWD("skip") ==> ((_) => Skip: Stmt)) || + (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } || + (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==> + { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } || + (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } || + (T_KWD("write") ~ IdParser) ==> { case (x, y) => Write(y) } + +lazy val Stmts: Parser[List[Token], Block] = + (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } || + (Stmt ==> ((s) => List(s) : Block)) + +lazy val Block: Parser[List[Token], Block] = + (T_BEGIN ~> Stmts <~ T_END) || + (Stmt ==> ((s) => List(s))) + +// interpreter +type Env = Map[String, Int] + +def eval_bexp(b: BExp, env: Env) : Boolean = b match { + case True => true + case False => false + case Bop("=", a1, a2) => eval_aexp(a1, env) == eval_aexp(a2, env) + case Bop("!=", a1, a2) => !(eval_aexp(a1, env) == eval_aexp(a2, env)) + case Bop("<", a1, a2) => eval_aexp(a1, env) < eval_aexp(a2, env) +} + +def eval_aexp(a: AExp, env : Env) : Int = a match { + case Num(i) => i + case Var(s) => env(s) + case Aop("+", a1, a2) => eval_aexp(a1, env) + eval_aexp(a2, env) + case Aop("-", a1, a2) => eval_aexp(a1, env) - eval_aexp(a2, env) + case Aop("*", a1, a2) => eval_aexp(a1, env) * eval_aexp(a2, env) +} + +def eval_stmt(s: Stmt, env: Env) : Env = s match { + case Skip => env + case Assign(x, a) => env + (x -> eval_aexp(a, env)) + case If(b, bl1, bl2) => if (eval_bexp(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env) + case While(b, bl) => + if (eval_bexp(b, env)) eval_stmt(While(b, bl), eval_bl(bl, env)) + else env + case Write(x) => { println(env(x)); env } +} + +def eval_bl(bl: Block, env: Env) : Env = bl match { + case Nil => env + case s::bl => eval_bl(bl, eval_stmt(s, env)) +} + +def eval_prog(name: String) : Env = { + val tks = Tok.fromFile(name) + val ast = Stmts.parse_single(tks) + eval_bl(ast, Map.empty) +} + + +//examples + +//eval_prog("loops.while") +eval_prog("fib.while") + + +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + + +val test_prog = """ +start := XXX; +x := start; +y := start; +z := start; +while 0 < x do { + while 0 < y do { + while 0 < z do { + z := z - 1 + }; + z := start; + y := y - 1 + }; + y := start; + x := x - 1 +} +""" + + + +def eval_test(n: Int) : Unit = { + val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString)) + val ast = Stmts.parse_single(tks) + println(n + " " + time_needed(2, eval_bl(ast, Map.empty))) +} + +List(1, 200, 400, 600, 800, 1000, 1200, 1400, 1600).map(eval_test(_)) + + + + + + + diff -r e66bd5c563eb -r b5b5583a3a08 handouts/ho01.pdf Binary file handouts/ho01.pdf has changed diff -r e66bd5c563eb -r b5b5583a3a08 handouts/ho01.tex --- a/handouts/ho01.tex Mon Jul 27 11:02:48 2020 +0100 +++ b/handouts/ho01.tex Thu Jul 30 13:50:54 2020 +0100 @@ -46,6 +46,7 @@ \section*{Handout 1} + The purpose of a compiler is to transform a program a human can read and write into code the machine can run as fast as possible. Developing a compiler is an old craft going back to 1952 with the first compiler @@ -479,8 +480,18 @@ structure of regular expressions is always clear. But for writing them down in a more mathematical fashion, parentheses will be helpful. For example we will write $(r_1 + r_2)^*$, -which is different from, say $r_1 + (r_2)^*$. The former means -roughly zero or more times $r_1$ or $r_2$, while the latter +which is different from, say $r_1 + (r_2)^*$. This can be +seen if we write regular expressions as trees: + +\begin{center} +\includegraphics[scale=0.65]{../pics/r1.pdf} +\hspace{3cm} +\includegraphics[scale=0.65]{../pics/r2.pdf} +\end{center} + +\noindent +The regular expression on the left means +roughly zero or more times $r_1$ or $r_2$, while the one on the right means $r_1$, or zero or more times $r_2$. This will turn out to be two different patterns, which match in general different strings. We should also write $(r_1 + r_2) + r_3$, which is diff -r e66bd5c563eb -r b5b5583a3a08 progs/automata/build.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/automata/build.sh Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,5 @@ +#!/bin/bash +set -euo pipefail + +amm thompson.sc + diff -r e66bd5c563eb -r b5b5583a3a08 progs/automata/thompson.sc --- a/progs/automata/thompson.sc Mon Jul 27 11:02:48 2020 +0100 +++ b/progs/automata/thompson.sc Thu Jul 30 13:50:54 2020 +0100 @@ -171,7 +171,7 @@ // construction), in general the DFAs can be slow because of // the state explosion in the subset construction -for (i <- 1 to 13) { +for (i <- 1 to 7) { println(i + ": " + "%.5f".format(time_needed(2, tmatches_dfa(EVIL1(i), "a" * i)))) } diff -r e66bd5c563eb -r b5b5583a3a08 progs/bf/bfc0.sc --- a/progs/bf/bfc0.sc Mon Jul 27 11:02:48 2020 +0100 +++ b/progs/bf/bfc0.sc Thu Jul 30 13:50:54 2020 +0100 @@ -7,7 +7,9 @@ // // // Note: An interesting exercise is to call -// gcc with -O3 instead of -O0 +// gcc with -O3 instead of -O0 (see invocation +// below). + // simple instructions def instr(c: Char) : String = c match { @@ -25,7 +27,7 @@ def instrs(prog: String) : String = prog.toList.map(instr(_)).mkString -// adding the boilerplate +// adding boilerplate def compile(prog: String) : String = s"""#include #include @@ -36,16 +38,17 @@ ${instrs(prog)} return 0;}""" -def compile_file(name: String, prog: String) = + +def compile_to_file(name: String, prog: String) = os.write.over(os.pwd / name, compile(prog)) // running the c-compiler over the transpiled // BF program and running the resulting binary -def compile_run(prog: String) = { +def compile_and_run(prog: String) = { val tn = "tmp" - compile_file(s"${tn}.c", prog) + compile_to_file(s"${tn}.c", prog) os.proc("gcc", "-O0", "-o", tn, s"${tn}.c").call() // call gcc os.proc("./tmp").call(stdout = os.Inherit) // run binary } @@ -64,7 +67,7 @@ @main def main(fname: String) = { val bf_str = os.read(os.pwd / fname) - println(s"${time_needed(1, compile_run(bf_str))} secs") + println(s"${time_needed(1, compile_and_run(bf_str))} secs") } diff -r e66bd5c563eb -r b5b5583a3a08 progs/bf/bfc1.sc --- a/progs/bf/bfc1.sc Mon Jul 27 11:02:48 2020 +0100 +++ b/progs/bf/bfc1.sc Thu Jul 30 13:50:54 2020 +0100 @@ -2,8 +2,8 @@ //========================================= // // This version "optimises" the code by replacing -// for example +++ by (*ptr) += 3, instead of -// (*ptr)++, (*ptr)++, (*ptr)++ +// for example +++ by (*ptr) += 3, instead of three +// separate (*ptr)++, (*ptr)++, (*ptr)++ // // Call with // @@ -11,7 +11,7 @@ // -// generating "compound" c-instructions +// generate "compound" c-instructions def instr2(c: Char, n: Int) : String = c match { case '>' => s"ptr += $n ;" case '<' => s"ptr -= $n ;" @@ -40,7 +40,7 @@ def instrs2(prog: String) : String = splice(prog.toList, Nil).reverse.mkString -// adding the boilerplate +// adding boilerplate def compile(prog: String) : String = s"""#include #include @@ -51,16 +51,16 @@ ${instrs2(prog)} return 0;}""" -def compile_file(name: String, prog: String) = +def compile_to_file(name: String, prog: String) = os.write.over(os.pwd / name, compile(prog)) // running the c-compiler over the transpiled // BF program and running the resulting binary -def compile_run(prog: String) = { +def compile_and_run(prog: String) = { val tn = "tmp" - compile_file(s"${tn}.c", prog) + compile_to_file(s"${tn}.c", prog) os.proc("gcc", "-O0", "-o", tn, s"${tn}.c").call() // call gcc os.proc("./tmp").call(stdout = os.Inherit) // run binary } @@ -79,6 +79,6 @@ @main def main(fname: String) = { val bf_str = os.read(os.pwd / fname) - println(s"${time_needed(1, compile_run(bf_str))} secs") + println(s"${time_needed(1, compile_and_run(bf_str))} secs") } diff -r e66bd5c563eb -r b5b5583a3a08 progs/bf/bfi.sc --- a/progs/bf/bfi.sc Mon Jul 27 11:02:48 2020 +0100 +++ b/progs/bf/bfi.sc Thu Jul 30 13:50:54 2020 +0100 @@ -7,12 +7,10 @@ // -import scala.util._ - -// BF memory as a map +// BF memory is represented as a Map type Mem = Map[Int, Int] -// reading and writing BF memory +// safe reading and writing of BF memory def sread(mem: Mem, mp: Int) : Int = mem.getOrElse(mp, 0) diff -r e66bd5c563eb -r b5b5583a3a08 progs/catastrophic/catastrophic.py --- a/progs/catastrophic/catastrophic.py Mon Jul 27 11:02:48 2020 +0100 +++ b/progs/catastrophic/catastrophic.py Thu Jul 30 13:50:54 2020 +0100 @@ -9,7 +9,7 @@ # # call with timing as: # -# > time ./catastrophic.py 20 +# time ./catastrophic.py 20 # counter n given on the command line cn = sys.argv[1] diff -r e66bd5c563eb -r b5b5583a3a08 progs/catastrophic/catastrophic.rb --- a/progs/catastrophic/catastrophic.rb Mon Jul 27 11:02:48 2020 +0100 +++ b/progs/catastrophic/catastrophic.rb Thu Jul 30 13:50:54 2020 +0100 @@ -8,7 +8,7 @@ # # run on the command line with: # -# $> ruby catastrophic.rb +# ruby catastrophic.rb # nums = (1..1000) diff -r e66bd5c563eb -r b5b5583a3a08 progs/catastrophic/catastrophic2.py --- a/progs/catastrophic/catastrophic2.py Mon Jul 27 11:02:48 2020 +0100 +++ b/progs/catastrophic/catastrophic2.py Thu Jul 30 13:50:54 2020 +0100 @@ -9,7 +9,7 @@ # # call with timing as: # -# > time ./catastrophic.py 20 +# time ./catastrophic.py 20 # counter n given on the command line cn = sys.argv[1] diff -r e66bd5c563eb -r b5b5583a3a08 progs/collatz.while --- a/progs/collatz.while Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -write "Input a number "; -read n; -while n > 1 do { - if n % 2 == 0 - then n := n/2 - else n := 3*n+1; -}; -write "Yes"; diff -r e66bd5c563eb -r b5b5583a3a08 progs/defs.fun --- a/progs/defs.fun Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,85 +0,0 @@ - -def zero(x) = 0; - -def suc(x) = x + 1; - -def pred(x) = - if x == 0 then x else x - 1; - -def add(x, y) = - if x == 0 then y else suc(add(x - 1, y)); - -def mult(x, y) = - if x == 0 then 0 else add(y, mult(x - 1, y)); - -def pow(x, y) = - if y == 0 then 1 else mult(x, pow(x, y - 1)); - -def fib(n) = if n == 0 then 0 - else if n == 1 then 1 - else fib(n - 1) + fib(n - 2); - -def fact(n) = - if n == 0 then 1 else n * fact(n - 1); - -def ack(m, n) = if m == 0 then n + 1 - else if n == 0 then ack(m - 1, 1) - else ack(m - 1, ack(m, n - 1)); - -def stack_test(x) = x + 1 + 2 + 3 + 4 + 5; - -def div(x, y) = x / y; //integer division - -def rem(x, y) = x % y; //remainder - -def gcd(a, b) = - if b == 0 then a else gcd(b, a % b); - -def is_prime_aux(n, i) = - if n % i == 0 then 0 - else if (i * i) <= n then is_prime_aux(n, i + 1) - else 1; - -def is_prime(n) = if n == 2 then 1 else is_prime_aux(n, 2); - -def primes(n) = - if n == 0 then 0 - else if is_prime(n) == 1 then (write n; primes(n - 1)) - else primes(n - 1); - -def is_collatz(n) = - if n == 1 then 1 - else if n % 2 == 0 then is_collatz(n / 2) - else is_collatz(3 * n + 1); - -def collatz_aux(n, i) = - if i > n then 0 - else if is_collatz(i) == 1 then (write i; collatz_aux(n, i + 1)) - else collatz_aux(n, i + 1); - -def collatz(n) = collatz_aux(n, 1); - -def facT(n, acc) = - if n == 0 then acc else facT(n - 1, n * acc); - - -//zero(3) -//suc(8) -//pred(7) -//write(add(3, 4)) -//mult(4,5) -//pow(2, 3) -//fib(20) -//(write(fact(5)) ; fact(6)) -//(write(1) ; 2) -//write(ack(3, 12)) // for tail-rec test -//stack_test(0) -//(write (div(11, 3)); rem(11, 3)) -//gcd(54, 24) -//is_prime(2) -primes(1000) -//primes(1000000) -//collatz(4000) -//collatz(5000) -//facT(6, 1) - diff -r e66bd5c563eb -r b5b5583a3a08 progs/detokenise.scala --- a/progs/detokenise.scala Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,42 +0,0 @@ -// Detokenising the ouput of Tokeniser -//===================================== -// -// call with -// -// scala detokenise.scala fib.tks -// -// scala detokenise.scala loops.tks - -object Detokenise { - -import java.io._ -import scala.util._ - -abstract class Token extends Serializable -case object T_SEMI extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token -case class T_ID(s: String) extends Token -case class T_OP(s: String) extends Token -case class T_NUM(n: Int) extends Token -case class T_KWD(s: String) extends Token -case class T_STR(s: String) extends Token - -def deserialise[T](fname: String) : Try[T] = { - //import scala.util.Using - //Using(new ObjectInputStream(new FileInputStream(fname))) { - // in => in.readObject.asInstanceOf[T] - //} - Try(new ObjectInputStream(new FileInputStream(fname))).get { - in => in.readObject.asInstanceOf[T] - } -} - -def main(args: Array[String]) = { - val fname = args(0) - val tks = deserialise[List[Token]](fname).getOrElse(Nil) - println(s"Reading back from ${fname}:\n${tks.mkString(", ")}") -} - - -} diff -r e66bd5c563eb -r b5b5583a3a08 progs/fact.fun --- a/progs/fact.fun Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,17 +0,0 @@ -// a simple factorial program -// (including a tail recursive version) - - -def fact(n) = - if n == 0 then 1 else n * fact(n - 1); - -def facT(n, acc) = - if n == 0 then acc else facT(n - 1, n * acc); - -def facTi(n) = facT(n, 1); - -//fact(10) -//facTi(10) - -write(facTi(6)) - diff -r e66bd5c563eb -r b5b5583a3a08 progs/factors.while --- a/progs/factors.while Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,14 +0,0 @@ -// Find all factors of a given input number -// by J.R. Cordy August 2005 - -write "Input n please"; -read n; -write "The factors of n are"; -f := 2; -while n != 1 do { - while (n / f) * f == n do { - write f; - n := n / f - }; - f := f + 1 -} \ No newline at end of file diff -r e66bd5c563eb -r b5b5583a3a08 progs/fib.while --- a/progs/fib.while Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ -write "Fib"; -read n; -minus1 := 0; -minus2 := 1; -while n > 0 do { - temp := minus2; - minus2 := minus1 + minus2; - minus1 := temp; - n := n - 1 -}; -write "Result"; -write minus2 - diff -r e66bd5c563eb -r b5b5583a3a08 progs/fun-tests/defs.fun --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/fun-tests/defs.fun Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,85 @@ + +def zero(x) = 0; + +def suc(x) = x + 1; + +def pred(x) = + if x == 0 then x else x - 1; + +def add(x, y) = + if x == 0 then y else suc(add(x - 1, y)); + +def mult(x, y) = + if x == 0 then 0 else add(y, mult(x - 1, y)); + +def pow(x, y) = + if y == 0 then 1 else mult(x, pow(x, y - 1)); + +def fib(n) = if n == 0 then 0 + else if n == 1 then 1 + else fib(n - 1) + fib(n - 2); + +def fact(n) = + if n == 0 then 1 else n * fact(n - 1); + +def ack(m, n) = if m == 0 then n + 1 + else if n == 0 then ack(m - 1, 1) + else ack(m - 1, ack(m, n - 1)); + +def stack_test(x) = x + 1 + 2 + 3 + 4 + 5; + +def div(x, y) = x / y; //integer division + +def rem(x, y) = x % y; //remainder + +def gcd(a, b) = + if b == 0 then a else gcd(b, a % b); + +def is_prime_aux(n, i) = + if n % i == 0 then 0 + else if (i * i) <= n then is_prime_aux(n, i + 1) + else 1; + +def is_prime(n) = if n == 2 then 1 else is_prime_aux(n, 2); + +def primes(n) = + if n == 0 then 0 + else if is_prime(n) == 1 then (write n; primes(n - 1)) + else primes(n - 1); + +def is_collatz(n) = + if n == 1 then 1 + else if n % 2 == 0 then is_collatz(n / 2) + else is_collatz(3 * n + 1); + +def collatz_aux(n, i) = + if i > n then 0 + else if is_collatz(i) == 1 then (write i; collatz_aux(n, i + 1)) + else collatz_aux(n, i + 1); + +def collatz(n) = collatz_aux(n, 1); + +def facT(n, acc) = + if n == 0 then acc else facT(n - 1, n * acc); + + +//zero(3) +//suc(8) +//pred(7) +//write(add(3, 4)) +//mult(4,5) +//pow(2, 3) +//fib(20) +//(write(fact(5)) ; fact(6)) +//(write(1) ; 2) +//write(ack(3, 12)) // for tail-rec test +//stack_test(0) +//(write (div(11, 3)); rem(11, 3)) +//gcd(54, 24) +//is_prime(2) +primes(1000) +//primes(1000000) +//collatz(4000) +//collatz(5000) +//facT(6, 1) + diff -r e66bd5c563eb -r b5b5583a3a08 progs/fun-tests/fact.fun --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/fun-tests/fact.fun Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,17 @@ +// a simple factorial program +// (including a tail recursive version) + + +def fact(n) = + if n == 0 then 1 else n * fact(n - 1); + +def facT(n, acc) = + if n == 0 then acc else facT(n - 1, n * acc); + +def facTi(n) = facT(n, 1); + +//fact(10) +//facTi(10) + +write(facTi(6)) + diff -r e66bd5c563eb -r b5b5583a3a08 progs/fun-tests/hanoi.fun --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/fun-tests/hanoi.fun Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,10 @@ +// towers of hanoi in Fun + +let rec hanoi = fun n a b c -> + if n != 0 then ( + hanoi (n - 1) a c b; + print_endline ("Move disk from pole " ^ (show a) ^ " to pole " ^ (show b)); + hanoi (n - 1) c b a + ) else ();; + +impure $ hanoi 4 1 2 3;; \ No newline at end of file diff -r e66bd5c563eb -r b5b5583a3a08 progs/hanoi.fun --- a/progs/hanoi.fun Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -// towers of hanoi in Fun - -let rec hanoi = fun n a b c -> - if n != 0 then ( - hanoi (n - 1) a c b; - print_endline ("Move disk from pole " ^ (show a) ^ " to pole " ^ (show b)); - hanoi (n - 1) c b a - ) else ();; - -impure $ hanoi 4 1 2 3;; \ No newline at end of file diff -r e66bd5c563eb -r b5b5583a3a08 progs/i.scala --- a/progs/i.scala Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,492 +0,0 @@ - -// regular expressions including NOT -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case object ALLC extends Rexp // recognises any character -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class NOT(r: Rexp) extends Rexp // negation of a regular expression - - -// nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case ALLC => false - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case NOT(r) => !(nullable(r)) -} - -// tests whether a regular expression -// cannot recognise more -def no_more (r: Rexp) : Boolean = r match { - case NULL => true - case EMPTY => false - case ALLC => false - case CHAR(_) => false - case ALT(r1, r2) => no_more(r1) && no_more(r2) - case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1) - case STAR(_) => false - case NOT(r) => !(no_more(r)) -} - - -// derivative of a regular expression w.r.t. a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case ALLC => EMPTY - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) - case NOT(r) => NOT(der (c, r)) -} - -// regular expression for specifying -// ranges of characters -def Range(s : List[Char]) : Rexp = s match { - case Nil => NULL - case c::Nil => CHAR(c) - case c::s => ALT(CHAR(c), Range(s)) -} -def RANGE(s: String) = Range(s.toList) - - -// one or more -def PLUS(r: Rexp) = SEQ(r, STAR(r)) - -// many alternatives -def Alts(rs: List[Rexp]) : Rexp = rs match { - case Nil => NULL - case r::Nil => r - case r::rs => ALT(r, Alts(rs)) -} -def ALTS(rs: Rexp*) = Alts(rs.toList) - -// repetitions -def Seqs(rs: List[Rexp]) : Rexp = rs match { - case Nil => NULL - case r::Nil => r - case r::rs => SEQ(r, Seqs(rs)) -} -def SEQS(rs: Rexp*) = Seqs(rs.toList) - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - - -type Rule[T] = (Rexp, List[Char] => T) - -case class Tokenizer[T](rules: List[Rule[T]], excl: List[T] = Nil) { - - def munch(r: Rexp, action: List[Char] => T, s: List[Char], t: List[Char]) : Option[(List[Char], T)] = - s match { - case Nil if (nullable(r)) => Some(Nil, action(t)) - case Nil => None - case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, action(t)) - case c::s if (no_more(der (c, r))) => None - case c::s => munch(der (c, r), action, s, t ::: List(c)) - } - - def one_token(s: List[Char]) : Either[(List[Char], T), String] = { - val somes = rules.map { (r) => munch(r._1, r._2, s, Nil) }.flatten - if (somes == Nil) Right(s.mkString) - else Left(somes sortBy (_._1.length) head) - } - - def tokenize(cs: List[Char]) : List[T] = cs match { - case Nil => Nil - case _ => one_token(cs) match { - case Left((rest, token)) => token :: tokenize(rest) - case Right(s) => { println("Cannot tokenize: \"" + s + "\""); Nil } - } - } - - def fromString(s: String) : List[T] = - tokenize(s.toList).filterNot(excl.contains(_)) - - def fromFile(name: String) : List[T] = - fromString(io.Source.fromFile(name).mkString) - -} - - -// parser combinators with input type I and return type T - -abstract class Parser[I <% Seq[_], T] { - def parse(ts: I): Set[(T, I)] - - def parse_all(ts: I) : Set[T] = - for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head - - def parse_single(ts: I) : T = parse_all(ts).toList match { - case t::Nil => t - case _ => { println ("Parse Error") ; sys.exit(-1) } - } - - def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right) - def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f) - def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right) - def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2) - def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1) -} - -class SeqParser[I <% Seq[_], T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] { - def parse(sb: I) = - for ((head1, tail1) <- p.parse(sb); - (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2) -} - -class AltParser[I <% Seq[_], T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] { - def parse(sb: I) = p.parse(sb) ++ q.parse(sb) -} - -class FunParser[I <% Seq[_], T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] { - def parse(sb: I) = - for ((head, tail) <- p.parse(sb)) yield (f(head), tail) -} - - -// A parser and evaluator for teh while language -// -//:load matcher.scala -//:load parser3.scala - -// some regular expressions -val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") -val DIGIT = RANGE("0123456789") -val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT))) -val NUM = PLUS(DIGIT) -val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write") -val SEMI: Rexp = ";" -val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">") -val WHITESPACE = PLUS(RANGE(" \n")) -val RPAREN: Rexp = ")" -val LPAREN: Rexp = "(" -val BEGIN: Rexp = "{" -val END: Rexp = "}" -val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/") - -// tokens for classifying the strings that have been recognised -abstract class Token -case object T_WHITESPACE extends Token -case object T_COMMENT extends Token -case object T_SEMI extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token -case object T_BEGIN extends Token -case object T_END extends Token -case class T_ID(s: String) extends Token -case class T_OP(s: String) extends Token -case class T_NUM(s: String) extends Token -case class T_KWD(s: String) extends Token - -val lexing_rules: List[Rule[Token]] = - List((KEYWORD, (s) => T_KWD(s.mkString)), - (ID, (s) => T_ID(s.mkString)), - (OP, (s) => T_OP(s.mkString)), - (NUM, (s) => T_NUM(s.mkString)), - (SEMI, (s) => T_SEMI), - (LPAREN, (s) => T_LPAREN), - (RPAREN, (s) => T_RPAREN), - (BEGIN, (s) => T_BEGIN), - (END, (s) => T_END), - (WHITESPACE, (s) => T_WHITESPACE), - (COMMENT, (s) => T_COMMENT)) - -// the tokenizer -val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT)) - -// the abstract syntax trees -abstract class Stmt -abstract class AExp -abstract class BExp -type Block = List[Stmt] -case object Skip extends Stmt -case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt -case class While(b: BExp, bl: Block) extends Stmt -case class Assign(s: String, a: AExp) extends Stmt -case class Write(s: String) extends Stmt - -case class Var(s: String) extends AExp -case class Num(i: Int) extends AExp -case class Aop(o: String, a1: AExp, a2: AExp) extends AExp - -case object True extends BExp -case object False extends BExp -case class Relop(o: String, a1: AExp, a2: AExp) extends BExp - -// atomic parsers -case class TokParser(tok: Token) extends Parser[List[Token], Token] { - def parse(ts: List[Token]) = ts match { - case t::ts if (t == tok) => Set((t, ts)) - case _ => Set () - } -} -implicit def token2tparser(t: Token) = TokParser(t) - -case object NumParser extends Parser[List[Token], Int] { - def parse(ts: List[Token]) = ts match { - case T_NUM(s)::ts => Set((s.toInt, ts)) - case _ => Set () - } -} - -case object IdParser extends Parser[List[Token], String] { - def parse(ts: List[Token]) = ts match { - case T_ID(s)::ts => Set((s, ts)) - case _ => Set () - } -} - - -// arithmetic expressions -lazy val AExp: Parser[List[Token], AExp] = - (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } || - (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T -lazy val T: Parser[List[Token], AExp] = - (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F -lazy val F: Parser[List[Token], AExp] = - (T_LPAREN ~> AExp <~ T_RPAREN) || - IdParser ==> Var || - NumParser ==> Num - -// boolean expressions -lazy val BExp: Parser[List[Token], BExp] = - (T_KWD("true") ==> ((_) => True: BExp)) || - (T_KWD("false") ==> ((_) => False: BExp)) || - (T_LPAREN ~> BExp <~ T_RPAREN) || - (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Relop("=", x, z): BExp } || - (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Relop("!=", x, z): BExp } || - (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Relop("<", x, z): BExp } || - (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Relop("<", z, x): BExp } - -lazy val Stmt: Parser[List[Token], Stmt] = - (T_KWD("skip") ==> ((_) => Skip: Stmt)) || - (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } || - (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==> - { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } || - (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } || - (T_KWD("write") ~ IdParser) ==> { case (x, y) => Write(y) } - -lazy val Stmts: Parser[List[Token], Block] = - (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } || - (Stmt ==> ((s) => List(s) : Block)) - -lazy val Block: Parser[List[Token], Block] = - (T_BEGIN ~> Stmts <~ T_END) || - (Stmt ==> ((s) => List(s))) - -// compiler -val beginning = """ -.class public XXX.XXX -.super java/lang/Object - -.method public ()V - aload_0 - invokenonvirtual java/lang/Object/()V - return -.end method - -.method public static write(I)V - .limit locals 5 - .limit stack 5 - iload 0 - getstatic java/lang/System/out Ljava/io/PrintStream; - swap - invokevirtual java/io/PrintStream/println(I)V - return -.end method - - -.method public static main([Ljava/lang/String;)V - .limit locals 200 - .limit stack 200 - -""" - -val ending = """ - - return - -.end method -""" - -// for generating new labels -var counter = -1 - -def Fresh(x: String) = { - counter += 1 - x ++ "_" ++ counter.toString() -} - -type Env = Map[String, String] -type Instrs = List[String] - -def compile_aexp(a: AExp, env : Env) : Instrs = a match { - case Num(i) => List("ldc " + i.toString + "\n") - case Var(s) => List("iload " + env(s) + "\n") - case Aop("+", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("iadd\n") - case Aop("-", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("isub\n") - case Aop("*", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("imul\n") -} - -def compile_bexp(b: BExp, env : Env, jmp: String) : Instrs = b match { - case True => Nil - case False => List("goto " + jmp + "\n") - case Relop("=", a1, a2) => - compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpne " + jmp + "\n") - case Relop("!=", a1, a2) => - compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpeq " + jmp + "\n") - case Relop("<", a1, a2) => - compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpge " + jmp + "\n") -} - - -def compile_stmt(s: Stmt, env: Env) : (Instrs, Env) = s match { - case Skip => (Nil, env) - case Assign(x, a) => { - val index = if (env.isDefinedAt(x)) env(x) else env.keys.size.toString - (compile_aexp(a, env) ++ - List("istore " + index + "\n"), env + (x -> index)) - } - case If(b, bl1, bl2) => { - val if_else = Fresh("If_else") - val if_end = Fresh("If_end") - val (instrs1, env1) = compile_bl(bl1, env) - val (instrs2, env2) = compile_bl(bl2, env1) - (compile_bexp(b, env, if_else) ++ - instrs1 ++ - List("goto " + if_end + "\n") ++ - List("\n" + if_else + ":\n\n") ++ - instrs2 ++ - List("\n" + if_end + ":\n\n"), env2) - } - case While(b, bl) => { - val loop_begin = Fresh("Loop_begin") - val loop_end = Fresh("Loop_end") - val (instrs1, env1) = compile_bl(bl, env) - (List("\n" + loop_begin + ":\n\n") ++ - compile_bexp(b, env, loop_end) ++ - instrs1 ++ - List("goto " + loop_begin + "\n") ++ - List("\n" + loop_end + ":\n\n"), env1) - } - case Write(x) => - (List("iload " + env(x) + "\n" + "invokestatic XXX/XXX/write(I)V\n"), env) -} - -def compile_bl(bl: Block, env: Env) : (Instrs, Env) = bl match { - case Nil => (Nil, env) - case s::bl => { - val (instrs1, env1) = compile_stmt(s, env) - val (instrs2, env2) = compile_bl(bl, env1) - (instrs1 ++ instrs2, env2) - } -} - -def compile(input: String) : String = { - val class_name = input.split('.')(0) - val tks = Tok.fromFile(input) - val ast = Stmts.parse_single(tks) - val instructions = compile_bl(ast, Map.empty)._1 - (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name) -} - - -def compile_to(input: String, output: String) = { - val fw = new java.io.FileWriter(output) - fw.write(compile(input)) - fw.close() -} - -// -val tks = Tok.fromString("x := x + 1") -val ast = Stmt.parse_single(tks) -println(compile_stmt(ast, Map("x" -> "n"))._1.mkString) - - - -//examples - -compile_to("loops.while", "loops.j") -//compile_to("fib.while", "fib.j") - - -// testing cases for time measurements -/* -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - -// for testing -import scala.sys.process._ - -val test_prog = """ -start := XXX; -x := start; -y := start; -z := start; -while 0 < x do { - while 0 < y do { - while 0 < z do { - z := z - 1 - }; - z := start; - y := y - 1 - }; - y := start; - x := x - 1 -}; -write x; -write y; -write z -""" - - -def compile_test(n: Int) : Unit = { - val class_name = "LOOP" - val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString)) - val ast = Stmts.parse_single(tks) - val instructions = compile_bl(ast, Map.empty)._1 - val assembly = (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name) - val fw = new java.io.FileWriter(class_name + ".j") - fw.write(assembly) - fw.close() - val test = ("java -jar jvm/jasmin-2.4/jasmin.jar " + class_name + ".j").!! - println(n + " " + time_needed(2, ("java " + class_name + "/" + class_name).!!)) -} - -List(1, 5000, 10000, 50000, 100000, 250000, 500000, 750000, 1000000).map(compile_test(_)) - - - -// javabyte code assmbler -// -// java -jar jvm/jasmin-2.4/jasmin.jar loops.j - -*/ - - - - - diff -r e66bd5c563eb -r b5b5583a3a08 progs/lexer.scala --- a/progs/lexer.scala Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,280 +0,0 @@ -// A simple lexer inspired by work of Sulzmann & Lu -//================================================== - - -import scala.language.implicitConversions -import scala.language.reflectiveCalls - -// regular expressions including records -abstract class Rexp -case object ZERO extends Rexp -case object ONE extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class RECD(x: String, r: Rexp) extends Rexp - -// values -abstract class Val -case object Empty extends Val -case class Chr(c: Char) extends Val -case class Sequ(v1: Val, v2: Val) extends Val -case class Left(v: Val) extends Val -case class Right(v: Val) extends Val -case class Stars(vs: List[Val]) extends Val -case class Rec(x: String, v: Val) extends Val - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]): Rexp = s match { - case Nil => ONE - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = - charlist2rexp(s.toList) - -implicit def RexpOps(r: Rexp) = new { - def | (s: Rexp) = ALT(r, s) - def % = STAR(r) - def ~ (s: Rexp) = SEQ(r, s) -} - -implicit def stringOps(s: String) = new { - def | (r: Rexp) = ALT(s, r) - def | (r: String) = ALT(s, r) - def % = STAR(s) - def ~ (r: Rexp) = SEQ(s, r) - def ~ (r: String) = SEQ(s, r) - def $ (r: Rexp) = RECD(s, r) -} - -def nullable(r: Rexp) : Boolean = r match { - case ZERO => false - case ONE => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case RECD(_, r1) => nullable(r1) -} - -def der(c: Char, r: Rexp) : Rexp = r match { - case ZERO => ZERO - case ONE => ZERO - case CHAR(d) => if (c == d) ONE else ZERO - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) - case RECD(_, r1) => der(c, r1) -} - - -// extracts a string from value -def flatten(v: Val) : String = v match { - case Empty => "" - case Chr(c) => c.toString - case Left(v) => flatten(v) - case Right(v) => flatten(v) - case Sequ(v1, v2) => flatten(v1) + flatten(v2) - case Stars(vs) => vs.map(flatten).mkString - case Rec(_, v) => flatten(v) -} - - -// extracts an environment from a value; -// used for tokenise a string -def env(v: Val) : List[(String, String)] = v match { - case Empty => Nil - case Chr(c) => Nil - case Left(v) => env(v) - case Right(v) => env(v) - case Sequ(v1, v2) => env(v1) ::: env(v2) - case Stars(vs) => vs.flatMap(env) - case Rec(x, v) => (x, flatten(v))::env(v) -} - -// The Injection Part of the lexer - -def mkeps(r: Rexp) : Val = r match { - case ONE => Empty - case ALT(r1, r2) => - if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2)) - case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) - case STAR(r) => Stars(Nil) - case RECD(x, r) => Rec(x, mkeps(r)) -} - -def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { - case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) - case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) - case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) - case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) - case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1)) - case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2)) - case (CHAR(d), Empty) => Chr(c) - case (RECD(x, r1), _) => Rec(x, inj(r1, c, v)) -} - -// some "rectification" functions for simplification -def F_ID(v: Val): Val = v -def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v)) -def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v)) -def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match { - case Right(v) => Right(f2(v)) - case Left(v) => Left(f1(v)) -} -def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match { - case Sequ(v1, v2) => Sequ(f1(v1), f2(v2)) -} -def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = - (v:Val) => Sequ(f1(Empty), f2(v)) -def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = - (v:Val) => Sequ(f1(v), f2(Empty)) -def F_RECD(f: Val => Val) = (v:Val) => v match { - case Rec(x, v) => Rec(x, f(v)) -} -def F_ERROR(v: Val): Val = throw new Exception("error") - -def simp(r: Rexp): (Rexp, Val => Val) = r match { - case ALT(r1, r2) => { - val (r1s, f1s) = simp(r1) - val (r2s, f2s) = simp(r2) - (r1s, r2s) match { - case (ZERO, _) => (r2s, F_RIGHT(f2s)) - case (_, ZERO) => (r1s, F_LEFT(f1s)) - case _ => if (r1s == r2s) (r1s, F_LEFT(f1s)) - else (ALT (r1s, r2s), F_ALT(f1s, f2s)) - } - } - case SEQ(r1, r2) => { - val (r1s, f1s) = simp(r1) - val (r2s, f2s) = simp(r2) - (r1s, r2s) match { - case (ZERO, _) => (ZERO, F_ERROR) - case (_, ZERO) => (ZERO, F_ERROR) - case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s)) - case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s)) - case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s)) - } - } - case r => (r, F_ID) -} - -// lexing functions including simplification -def lex_simp(r: Rexp, s: List[Char]) : Val = s match { - case Nil => if (nullable(r)) mkeps(r) else - { throw new Exception("lexing error") } - case c::cs => { - val (r_simp, f_simp) = simp(der(c, r)) - inj(r, c, f_simp(lex_simp(r_simp, cs))) - } -} - -def lexing_simp(r: Rexp, s: String) = - env(lex_simp(r, s.toList)) - - -// The Lexing Rules for the Fun Language - -def PLUS(r: Rexp) = r ~ r.% - -def Range(s : List[Char]) : Rexp = s match { - case Nil => ZERO - case c::Nil => CHAR(c) - case c::s => ALT(CHAR(c), Range(s)) -} -def RANGE(s: String) = Range(s.toList) - -val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") -val DIGIT = RANGE("0123456789") -val ID = SYM ~ (SYM | DIGIT).% -val NUM = PLUS(DIGIT) -val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" -val SEMI: Rexp = ";" -val OP: Rexp = ":=" | "=" | "-" | "+" | "*" | "!=" | "<" | ">" -val WHITESPACE = PLUS(" " | "\n" | "\t") -val RPAREN: Rexp = "{" -val LPAREN: Rexp = "}" -val STRING: Rexp = "\"" ~ SYM.% ~ "\"" - - -val WHILE_REGS = (("k" $ KEYWORD) | - ("i" $ ID) | - ("o" $ OP) | - ("n" $ NUM) | - ("s" $ SEMI) | - ("str" $ STRING) | - ("p" $ (LPAREN | RPAREN)) | - ("w" $ WHITESPACE)).% - - -// Two Simple While Tests -//======================== - -println("test: read n") - -val prog0 = """read n""" -println(lexing_simp(WHILE_REGS, prog0)) - -println("test: read n; write n ") - -val prog1 = """read n; write n""" -println(lexing_simp(WHILE_REGS, prog1)) - - -// Bigger Tests -//============== - -// escapes strings and prints them out as "", "\n" and so on -def esc(raw: String): String = { - import scala.reflect.runtime.universe._ - Literal(Constant(raw)).toString -} - -def escape(tks: List[(String, String)]) = - tks.map{ case (s1, s2) => (s1, esc(s2))} - -val prog2 = """ -write "Fib"; -read n; -minus1 := 0; -minus2 := 1; -while n > 0 do { - temp := minus2; - minus2 := minus1 + minus2; - minus1 := temp; - n := n - 1 -}; -write "Result"; -write minus2 -""" - -println("lexing Fib") -println(escape(lexing_simp(WHILE_REGS, prog2)).mkString("\n")) - - - -val prog3 = """ -start := 1000; -x := start; -y := start; -z := start; -while 0 < x do { - while 0 < y do { - while 0 < z do { - z := z - 1 - }; - z := start; - y := y - 1 - }; - y := start; - x := x - 1 -} -""" - -println("lexing Loops") -println(escape(lexing_simp(WHILE_REGS, prog3)).mkString("\n")) - diff -r e66bd5c563eb -r b5b5583a3a08 progs/lexer/lexer.sc --- a/progs/lexer/lexer.sc Mon Jul 27 11:02:48 2020 +0100 +++ b/progs/lexer/lexer.sc Thu Jul 30 13:50:54 2020 +0100 @@ -81,7 +81,7 @@ } -// extracts a string from value +// extracts a string from a value def flatten(v: Val) : String = v match { case Empty => "" case Chr(c) => c.toString diff -r e66bd5c563eb -r b5b5583a3a08 progs/loops.while --- a/progs/loops.while Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,14 +0,0 @@ -start := 1000; -x := start; -y := start; -z := start; -while 0 < x do { - while 0 < y do { - while 0 < z do { z := z - 1 }; - z := start; - y := y - 1 - }; - y := start; - x := x - 1 -} - diff -r e66bd5c563eb -r b5b5583a3a08 progs/matcher/build.sc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/matcher/build.sc Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,15 @@ +import mill._ +import scalalib._ + +val baseDir = build.millSourcePath + + +object matcher extends ScalaModule { + def scalaVersion = "2.13.2" + def mainClass = Some("re1 all") + + //override def resources = T.sources(baseDir) + override def sources = T.sources(baseDir / "re1.sc") + + +} diff -r e66bd5c563eb -r b5b5583a3a08 progs/matcher/build.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/matcher/build.sh Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,7 @@ +#!/bin/bash +set -euo pipefail + +amm re1.sc all +amm re2.sc all +amm re3.sc all +amm re4.sc all diff -r e66bd5c563eb -r b5b5583a3a08 progs/matcher/re2.sc --- a/progs/matcher/re2.sc Mon Jul 27 11:02:48 2020 +0100 +++ b/progs/matcher/re2.sc Thu Jul 30 13:50:54 2020 +0100 @@ -74,20 +74,23 @@ -// test: (a?{n}) (a{n}) @doc("Test (a?{n}) (a{n})") @main def test1() = { + println("Test (a?{n}) (a{n})") + for (i <- 0 to 1000 by 100) { println(f"$i: ${time_needed(2, matcher(EVIL1(i), "a" * i))}%.5f") } } -// test: (a*)* b + @doc("Test (a*)* b") @main def test2() = { - for (i <- 0 to 20) { + println("Test (a*)* b") + + for (i <- 0 to 20 by 2) { println(f"$i: ${time_needed(2, matcher(EVIL2, "a" * i))}%.5f") } } diff -r e66bd5c563eb -r b5b5583a3a08 progs/matcher/re3.sc --- a/progs/matcher/re3.sc Mon Jul 27 11:02:48 2020 +0100 +++ b/progs/matcher/re3.sc Thu Jul 30 13:50:54 2020 +0100 @@ -96,19 +96,23 @@ } -//test: (a?{n}) (a{n}) +// @doc("Test (a?{n}) (a{n})") @main def test1() = { + println("Test (a?{n}) (a{n})") + for (i <- 0 to 8000 by 1000) { println(f"$i: ${time_needed(3, matcher(EVIL1(i), "a" * i))}%.5f") } } -//test: (a*)* b +// @doc("Test (a*)* b") @main def test2() = { + println("Test (a*)* b") + for (i <- 0 to 6000000 by 500000) { println(f"$i: ${time_needed(3, matcher(EVIL2, "a" * i))}%.5f") } diff -r e66bd5c563eb -r b5b5583a3a08 progs/matcher/re4.sc --- a/progs/matcher/re4.sc Mon Jul 27 11:02:48 2020 +0100 +++ b/progs/matcher/re4.sc Thu Jul 30 13:50:54 2020 +0100 @@ -3,11 +3,14 @@ // // call the test cases with X = {1,2} // -// amm re3.sc testX +// amm re4.sc testX // // or // -// amm re3.sc all +// amm re4.sc all +// +// !! DO NOT USE THIS VERSION AS A STARTING POINT !! +// !! FOR CW1 & 2! USE re3.sc INSTEAD !! abstract class Rexp diff -r e66bd5c563eb -r b5b5583a3a08 progs/mllex.scala --- a/progs/mllex.scala Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,109 +0,0 @@ -:load matcher.scala - - -// some regular expressions -val KEYWORDS = ALTS(List("#", "(", ")", ",", "->", "...", ":", ":>", ";", "=", - "=>", "[", "]", "_", "{", "|", "}", "abstype", "and", "andalso", "as", - "case", "datatype", "do", "else", "end", "eqtype", "exception", "fn", - "fun", "functor", "handle", "if", "in", "include", "infix", "infixr", - "let", "local", "nonfix", "of", "op", "open", "orelse", "raise", "rec", - "sharing", "sig", "signature", "struct", "structure", "then", "type", - "val", "where", "while", "with", "withtype")) - -val DIGITS = RANGE("0123456789") -val NONZERODIGITS = RANGE("123456789") - -val POSITIVES = ALT(SEQ(NONZERODIGITS, STAR(DIGITS)), "0") -val INTEGERS = ALT(SEQ("~", POSITIVES), POSITIVES) - -val ALL = ALTS(KEYWORDS, INTEGERS) - -val COMMENT = SEQS("/*", NOT(SEGS(STAR(ALL), "*/", STAR(ALL))), "*/") - - - -val LPAREN = CHAR('(') -val RPAREN = CHAR(')') -val WHITESPACE = PLUS(RANGE(" \n".toList)) -val OPS = RANGE("+-*".toList) - -// for classifying the strings that have been recognised -abstract class Token -case object T_WHITESPACE extends Token -case object T_NUM extends Token -case class T_OP(s: String) extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token -case class T_NT(s: String, rhs: List[Token]) extends Token - -def tokenizer(rs: List[Rule[Token]], s: String) : List[Token] = - tokenize(rs, s.toList).filterNot(_ match { - case T_WHITESPACE => true - case _ => false - }) - - - -// lexing rules for arithmetic expressions -val lexing_rules: List[Rule[Token]]= - List((NUMBER, (s) => T_NUM), - (WHITESPACE, (s) => T_WHITESPACE), - (LPAREN, (s) => T_LPAREN), - (RPAREN, (s) => T_RPAREN), - (OPS, (s) => T_OP(s.mkString))) - -tokenize_file(Nil, "nominal_library.ML") - - - - -type Grammar = List[(String, List[Token])] - -// grammar for arithmetic expressions -val grammar = - List ("E" -> List(T_NUM), - "E" -> List(T_NT("E", Nil), T_OP("+"), T_NT("E", Nil)), - "E" -> List(T_NT("E", Nil), T_OP("-"), T_NT("E", Nil)), - "E" -> List(T_NT("E", Nil), T_OP("*"), T_NT("E", Nil)), - "E" -> List(T_LPAREN, T_NT("E", Nil), T_RPAREN)) - -def startsWith[A](ts1: List[A], ts2: List[A]) : Boolean = (ts1, ts2) match { - case (_, Nil) => true - case (T_NT(e, _)::ts1,T_NT(f, _)::ts2) => (e == f) && startsWith(ts1, ts2) - case (t1::ts1, t2::ts2) => (t1 == t2) && startsWith(ts1, ts2) - case _ => false -} - -def chop[A](ts1: List[A], prefix: List[A], ts2: List[A]) : Option[(List[A], List[A])] = - ts1 match { - case Nil => None - case t::ts => - if (startsWith(ts1, prefix)) Some(ts2.reverse, ts1.drop(prefix.length)) - else chop(ts, prefix, t::ts2) - } - -// examples -chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil) -chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil) - -def replace[A](ts: List[A], out: List[A], in: List [A]) = - chop(ts, out, Nil) match { - case None => None - case Some((before, after)) => Some(before ::: in ::: after) - } - -def parse1(g: Grammar, ts: List[Token]) : Boolean = ts match { - case List(T_NT("E", tree)) => { println(tree); true } - case _ => { - val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(T_NT(lhs, rhs))) - tss.flatten.exists(parse1(g, _)) - } -} - - -println() ; parse1(grammar, tokenizer(lexing_rules, "2 + 3 * 4 + 1")) -println() ; parse1(grammar, tokenizer(lexing_rules, "(2 + 3) * (4 + 1)")) -println() ; parse1(grammar, tokenizer(lexing_rules, "(2 + 3) * 4 (4 + 1)")) - - - diff -r e66bd5c563eb -r b5b5583a3a08 progs/parser-combinators/build.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/parser-combinators/build.sh Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,6 @@ +#!/bin/bash +set -euo pipefail + +amm comb1.sc +amm comb2.sc + diff -r e66bd5c563eb -r b5b5583a3a08 progs/parser-combinators/comb1.sc --- a/progs/parser-combinators/comb1.sc Mon Jul 27 11:02:48 2020 +0100 +++ b/progs/parser-combinators/comb1.sc Thu Jul 30 13:50:54 2020 +0100 @@ -1,11 +1,14 @@ // Parser Combinators: Simple Version //==================================== +// +// Call with +// +// amm comb1.sc -/* - Note, in the lectures I did not show the implicit type constraint - I : IsSeq, which means that the input type 'I' needs to be - a sequence. -*/ + +// Note, in the lectures I did not show the implicit type constraint +// I : IsSeq, which means that the input type 'I' needs to be +// a sequence. type IsSeq[A] = A => Seq[_] @@ -33,12 +36,14 @@ def parse(in: I) = p.parse(in) ++ q.parse(in) } -// parser map +// map parser class MapParser[I : IsSeq, T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] { def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl) } + + // an example of an atomic parser for characters case class CharParser(c: Char) extends Parser[String, Char] { def parse(in: String) = @@ -59,6 +64,7 @@ val NumParser = RegexParser("[0-9]+".r) def StrParser(s: String) = RegexParser(Regex.quote(s).r) + // NumParserInt transforms a "string integer" into a propper Int // (needs "new" because MapParser is not a case class) @@ -210,7 +216,7 @@ // a problem with the arithmetic expression parser: it -// gets vert slow with deeply nested parentheses +// gets very slow with deeply nested parentheses println("Runtime problem") println(E.parse("1")) diff -r e66bd5c563eb -r b5b5583a3a08 progs/parser-combinators/comb2.sc --- a/progs/parser-combinators/comb2.sc Mon Jul 27 11:02:48 2020 +0100 +++ b/progs/parser-combinators/comb2.sc Thu Jul 30 13:50:54 2020 +0100 @@ -1,18 +1,28 @@ -// Parser Combinators: Simple Version +// Parser Combinators: +// Simple Version for WHILE-language //==================================== +// +// with some added convenience for +// map-parsers and grammar rules +// +// call with +// +// amm comb2.sc -// more convenience for the map parsers later on + + +// more convenience for the map parsers later on; +// it allows writing nested patterns as +// case x ~ y ~ z => ... + case class ~[+A, +B](_1: A, _2: B) -/* - Note, in the lectures I did not show the implicit type constraint - I : IsSeq, which means that the input type 'I' needs to be - a sequence. -*/ +// constraint for the input type IsSeq[A] = A => Seq[_] + abstract class Parser[I : IsSeq, T]{ def parse(in: I): Set[(T, I)] @@ -141,7 +151,7 @@ (p"false".map[BExp]{ _ => False }) || (p"(" ~ BExp ~ p")").map[BExp]{ case _ ~ x ~ _ => x } -// statement +// a single statement lazy val Stmt: Parser[String, Stmt] = ((p"skip".map[Stmt]{_ => Skip }) || (IdParser ~ p":=" ~ AExp).map[Stmt]{ case x ~ _ ~ z => Assign(x, z) } || @@ -223,7 +233,7 @@ def eval(bl: Block) : Env = eval_bl(bl, Map()) // parse + evaluate fib program; then lookup what is -// stored under the variable result +// stored under the variable "result" println(eval(Stmts.parse_all(fib).head)("result")) @@ -243,6 +253,7 @@ println(eval(Stmts.parse_all(factors).head)) + // calculate all prime numbers up to a number println("Primes") diff -r e66bd5c563eb -r b5b5583a3a08 progs/parser1.scala --- a/progs/parser1.scala Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,88 +0,0 @@ -// A naive bottom-up parser with backtracking -// -// Needs: -// :load matcher.scala - -// some regular expressions -val DIGIT = RANGE("0123456789") -val NONZERODIGIT = RANGE("123456789") - -val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") -val LPAREN = CHAR('(') -val RPAREN = CHAR(')') -val WHITESPACE = PLUS(RANGE(" \n")) -val OPS = RANGE("+-*") - -// for classifying the strings that have been recognised - -abstract class Token -case object T_WHITESPACE extends Token -case object T_NUM extends Token -case class T_OP(s: String) extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token -case class NT(s: String) extends Token - -// lexing rules for arithmetic expressions -val lexing_rules: List[Rule[Token]]= - List((NUMBER, (s) => T_NUM), - (WHITESPACE, (s) => T_WHITESPACE), - (LPAREN, (s) => T_LPAREN), - (RPAREN, (s) => T_RPAREN), - (OPS, (s) => T_OP(s.mkString))) - -// the tokenizer -val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE)) - -type Grammar = List[(String, List[Token])] - -// grammar for arithmetic expressions -val grammar = - List ("F" -> List(T_NUM), - "E" -> List(T_NUM), - "E" -> List(NT("E"), T_OP("+"), NT("E")), - "E" -> List(NT("E"), T_OP("-"), NT("E")), - "E" -> List(NT("E"), T_OP("*"), NT("E")), - "E" -> List(T_LPAREN, NT("E"), T_RPAREN)) - - -def chop[A](ts1: List[A], prefix: List[A], ts2: List[A]) : Option[(List[A], List[A])] = - ts1 match { - case Nil => None - case t::ts => - if (ts1.startsWith(prefix)) Some(ts2.reverse, ts1.drop(prefix.length)) - else chop(ts, prefix, t::ts2) - } - -// examples for chop -chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil) -chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil) - -def replace[A](ts: List[A], out: List[A], in: List [A]) = - chop(ts, out, Nil) match { - case None => None - case Some((before, after)) => Some(before ::: in ::: after) - } - -def parse(g: Grammar, ts: List[Token]) : Boolean = { - println(ts) - if (ts == List(NT("E"))) true - else { - val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(NT(lhs))) - tss.flatten.exists(parse(g, _)) - } -} - -def parser(g: Grammar, s: String) = { - println("\n") - parse(g, Tok.fromString(s)) -} - - - -parser(grammar, "2 + 3 * 4 + 1") -parser(grammar, "(2 + 3) * (4 + 1)") -parser(grammar, "(2 + 3) * 4 (4 + 1)") - - - diff -r e66bd5c563eb -r b5b5583a3a08 progs/parser4.scala --- a/progs/parser4.scala Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,82 +0,0 @@ - -// parser combinators with input type I and return type T - -case class SubString(s: String, l: Int, h: Int) { - def low = l - def high = h - def length = h - l - def substring(l: Int = l, h: Int = h) = s.slice(l, h) - def set(low: Int = l, high: Int = h) = SubString(s, low, high) - -} - -type Ctxt = List[(String, SubString)] - -abstract class Parser[T] { - - def parse(ts: SubString, ctxt: Ctxt): Set[(T, SubString)] - - def parse_all(s: String) : Set[T] = - for ((head, tail) <- parse(SubString(s, 0, s.length), Nil); if (tail.substring() == "")) yield head - - def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right) - def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f) - def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right) -} - -class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] { - def parse(sb: SubString, ctxt: Ctxt) = - for ((head1, tail1) <- p.parse(sb, ctxt); - (head2, tail2) <- q.parse(tail1, ctxt)) yield ((head1, head2), tail2) -} - -class AltParser[T](p: => Parser[T], q: => Parser[T]) extends Parser[T] { - def parse(sb: SubString, ctxt: Ctxt) = p.parse(sb, ctxt) ++ q.parse(sb, ctxt) -} - -class FunParser[T, S](p: => Parser[T], f: T => S) extends Parser[S] { - def parse(sb: SubString, ctxt: Ctxt) = - for ((head, tail) <- p.parse(sb, ctxt)) yield (f(head), tail) -} - -case class SubStringParser(s: String) extends Parser[SubString] { - val n = s.length - def parse(sb: SubString, ctxt: Ctxt) = { - if (n <= sb.length && sb.substring(sb.low, sb.low + n) == s) - Set((sb.set(high = sb.low + n), sb.set(low = sb.low + n))) - else Set() - } -} - -implicit def string2parser(s: String) = SubStringParser(s) ==> (_.substring()) - -class IgnLst[T](p: => Parser[T]) extends Parser[T] { - def parse(sb: SubString, ctxt: Ctxt) = { - if (sb.length == 0) Set() - else for ((head, tail) <- p.parse(sb.set(high = sb.high - 1), ctxt)) - yield (head, tail.set(high = tail.high + 1)) - } -} - -class CHECK[T](nt: String, p: => Parser[T]) extends Parser[T] { - def parse(sb: SubString, ctxt: Ctxt) = { - val should_trim = ctxt.contains (nt, sb) - if (should_trim && sb.length == 0) Set() - else if (should_trim) new IgnLst(p).parse(sb, (nt, sb)::ctxt) - else p.parse(sb, (nt, sb)::ctxt) - } -} - -// ambigous grammar -lazy val E: Parser[Int] = - new CHECK("E", (E ~ "+" ~ E) ==> { case ((x, y), z) => x + z} || - (E ~ "*" ~ E) ==> { case ((x, y), z) => x * z} || - ("(" ~ E ~ ")") ==> { case ((x, y), z) => y} || - "0" ==> { (s) => 0 } || - "1" ==> { (s) => 1 } || - "2" ==> { (s) => 2 } || - "3" ==> { (s) => 3 }) - -println(E.parse_all("1+2*3+3")) - - diff -r e66bd5c563eb -r b5b5583a3a08 progs/pprint/README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/pprint/README Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,7 @@ +To obtain pdf via chrome + +/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --headless --disable-gpu --print-to-pdf index.html + +To autocrop the pdf to the smallest size + +pdf-crop-margins -s -u -p 0 output.pdf \ No newline at end of file diff -r e66bd5c563eb -r b5b5583a3a08 progs/pprint/style.css --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/pprint/style.css Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,99 @@ +@media print { + @page { margin: 0; } + body { margin: 1.6cm; } +} + + +body { + font-family: Calibri, Segoe, "Segoe UI", "Gill Sans", "Gill Sans MT", sans-serif; +} + +/* It's supposed to look like a tree diagram */ +.tree, .tree ul, .tree li { + list-style: none; + margin: 0; + padding: 0; + position: relative; +} + +.tree { + margin: 0 0 1em; + text-align: center; +} +.tree, .tree ul { + display: table; +} +.tree ul { + width: 100%; +} + .tree li { + display: table-cell; + padding: .5em 0; + vertical-align: top; + } + /* _________ */ + .tree li:before { + outline: solid 1px #666; + content: ""; + left: 0; + position: absolute; + right: 0; + top: 0; + } + .tree li:first-child:before {left: 50%;} + .tree li:last-child:before {right: 50%;} + + .tree code, .tree span { + border: solid .1em #666; + border-radius: .2em; + display: inline-block; + margin: 0 .2em .5em; + padding: .2em .5em; + position: relative; + } + /* If the tree represents DOM structure */ + .tree code { + font-family: monaco, Consolas, 'Lucida Console', monospace; + } + + /* | */ + .tree ul:before, + .tree code:before, + .tree span:before { + outline: solid 1px #666; + content: ""; + height: .5em; + left: 50%; + position: absolute; + } + .tree ul:before { + top: -.5em; + } + .tree code:before, + .tree span:before { + top: -.55em; + } + +/* The root node doesn't connect upwards */ +.tree > li {margin-top: 0;} + .tree > li:before, + .tree > li:after, + .tree > li > code:before, + .tree > li > span:before { + outline: none; + } + + +/* Create two equal columns that floats next to each other */ +.column { + float: left; + width: 40%; + padding: 10px; +} + +/* Clear floats after the columns */ +.row:after { + content: ""; + display: table; + clear: both; +} diff -r e66bd5c563eb -r b5b5583a3a08 progs/pprint/tree.sc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/pprint/tree.sc Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,109 @@ +// scalatags +import ammonite.ops._ + +import $ivy.`com.lihaoyi::scalatags:0.8.2` +import scalatags.Text.all._ +import scalatags.Text._ + +// regular expressions including records +abstract class Rexp +case object ZERO extends Rexp +case object ONE extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class SPECIAL(s: String) extends Rexp + +def nullable (r: Rexp) : Boolean = r match { + case ZERO => false + case ONE => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true +} + +def der(c: Char, r: Rexp) : Rexp = r match { + case ZERO => ZERO + case ONE => ZERO + case CHAR(d) => if (c == d) ONE else ZERO + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r1) => SEQ(der(c, r1), STAR(r1)) +} + +def ders(s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders(s, der(c, r)) +} + +def simp(r: Rexp) : Rexp = r match { + case ALT(r1, r2) => (simp(r1), simp(r2)) match { + case (ZERO, r2s) => r2s + case (r1s, ZERO) => r1s + case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s) + } + case SEQ(r1, r2) => (simp(r1), simp(r2)) match { + case (ZERO, _) => ZERO + case (_, ZERO) => ZERO + case (ONE, r2s) => r2s + case (r1s, ONE) => r1s + case (r1s, r2s) => SEQ(r1s, r2s) + } + case r => r +} + +def ders_simp(s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders_simp(s, simp(der(c, r))) +} + + +def pp(r: Rexp) : TypedTag[String] = r match { + case CHAR(c) => li(code(c.toString)) + case ALT(r1, r2) => li(code("+"), ul(pp(r1), pp(r2))) + case SEQ(r1, r2) => li(code(raw("·")), ul(pp(r1), pp(r2))) + case STAR(r1) => li(code("*"), ul(pp(r1))) + case ZERO => li(code("0")) + case ONE => li(code("1")) + case SPECIAL(s) => li(code(raw(s))) +} + +def mktree(r: Rexp) = + ul(cls := "tree")(pp(r)) + + +def index(rs: List[Rexp]) = + html( + head(link(rel := "stylesheet", href := "./style.css")), + body(rs.map(mktree)) + ) + + + +/* +println(index(List(simple, test, evil2))) + +val simple = CHAR('a') +val test = ALT(CHAR('a'), CHAR('b')) + + +write.over(pwd/"t1.html", + index(List(evil2, + ders("a".toList, evil2), + ders("aa".toList, evil2), + ders("aaa".toList, evil2)))) +*/ + +val evil2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) +val r1 = STAR(ALT(SPECIAL("r1"), SPECIAL("r2"))) +val r2 = ALT(SPECIAL("r1"), STAR(SPECIAL("r2"))) + +@main +def main(fname: String) = { + val content = index(List(r1)) + write.over(pwd / fname, content) +} diff -r e66bd5c563eb -r b5b5583a3a08 progs/primes.while --- a/progs/primes.while Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,15 +0,0 @@ -// prints out prime numbers from -// 2 to 100 (end) - -end := 100; -n := 2; -while (n < end) do { - f := 2; - tmp := 0; - while ((f < n / 2 + 1) && (tmp == 0)) do { - if ((n / f) * f == n) then { tmp := 1 } else { skip }; - f := f + 1 - }; - if (tmp == 0) then { write(n) } else { skip }; - n := n + 1 -} \ No newline at end of file diff -r e66bd5c563eb -r b5b5583a3a08 progs/re-alt.scala --- a/progs/re-alt.scala Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,115 +0,0 @@ -trait RegExp { - def nullable: Boolean - def derive(c: Char): RegExp -} - -case object Empty extends RegExp { - def nullable = false - def derive(c: Char) = Empty -} - -case object Eps extends RegExp { - def nullable = true - def derive(c: Char) = Empty -} - -case class Str(s: String) extends RegExp { - def nullable = s.isEmpty - def derive(c: Char) = - if (s.isEmpty || s.head != c) Empty - else Str(s.tail) -} - -case class Cat(r: RegExp, s: RegExp) extends RegExp { - def nullable = r.nullable && s.nullable - def derive(c: Char) = - if (r.nullable) Or(Cat(r.derive(c), s), s.derive(c)) - else Cat(r.derive(c), s) -} - -case class Star(r: RegExp) extends RegExp { - def nullable = true - def derive(c: Char) = Cat(r.derive(c), this) -} - -case class Or(r: RegExp, s: RegExp) extends RegExp { - def nullable = r.nullable || s.nullable - def derive(c: Char) = Or(r.derive(c), s.derive(c)) -} - -case class And(r: RegExp, s: RegExp) extends RegExp { - def nullable = r.nullable && s.nullable - def derive(c: Char) = And(r.derive(c), s.derive(c)) -} - -case class Not(r: RegExp) extends RegExp { - def nullable = !r.nullable - def derive(c: Char) = Not(r.derive(c)) -} - - - - -object Matcher { - def matches(r: RegExp, s: String): Boolean = { - if (s.isEmpty) r.nullable - else matches(r.derive(s.head), s.tail) - } -} - - -object Pimps { - implicit def string2RegExp(s: String) = Str(s) - - implicit def regExpOps(r: RegExp) = new { - def | (s: RegExp) = Or(r, s) - def & (s: RegExp) = And(r, s) - def % = Star(r) - def %(n: Int) = rep(r, n) - def ? = Or(Eps, r) - def ! = Not(r) - def ++ (s: RegExp) = Cat(r, s) - def ~ (s: String) = Matcher.matches(r, s) - } - - implicit def stringOps(s: String) = new { - def | (r: RegExp) = Or(s, r) - def | (r: String) = Or(s, r) - def & (r: RegExp) = And(s, r) - def & (r: String) = And(s, r) - def % = Star(s) - def % (n: Int) = rep(Str(s), n) - def ? = Or(Eps, s) - def ! = Not(s) - def ++ (r: RegExp) = Cat(s, r) - def ++ (r: String) = Cat(s, r) - def ~ (t: String) = Matcher.matches(s, t) - } - - def rep(r: RegExp, n: Int): RegExp = - if (n <= 0) Star(r) - else Cat(r, rep(r, n - 1)) -} - - -object Test { - import Pimps._ - - val digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" - val int = ("+" | "-").? ++ digit.%(1) - val real = ("+" | "-").? ++ digit.%(1) ++ ("." ++ digit.%(1)).? ++ (("e" | "E") ++ ("+" | "-").? ++ digit.%(1)).? - - def main(args: Array[String]) { - val ints = List("0", "-4534", "+049", "99") - val reals = List("0.9", "-12.8", "+91.0", "9e12", "+9.21E-12", "-512E+01") - val errs = List("", "-", "+", "+-1", "-+2", "2-") - - ints.foreach(s => assert(int ~ s)) - reals.foreach(s => assert(!(int ~ s))) - errs.foreach(s => assert(!(int ~ s))) - - ints.foreach(s => assert(real ~ s)) - reals.foreach(s => assert(real ~ s)) - errs.foreach(s => assert(!(real ~ s))) - } -} diff -r e66bd5c563eb -r b5b5583a3a08 progs/re-internal.scala --- a/progs/re-internal.scala Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,17 +0,0 @@ - -// measures the time a function needs -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - - -for (i <- 1 to 10001 by 300) { - val re = ("((a?){" + i + "})(a{" + i + "})") - println(i + " " + "%.5f".format(time_needed(1, ("a" * i).matches(re)))) -} - - - diff -r e66bd5c563eb -r b5b5583a3a08 progs/re-sulzmann-partial.scala --- a/progs/re-sulzmann-partial.scala Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,136 +0,0 @@ -import scala.language.implicitConversions -import scala.language.reflectiveCalls - -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp - -abstract class Pat - -case class VAR(x: String, r: Rexp) extends Pat -case class GRP(x: String, p: Pat) extends Pat -case class PSEQ(p1: Pat, p2: Pat) extends Pat -case class PALT(p1: Pat, p2: Pat) extends Pat -case class PSTAR(p: Pat) extends Pat - - -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true -} - -def down (p: Pat) : Rexp = p match { - case VAR(x: String, w: String, r: Rexp) => r - case GRP(x: String, w: String, p: Pat) => down(p) - case PSEQ(p1: Pat, p2: Pat) => SEQ(down(p1), down(p2)) - case PALT(p1: Pat, p2: Pat) => ALT(down(p1), down(p2)) - case PSTAR(p: Pat) => STAR(down(p)) -} - -def patnullable (p: Pat) : Boolean = p match { - case PVar(_, r) => nullable(r) - case PSEQ(p1, p2) => patnullable(p1) && patnullable(p2) - case PCHOICE(p1, p2) => patnullable(p1) || patnullable(p2) - case PSTAR(p) => true - case PatVar(_, p) => patnullable(p) -} - -//type Env = Set[List[(String, String)]] -type Env = Map[Int, String] - -def update(n: Int, c: Char) (env: Env) = - env + (n -> (env.getOrElse(n, "") + c.toString)) - -def pderivPat (p: Pat, c: Char) : Set[(Pat, Env => Env)] = p match { - case PVar(n: Int, r: Rexp) => { - val pds = pderiv(r, c) - if (pds.isEmpty) Set() - else Set((PVar(n, toRexp(pds.toList)), update(n, c))) - } - case PSEQ(p1: Pat, p2: Pat) => { - val pats : Set[(Pat, Env => Env)] = - for ((p, f) <- pderivPat(p1, c)) yield (PSEQ(p, p2), f) - if (nullable(strip(p1))) pats ++ pderivPat(p2, c) - else pats - } - case PCHOICE(p1: Pat, p2: Pat) => pderivPat(p1, c) ++ pderivPat(p2, c) - case PSTAR(p1: Pat) => - for ((p, f) <- pderivPat(p1, c)) yield (PSEQ(p, PSTAR(p1)), f) - case PatVar(n: Int, p1: Pat) => - for ((p, f) <- pderivPat(p1, c)) yield (PatVar(n, p), f compose (update (n, c))) -} - - -val p2 = PSEQ(PVar(1, STAR("A")), PVar(2, STAR("A"))) -pderivPat(p2, 'A').mkString("\n") - - -def greedy_aux(env: Set[(Pat, Env)], w: List[Char]) : Set[Env] = w match { - case Nil => - for ((p, e) <- env if patnullable(p)) yield e - case c::cs => { - val env2 = for ((p, e) <- env; (p1, f) <- pderivPat(p, c)) yield (p1, f(e)) - greedy_aux(env2, cs) - } -} - -def greedy(p: Pat, w: String) = { - val res = greedy_aux(Set((p, Map())), w.toList) - if (res.isEmpty) None else Some(res) -} - -// some convenience for typing in regular expressions -def charlist2rexp (s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp (s : String) : Rexp = charlist2rexp(s.toList) - -implicit def RexpOps (r: Rexp) = new { - def | (s: Rexp) = ALT(r, s) - def % = STAR(r) - def ~ (s: Rexp) = SEQ(r, s) -} - -implicit def stringOps (s: String) = new { - def | (r: Rexp) = ALT(s, r) - def | (r: String) = ALT(s, r) - def % = STAR(s) - def ~ (r: Rexp) = SEQ(s, r) - def ~ (r: String) = SEQ(s, r) -} - -implicit def PatOps (p: Pat) = new { - def | (q: Pat) = PALT(p, q) - def % = PSTAR(p) - def ~ (q: Pat) = PSEQ(p, q) -} - -val p3 = PSTAR(PSEQ(PVar(1, "A"), PVar(2, "B"))) - -greedy2(Set((p3, Map())), "ABAB".toList) - - -val p4 = PVar(1, "A") -greedy2(Set((p4, Map())), "A".toList) - -val p5 = PSEQ(PVar(1, "A"), PVar(1, "B")) -greedy2(Set((p5, Map())), "AB".toList) - -val res = pderivPat(p5, 'A') -for ((_, f) <- res) yield f(Map()) - - -val p6 = PatVar(4,PSTAR(PCHOICE(PCHOICE(PVar(1, "A"), PVar(2, "AB")), PVar(3, "B")))) - -greedy(p6, "ABA") diff -r e66bd5c563eb -r b5b5583a3a08 progs/re-sulzmann.scala --- a/progs/re-sulzmann.scala Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,164 +0,0 @@ -import scala.language.implicitConversions -import scala.language.reflectiveCalls - -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp - -abstract class Pat - -case class VAR(x: String, w: String, r: Rexp) extends Pat -case class GRP(x: String, w: String, p: Pat) extends Pat -case class PSEQ(p1: Pat, p2: Pat) extends Pat -case class PALT(p1: Pat, p2: Pat) extends Pat -case class PSTAR(p: Pat) extends Pat - -object VAR { def apply(x: String, r: Rexp) : VAR = VAR(x, "", r) } -object GRP { def apply(x: String, p: Pat) : GRP = GRP(x, "", p) } - - -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true -} - -// derivative of a regular expression w.r.t. a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) -} - -def down (p: Pat) : Rexp = p match { - case VAR(x: String, w: String, r: Rexp) => r - case GRP(x: String, w: String, p: Pat) => down(p) - case PSEQ(p1: Pat, p2: Pat) => SEQ(down(p1), down(p2)) - case PALT(p1: Pat, p2: Pat) => ALT(down(p1), down(p2)) - case PSTAR(p: Pat) => STAR(down(p)) -} - -def empty (p: Pat) : Pat = p match { - case VAR(x: String, w: String, r: Rexp) => - if (nullable(r)) VAR(x, w, EMPTY) - else VAR(x, w, NULL) - case GRP(x: String, w: String, p: Pat) => GRP(x, w, empty(p)) - case PSEQ(p1: Pat, p2: Pat) => PSEQ(empty(p1), empty(p2)) - case PALT(p1: Pat, p2: Pat) => PALT(empty(p1), empty(p2)) - case PSTAR(p: Pat) => PSTAR(empty(p)) -} - -def patder (c: Char, p: Pat) : Pat = p match { - case VAR(x: String, w: String, r: Rexp) => VAR(x, w + c, der(c, r)) - case GRP(x: String, w: String, p: Pat) => GRP(x, w + c, patder(c, p)) - case PSEQ(p1: Pat, p2: Pat) => - if (nullable(down(p1))) PALT(PSEQ(patder(c, p1), p2), PSEQ(empty(p1), patder(c, p2))) - else PSEQ(patder(c, p1), p2) - case PALT(p1: Pat, p2: Pat) => PALT(patder(c, p1), patder(c, p2)) - case PSTAR(p: Pat) => PSEQ(patder(c, p), PSTAR(p)) -} - -def patders (s: List[Char], p: Pat) : Pat = s match { - case Nil => p - case c::s => patders(s, patder(c, p)) -} - -type Env = Set[List[(String, String)]] - -def special (p: Pat, env: Env) : Env = - if (env == Set() && nullable(down(p))) Set(Nil) else env - -def env (p: Pat) : Env = p match { - case VAR(x: String, w: String, r: Rexp) => - if (nullable(r)) Set(List((x, w))) else Set() - case GRP(x: String, w: String, p1: Pat) => - special(p, for (e <- env(p1)) yield List((x, w)) ++ e) - case PSEQ(p1: Pat, p2: Pat) => - special(p, for (e1 <- env(p1); e2 <- env(p2)) yield (e1 ++ e2)) - case PALT(p1: Pat, p2: Pat) => special(p, env(p1) ++ env(p2)) - case PSTAR(p1: Pat) => special(p, env(p1)) -} - -def matcher (p: Pat, s: String) = env(empty(patders(s.toList, p))) - - -// some convenience for typing in regular expressions -def charlist2rexp (s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp (s : String) : Rexp = charlist2rexp(s.toList) - -implicit def RexpOps (r: Rexp) = new { - def | (s: Rexp) = ALT(r, s) - def % = STAR(r) - def ~ (s: Rexp) = SEQ(r, s) -} - -implicit def stringOps (s: String) = new { - def | (r: Rexp) = ALT(s, r) - def | (r: String) = ALT(s, r) - def % = STAR(s) - def ~ (r: Rexp) = SEQ(s, r) - def ~ (r: String) = SEQ(s, r) -} - -implicit def PatOps (p: Pat) = new { - def | (q: Pat) = PALT(p, q) - def % = PSTAR(p) - def ~ (q: Pat) = PSEQ(p, q) -} - - -val p0 = VAR("x", "A") - -patders("A".toList, p0) -matcher(p0, "A") - -val p1 = VAR("x", "A") | VAR("y", "A") | VAR("z", "B") - -patders("A".toList, p1) -matcher(p1, "A") -matcher(p1, "B") - -val p2 = VAR("x", "AB") -matcher(p2, "AB") -matcher(p2, "AA") - -val p3 = VAR("x", "A" ~ "B") -matcher(p3, "AB") -matcher(p3, "AA") - -val p4 = VAR("x", "A") ~ VAR("y", "B") -matcher(p4, "AB") -matcher(p4, "AA") - -val p5 = VAR("x", "A".%) -matcher(p5, "A") -matcher(p5, "AA") -matcher(p5, "") -matcher(p5, "AAA") - -val p6 = VAR("x", "A").% -matcher(p6, "A") -matcher(p6, "AA") -matcher(p6, "") -matcher(p6, "AAA") - -val p7 = (VAR("x", "A") | VAR("y", "AB") | VAR("z", "B")).% -matcher(p7, "AB") - diff -r e66bd5c563eb -r b5b5583a3a08 progs/re.scala --- a/progs/re.scala Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,123 +0,0 @@ - -// regular expressions including NOT -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class NOT(r: Rexp) extends Rexp - - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - - -// nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case NOT(r) => !(nullable(r)) -} - -// tests whether a regular expression -// cannot recognise more -def no_more (r: Rexp) : Boolean = r match { - case NULL => true - case EMPTY => false - case CHAR(_) => false - case ALT(r1, r2) => no_more(r1) && no_more(r2) - case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1) - case STAR(_) => false - case NOT(r) => !(no_more(r)) -} - - -// derivative of a regular expression w.r.t. a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) - case NOT(r) => NOT(der (c, r)) -} - - -// regular expression for specifying -// ranges of characters -def RANGE(s : List[Char]) : Rexp = s match { - case Nil => NULL - case c::Nil => CHAR(c) - case c::s => ALT(CHAR(c), RANGE(s)) -} - -//one or more -def PLUS(r: Rexp) = SEQ(r, STAR(r)) - - -//some regular expressions -val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList) -val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList) -val LETTER = ALT(LOWERCASE, UPPERCASE) -val DIGIT = RANGE("0123456789".toList) -val NONZERODIGIT = RANGE("123456789".toList) - -val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGIT))) -val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") -val WHITESPACE = RANGE(" \n".toList) -val WHITESPACES = PLUS(WHITESPACE) - -val ALL = ALT(ALT(LETTER, DIGIT), WHITESPACE) -val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/") - - -// an example list of regular expressions -val regs: List[Rexp]= List("if", "then", "else", "+", IDENT, NUMBER, WHITESPACES, COMMENT) - - -def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s) - -def munch(r: Rexp, s: List[Char], t: List[Char]) : Option[(List[Char], List[Char])] = - s match { - case Nil if (nullable(r)) => Some(Nil, t) - case Nil => None - case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, t) - case c::s if (no_more(der (c, r))) => None - case c::s => munch(der (c, r), s, t ::: List(c)) - } - -def one_string (regs: List[Rexp], s: List[Char]) : (List[Char], List[Char]) = { - val somes = regs.map { munch(_, s, Nil) } .flatten - if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head) -} - -def tokenize (regs: List[Rexp], s: List[Char]) : List[String] = s match { - case Nil => Nil - case _ => one_string(regs, s) match { - case (rest, s) => s.mkString :: tokenize(regs, rest) - } -} - -//examples -println(tokenize(regs, "if true then then 42 else +".toList)) -println(tokenize(regs, "if+true+then+then+42+else +".toList)) -println(tokenize(regs, "ifff if 34 34".toList)) -println(tokenize(regs, "/*ifff if */ hhjj /*34 */".toList)) -println(tokenize(regs, "/* if true then */ then 42 else +".toList)) -//println(tokenize(regs, "ifff $ if 34".toList)) // causes an error because of the symbol $ diff -r e66bd5c563eb -r b5b5583a3a08 progs/re0.scala --- a/progs/re0.scala Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,119 +0,0 @@ -import scala.annotation.tailrec -import scala.language.implicitConversions - -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case object ALLCHAR extends Rexp -case class CHAR(c: Char) extends Rexp -case class STR(s: String) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class NOT(r: Rexp) extends Rexp -case class REP(r: Rexp, n: Int) extends Rexp - -// some convenience for typing in regular expressions -implicit def string2rexp(s : String) : Rexp = STR(s) - -implicit def RexpOps(r: Rexp) : Rexp = new { - def | (s: Rexp) = ALT(r, s) - def % = STAR(r) - def %(n: Int) = REP(r, n) - def %%(n: Int) = SEQ(REP(r, n), STAR(r)) - def ? = ALT(EMPTY, r) - def unary_! = NOT(r) - def ~ (s: Rexp) = SEQ(r, s) -} - -implicit def stringOps(s: String) : Rexp = new { - def | (r: Rexp) = ALT(s, r) - def | (r: String) = ALT(s, r) - def % = STAR(s) - def %(n: Int) = REP(s, n) - def %%(n: Int) = SEQ(REP(s, n), STAR(s)) - def ? = ALT(EMPTY, s) - def unary_! = NOT(s) - def ~ (r: Rexp) = SEQ(s, r) - def ~ (r: String) = SEQ(s, r) -} - - -// nullable function: tests whether the regular -// expression can recognise the empty string - -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case ALLCHAR => false - case CHAR(_) => false - case STR(s) => s.isEmpty - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case NOT(r) => !(nullable(r)) - case REP(r, i) => if (i == 0) true else nullable(r) -} - -// derivative of a regular expression w.r.t. a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case ALLCHAR => EMPTY - case CHAR(d) => if (c == d) EMPTY else NULL - case STR(s) => if (s.isEmpty || s.head != c) NULL else STR(s.tail) - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) - case NOT(r) => NOT(der (c, r)) - case REP(r, i) => - if (i == 0) NULL else SEQ(der(c, r), REP(r, i - 1)) -} - - -// derivative w.r.t. a string (iterates der) -@tailrec -def ders (s: List[Char], r: Rexp) : Rexp = s match { - case Nil => r - case c::s => ders(s, der(c, r)) -} - -// main matcher function -def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) - -//examples -val digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" -val int = ("+" | "-").? ~ digit.%%(1) -val real = ("+" | "-").? ~ digit.%%(1) ~ ("." ~ digit.%%(1)).? ~ (("e" | "E") ~ ("+" | "-").? ~ digit.%%(1)).? - -val ints = List("0", "-4534", "+049", "99") -val reals = List("0.9", "-12.8", "+91.0", "9e12", "+9.21E-12", "-512E+01") -val errs = List("", "-", "+", "+-1", "-+2", "2-") - -ints.map(s => matcher(int, s)) -reals.map(s => matcher(int, s)) -errs.map(s => matcher(int, s)) - -ints.map(s => matcher(real, s)) -reals.map(s => matcher(real, s)) -errs.map(s => matcher(real, s)) - - - -def RTEST(n: Int) = ("a".? %(n)) ~ ("a" %(n)) - -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - -for (i <- 1 to 12000 by 500) { - println(i + ": " + "%.5f".format(time_needed(1, matcher(RTEST(i), "a" * i)))) -} - - diff -r e66bd5c563eb -r b5b5583a3a08 progs/re1.scala --- a/progs/re1.scala Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,154 +0,0 @@ -// A simple matcher for basic regular expressions - -abstract class Rexp -case object ZERO extends Rexp // matches nothing -case object ONE extends Rexp // matches an empty string -case class CHAR(c: Char) extends Rexp // matches a character c -case class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternative -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence -case class STAR(r: Rexp) extends Rexp // star - -// nullable function: tests whether a regular -// expression can recognise the empty string -def nullable(r: Rexp) : Boolean = r match { - case ZERO => false - case ONE => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true -} - -// the derivative of a regular expression w.r.t. a character -def der (c: Char, r: Rexp) : Rexp = r match { - case ZERO => ZERO - case ONE => ZERO - case CHAR(d) => if (c == d) ONE else ZERO - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r1) => SEQ(der(c, r1), STAR(r1)) -} - -// the derivative w.r.t. a string (iterates der) -def ders (s: List[Char], r: Rexp) : Rexp = s match { - case Nil => r - case c::s => ders(s, der(c, r)) -} - -// the main matcher function -def matcher(r: Rexp, s: String) : Boolean = - nullable(ders(s.toList, r)) - - -// examples from the homework - -val r = STAR(ALT(SEQ(CHAR('a'), CHAR('b')), CHAR('b'))) -der('a', r) -der('b', r) -der('c', r) - -val r2 = SEQ(SEQ(CHAR('x'), CHAR('y')), CHAR('z')) -der('x', r2) -der('y', der('x', r2)) -der('z', der('y', der('x', r2))) - - -// the optional regular expression (one or zero times) -def OPT(r: Rexp) = ALT(r, ONE) - -// the n-times regular expression (explicitly expanded) -def NTIMES(r: Rexp, n: Int) : Rexp = n match { - case 0 => ONE - case 1 => r - case n => SEQ(r, NTIMES(r, n - 1)) -} - - -// Test Cases - -// the evil regular expression a?{n} a{n} -def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n)) - -// the evil regular expression (a*)*b -val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) - -// for measuring time -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start) / (i * 1.0e9) -} - - -// test: (a?{n}) (a{n}) -println("Test (a?{n}) (a{n})") - -for (i <- 0 to 20 by 2) { - println(f"$i: ${time_needed(2, matcher(EVIL1(i), "a" * i))}%.5f") -} - -// test: (a*)* b -println("Test (a*)* b") - -for (i <- 0 to 20 by 2) { - println(f"$i: ${time_needed(2, matcher(EVIL2, "a" * i))}%.5f") -} - - -// the size of a regular expressions - for testing purposes -def size(r: Rexp) : Int = r match { - case ZERO => 1 - case ONE => 1 - case CHAR(_) => 1 - case ALT(r1, r2) => 1 + size(r1) + size(r2) - case SEQ(r1, r2) => 1 + size(r1) + size(r2) - case STAR(r) => 1 + size(r) -} - -// the expicit expansion in EVIL1(n) increases -// drastically its size - -size(EVIL1(1)) // 5 -size(EVIL1(3)) // 17 -size(EVIL1(5)) // 29 -size(EVIL1(7)) // 41 -size(EVIL1(20)) // 119 - -// given a regular expression and building successive -// derivatives might result into bigger and bigger -// regular expressions...here is an example for this: - -// (a+b)* o a o b o (a+b)* -val BIG_aux = STAR(ALT(CHAR('a'), CHAR('b'))) -val BIG = SEQ(BIG_aux, SEQ(CHAR('a'),SEQ(CHAR('b'), BIG_aux))) - -size(ders("".toList, BIG)) // 13 -size(ders("ab".toList, BIG)) // 51 -size(ders("abab".toList, BIG)) // 112 -size(ders("ababab".toList, BIG)) // 191 -size(ders("abababab".toList, BIG)) // 288 -size(ders("ababababab".toList, BIG)) // 403 -size(ders("abababababab".toList, BIG)) // 536 - - -size(ders(("ab" * 200).toList, BIG)) // 366808 - -for (i <- 0 to 200 by 10) { - println(f"$i: ${time_needed(2, matcher(BIG, "ab" * i))}%.5f") -} - - - - -////////////////////////////////////// -def concat(A: Set[String], B: Set[String]) : Set[String] = - for (s1 <- A; s2 <- B) yield s1 ++ s2 - - -val A = Set("foo", "bar") -val B = Set("a", "b") - - diff -r e66bd5c563eb -r b5b5583a3a08 progs/re2.scala --- a/progs/re2.scala Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,118 +0,0 @@ -// A Version with an explicit n-times regular expression; -// this keeps the size of the regular expression in the -// EVIL1 test-case quite small - -abstract class Rexp -case object ZERO extends Rexp -case object ONE extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class NTIMES(r: Rexp, n: Int) extends Rexp //explicit constructor for n-times - - -def nullable (r: Rexp) : Boolean = r match { - case ZERO => false - case ONE => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case NTIMES(r, i) => if (i == 0) true else nullable(r) -} - - -def der (c: Char, r: Rexp) : Rexp = r match { - case ZERO => ZERO - case ONE => ZERO - case CHAR(d) => if (c == d) ONE else ZERO - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r1) => SEQ(der(c, r1), STAR(r1)) - case NTIMES(r1, i) => - if (i == 0) ZERO else SEQ(der(c, r1), NTIMES(r1, i - 1)) -} - -def ders (s: List[Char], r: Rexp) : Rexp = s match { - case Nil => r - case c::s => ders(s, der(c, r)) -} - -def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) - - -// the optional regular expression: one or zero times -// this regular expression is still defined in terms of ALT -def OPT(r: Rexp) = ALT(r, ONE) - - -// Test Cases - -// evil regular expressions -def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n)) -val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) - -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start) / (i * 1.0e9) -} - - - -// test: (a?{n}) (a{n}) -for (i <- 0 to 1000 by 100) { - println(f"$i: ${time_needed(2, matcher(EVIL1(i), "a" * i))}%.5f") -} - -// test: (a*)* b -for (i <- 0 to 20) { - println(f"$i: ${time_needed(2, matcher(EVIL2, "a" * i))}%.5f") -} - - -// the size of a regular expressions - for testing purposes -def size(r: Rexp) : Int = r match { - case ZERO => 1 - case ONE => 1 - case CHAR(_) => 1 - case ALT(r1, r2) => 1 + size(r1) + size(r2) - case SEQ(r1, r2) => 1 + size(r1) + size(r2) - case STAR(r) => 1 + size(r) - case NTIMES(r, _) => 1 + size(r) -} - -// EVIL1(n) has now a constant size, no matter -// what n is; also the derivative only grows -// moderately - -size(EVIL1(1)) // 7 -size(EVIL1(3)) // 7 -size(EVIL1(5)) // 7 -size(EVIL1(7)) // 7 -size(EVIL1(20)) // 7 - -size(ders("".toList, EVIL1(5))) // 7 -size(ders("a".toList, EVIL1(5))) // 16 -size(ders("aa".toList, EVIL1(5))) // 35 -size(ders("aaa".toList, EVIL1(5))) // 59 -size(ders("aaaa".toList, EVIL1(5))) // 88 -size(ders("aaaaa".toList, EVIL1(5))) // 122 -size(ders("aaaaaa".toList, EVIL1(5))) // 151 - -// but the size of the derivatives can still grow -// quite dramatically in case of EVIL2 - -size(ders("".toList, EVIL2)) // 5 -size(ders("a".toList, EVIL2)) // 12 -size(ders("aa".toList, EVIL2)) // 28 -size(ders("aaa".toList, EVIL2)) // 58 -size(ders("aaaa".toList, EVIL2)) // 116 -size(ders("aaaaa".toList, EVIL2)) // 230 -size(ders("aaaaaa".toList, EVIL2)) // 456 - -size(ders(("a" * 20).toList, EVIL2)) // 7340068 \ No newline at end of file diff -r e66bd5c563eb -r b5b5583a3a08 progs/re3.scala --- a/progs/re3.scala Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,127 +0,0 @@ -// A version with simplification of derivatives; -// this keeps the regular expressions small, which -// is good for the run-time - - -abstract class Rexp -case object ZERO extends Rexp -case object ONE extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class NTIMES(r: Rexp, n: Int) extends Rexp - - - -// the nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case ZERO => false - case ONE => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case NTIMES(r, i) => if (i == 0) true else nullable(r) -} - -// the derivative of a regular expression w.r.t. a character -def der (c: Char, r: Rexp) : Rexp = r match { - case ZERO => ZERO - case ONE => ZERO - case CHAR(d) => if (c == d) ONE else ZERO - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r1) => SEQ(der(c, r1), STAR(r1)) - case NTIMES(r, i) => - if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1)) -} - -def simp(r: Rexp) : Rexp = r match { - case ALT(r1, r2) => (simp(r1), simp(r2)) match { - case (ZERO, r2s) => r2s - case (r1s, ZERO) => r1s - case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s) - } - case SEQ(r1, r2) => (simp(r1), simp(r2)) match { - case (ZERO, _) => ZERO - case (_, ZERO) => ZERO - case (ONE, r2s) => r2s - case (r1s, ONE) => r1s - case (r1s, r2s) => SEQ(r1s, r2s) - } - case r => r -} - - -// the derivative w.r.t. a string (iterates der) -def ders(s: List[Char], r: Rexp) : Rexp = s match { - case Nil => r - case c::s => ders(s, simp(der(c, r))) -} - - -// the main matcher function -def matcher(r: Rexp, s: String) : Boolean = - nullable(ders(s.toList, r)) - - -// one or zero -def OPT(r: Rexp) = ALT(r, ONE) - - -// Test Cases - -// evil regular expressions: (a?){n} a{n} and (a*)* b -def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n)) -val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) - - -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - - -//test: (a?{n}) (a{n}) -for (i <- 0 to 8000 by 1000) { - println(f"$i: ${time_needed(3, matcher(EVIL1(i), "a" * i))}%.5f") -} - -//test: (a*)* b -for (i <- 0 to 6000000 by 500000) { - println(f"$i: ${time_needed(3, matcher(EVIL2, "a" * i))}%.5f") -} - - -// size of a regular expressions - for testing purposes -def size(r: Rexp) : Int = r match { - case ZERO => 1 - case ONE => 1 - case CHAR(_) => 1 - case ALT(r1, r2) => 1 + size(r1) + size(r2) - case SEQ(r1, r2) => 1 + size(r1) + size(r2) - case STAR(r) => 1 + size(r) - case NTIMES(r, _) => 1 + size(r) -} - - -// now the size of the derivatives grows -// much, much slower - -size(ders("".toList, EVIL2)) // 5 -size(ders("a".toList, EVIL2)) // 8 -size(ders("aa".toList, EVIL2)) // 8 -size(ders("aaa".toList, EVIL2)) // 8 -size(ders("aaaa".toList, EVIL2)) // 8 -size(ders("aaaaa".toList, EVIL2)) // 8 - - - - - diff -r e66bd5c563eb -r b5b5583a3a08 progs/re3a.scala --- a/progs/re3a.scala Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,112 +0,0 @@ - -abstract class Rexp -case object ZERO extends Rexp -case object ONE extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class NTIMES(r: Rexp, n: Int) extends Rexp -case class UPNTIMES(r: Rexp, n: Int) extends Rexp - -// the nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case ZERO => false - case ONE => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case NTIMES(r, i) => if (i == 0) true else nullable(r) - case UPNTIMES(r: Rexp, n: Int) => true -} - -// the derivative of a regular expression w.r.t. a character -def der (c: Char, r: Rexp) : Rexp = r match { - case ZERO => ZERO - case ONE => ZERO - case CHAR(d) => if (c == d) ONE else ZERO - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r1) => SEQ(der(c, r1), STAR(r1)) - case NTIMES(r1, i) => - if (i == 0) ZERO else - if (nullable(r1)) SEQ(der(c, r1), UPNTIMES(r1, i - 1)) - else SEQ(der(c, r1), NTIMES(r1, i - 1)) - case UPNTIMES(r1, i) => - if (i == 0) ZERO - else SEQ(der(c, r1), UPNTIMES(r1, i - 1)) -} - -def simp(r: Rexp) : Rexp = r match { - case ALT(r1, r2) => (simp(r1), simp(r2)) match { - case (ZERO, r2s) => r2s - case (r1s, ZERO) => r1s - case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s) - } - case SEQ(r1, r2) => (simp(r1), simp(r2)) match { - case (ZERO, _) => ZERO - case (_, ZERO) => ZERO - case (ONE, r2s) => r2s - case (r1s, ONE) => r1s - case (r1s, r2s) => SEQ(r1s, r2s) - } - case r => r -} - - -// the derivative w.r.t. a string (iterates der) -def ders (s: List[Char], r: Rexp) : Rexp = s match { - case Nil => r - case c::s => ders(s, simp(der(c, r))) -} - - -// the main matcher function -def matches(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) - -// one or zero -def OPT(r: Rexp) = ALT(r, ONE) - -// evil regular expressions -def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n)) -val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) -val EVIL3 = SEQ(STAR(ALT(CHAR('a'), SEQ(CHAR('a'),CHAR('a')))), CHAR('b')) - -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - "%.5f".format((end - start) / (i * 1.0e9)) -} - - -// test: (a?{n}) (a{n}) -for (i <- 0 to 8000 by 1000) { - println(s"$i: ${time_needed(2, matches(EVIL1(i), "a" * i))}") -} - -// test: (a*)* b -for (i <- 0 to 6000000 by 500000) { - println(s"$i: ${time_needed(2, matches(EVIL2, "a" * i))}") -} - - -val r0 = simp(der('a', EVIL3)) -val r1 = simp(der('a', r0)) -val r2 = simp(der('a', r1)) -val r3 = simp(der('a', r2)) -val r4 = simp(der('a', r3)) -val r5 = simp(der('a', r4)) -val r6 = simp(der('a', r5)) - -//test: (a|aa)* b -/* -for (i <- 0 to 100 by 10) { - println(s"$i: ${time_needed(2, matches(EVIL3, "a" * i ++ "c"))}") -} - */ - diff -r e66bd5c563eb -r b5b5583a3a08 progs/re3ext.scala --- a/progs/re3ext.scala Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,151 +0,0 @@ -abstract class Rexp -case object ZERO extends Rexp -case object ONE extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class NTIMES(r: Rexp, n: Int) extends Rexp - -def simp(r: Rexp): Rexp = r match { - case ALT(r1, r2) => { - (simp(r1), simp(r2)) match { - case (ZERO, r2s) => r2s - case (r1s, ZERO) => r1s - case (r1s, r2s) => if (r1s == r2s) r1s else ALT(r1s, r2s) - } - } - case SEQ(r1, r2) => { - (simp(r1), simp(r2)) match { - case (ZERO, _) => ZERO - case (_, ZERO) => ZERO - case (ONE, r2s) => r2s - case (r1s, ONE) => r1s - case (r1s, r2s) => SEQ(r1s, r2s) - } - } - case NTIMES(r, n) => NTIMES(simp(r), n) - case r => r -} - -def nullable (r: Rexp) : Boolean = r match { - case ZERO => false - case ONE => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case NTIMES(r, i) => if (i == 0) true else nullable(r) -} - -def der (c: Char, r: Rexp) : Rexp = r match { - case ZERO => ZERO - case ONE => ZERO - case CHAR(d) => if (c == d) ONE else ZERO - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) - case NTIMES(r, i) => - if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1)) -} - -def ders (s: List[Char], r: Rexp) : Rexp = s match { - case Nil => r - case c::s => ders(s, simp(der(c, r))) -} - -def matches(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) - - -//one or zero -def OPT(r: Rexp) = ALT(r, ONE) - -def EVIL(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n)) - -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - -/* -for (i <- 1 to 9001 by 1000) { - println(i + " " + "%.5f".format(time_needed(1, matches(EVIL(i), "a" * i)))) -} -*/ - -// some convenience for typing in regular expressions -import scala.language.implicitConversions -import scala.language.reflectiveCalls - -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => ONE - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - -implicit def RexpOps (r: Rexp) = new { - def | (s: Rexp) = ALT(r, s) - def % = STAR(r) - def ~ (s: Rexp) = SEQ(r, s) -} - -implicit def stringOps (s: String) = new { - def | (r: Rexp) = ALT(s, r) - def | (r: String) = ALT(s, r) - def % = STAR(s) - def ~ (r: Rexp) = SEQ(s, r) - def ~ (r: String) = SEQ(s, r) -} - - -def PLUS(r: Rexp) = SEQ(r, STAR(r)) -def RANGE(s: List[Char]) : Rexp = s match { - case Nil => ZERO - case c::s => ALT(CHAR(c), RANGE(s)) -} -def NMTIMES(r: Rexp, n: Int, m: Int) : Rexp = - if (n >= m) NTIMES(r, n) else ALT(NTIMES(r, m), NMTIMES(r, n, m - 1)) - - -println("Coursework:") -val REG1 = PLUS(PLUS("a" ~ "a" ~ "a")) -val REG2 = PLUS(PLUS("aaaaaaaaaaaaaaaaaaa" ~ OPT("a"))) // 19 as ~ a? - - -//40 * aaa matches -//43 * aaa + aa does not -//45 * aaa + a - -println("EVIL1:") -println(matches(REG1, "aaa" * 40)) -println(matches(REG1, "aaa" * 43 + "aa")) -println(matches(REG1, "aaa" * 45 + "a")) -println("EVIL2:") -println(matches(REG2, "aaa" * 40)) -println(matches(REG2, "aaa" * 43 + "aa")) -println(matches(REG2, "aaa" * 45 + "a")) - -println("EMAIL:") -val LOWERCASE = "abcdefghijklmnopqrstuvwxyz" -val DIGITS = "0123456789" -val EMAIL = PLUS(RANGE((LOWERCASE + DIGITS + "_" + "." + "-").toList)) ~ "@" ~ - PLUS(RANGE((LOWERCASE + DIGITS + "." + "-").toList)) ~ "." ~ - NMTIMES(RANGE((LOWERCASE + ".").toList), 2, 6) - -val my_email = "christian.urban@kcl.ac.uk" - -println(matches(EMAIL, my_email)) - -println(matches(NTIMES("a", 2), "a")) -println(matches(NTIMES("a", 2), "aa")) -println(matches(NTIMES("a", 2), "aaa")) - -println(matches(NMTIMES("a", 2, 3), "a")) -println(matches(NMTIMES("a", 2, 3), "aa")) -println(matches(NMTIMES("a", 2, 3), "aaa")) -println(matches(NMTIMES("a", 2, 3), "aaaa")) diff -r e66bd5c563eb -r b5b5583a3a08 progs/re4.scala --- a/progs/re4.scala Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,156 +0,0 @@ -// A version which attempts to move whole strings, not -// just characters, under derivatives whenever possible - -abstract class Rexp -case object ZERO extends Rexp -case object ONE extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class NTIMES(r: Rexp, n: Int) extends Rexp - -// the nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case ZERO => false - case ONE => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case NTIMES(r, i) => if (i == 0) true else nullable(r) -} - -// the derivative of a regular expression w.r.t. a character -def der (c: Char, r: Rexp) : Rexp = r match { - case ZERO => ZERO - case ONE => ZERO - case CHAR(d) => if (c == d) ONE else ZERO - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r1) => SEQ(der(c, r1), STAR(r1)) - case NTIMES(r1, i) => - if (i == 0) ZERO else SEQ(der(c, r1), NTIMES(r1, i - 1)) -} - -def simp(r: Rexp) : Rexp = r match { - case ALT(r1, r2) => (simp(r1), simp(r2)) match { - case (ZERO, r2s) => r2s - case (r1s, ZERO) => r1s - case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s) - } - case SEQ(r1, r2) => (simp(r1), simp(r2)) match { - case (ZERO, _) => ZERO - case (_, ZERO) => ZERO - case (ONE, r2s) => r2s - case (r1s, ONE) => r1s - case (r1s, r2s) => SEQ(r1s, r2s) - } - case r => r -} - -// an example -val r = SEQ(SEQ(CHAR('x'), CHAR('y')), CHAR('z')) -der('x', r) -der('y', der('x', r)) -der('z', der('y', der('x', r))) -simp(der('z', der('y', der('x', r)))) - -// *new* -// the derivative w.r.t. a string (iterates der) -def ders2(s: List[Char], r: Rexp) : Rexp = (s, r) match { - case (Nil, r) => r - case (s, ZERO) => ZERO - case (s, ONE) => if (s == Nil) ONE else ZERO - case (s, CHAR(c)) => if (s == List(c)) ONE else - if (s == Nil) CHAR(c) else ZERO - case (s, ALT(r1, r2)) => ALT(ders2(s, r1), ders2(s, r2)) - case (c::s, r) => ders2(s, simp(der(c, r))) -} - -// the main matcher function -def matcher(r: Rexp, s: String) : Boolean = - nullable(ders2(s.toList, r)) - - -// one or zero -def OPT(r: Rexp) = ALT(r, ONE) - - -// Test Cases - -def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n)) -val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) - -// for measuring time -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start) / (i * 1.0e9) -} - - -// test: (a?{n}) (a{n}) -for (i <- 0 to 11000 by 1000) { - println(f"$i: ${time_needed(2, matcher(EVIL1(i), "a" * i))}%.5f") -} - - -// test: (a*)* b -for (i <- 0 to 7000000 by 500000) { - println(f"$i: ${time_needed(2, matcher(EVIL2, "a" * i))}%.5f") -} - - - -// the size of a regular expressions - for testing purposes -def size(r: Rexp) : Int = r match { - case ZERO => 1 - case ONE => 1 - case CHAR(_) => 1 - case ALT(r1, r2) => 1 + size(r1) + size(r2) - case SEQ(r1, r2) => 1 + size(r1) + size(r2) - case STAR(r) => 1 + size(r) - case NTIMES(r, _) => 1 + size(r) -} - - -// string of a regular expressions - for testing purposes -def string(r: Rexp) : String = r match { - case ZERO => "0" - case ONE => "1" - case CHAR(c) => c.toString - case ALT(r1, r2) => s"(${string(r1)} + ${string(r2)})" - case SEQ(CHAR(c), CHAR(d)) => s"${c}${d}" - case SEQ(r1, r2) => s"(${string(r1)} ~ ${string(r2)})" - case STAR(r) => s"(${string(r)})*" - case NTIMES(r, n) => s"(${string(r)}){${n}}" -} - - -// test: ("a" | "aa")* -val EVIL3 = STAR(ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a')))) - -// test: ("" | "a" | "aa")* -val EVIL4 = STAR(ALT(ONE, ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a'))))) - -val t1 = ders2("a".toList, EVIL3) -val t1s = simp(t1) -val t2 = ders2("aa".toList, t1s) -val t2s = simp(t2) -val t3 = ders2("aaa".toList, t2s) -val t3s = simp(t3) -val t4 = ders2("aaaa".toList, t3s) -val t4s = simp(t4) -println(string(t1) + " " + size(t1)) -println("s " + string(t1s) + " " + size(t1s)) -println(string(t2) + " " + size(t2)) -println("s " + string(t2s) + " " + size(t2s)) -println(string(t3) + " " + size(t3)) -println("s " + string(t3s) + " " + size(t3s)) -println(string(t4) + " " + size(t4)) -println("s " + string(t4s) + " " + size(t4s)) diff -r e66bd5c563eb -r b5b5583a3a08 progs/regexp2.scala --- a/progs/regexp2.scala Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,123 +0,0 @@ - -// regular expressions including NOT -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class NOT(r: Rexp) extends Rexp - - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - - -// nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case NOT(r) => !(nullable(r)) -} - -// tests whether a regular expression -// cannot recognise more -def no_more (r: Rexp) : Boolean = r match { - case NULL => true - case EMPTY => false - case CHAR(_) => false - case ALT(r1, r2) => no_more(r1) && no_more(r2) - case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1) - case STAR(_) => false - case NOT(r) => !(no_more(r)) -} - - -// derivative of a regular expression w.r.t. a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) - case NOT(r) => NOT(der (c, r)) -} - - -// regular expression for specifying -// ranges of characters -def RANGE(s : List[Char]) : Rexp = s match { - case Nil => NULL - case c::Nil => CHAR(c) - case c::s => ALT(CHAR(c), RANGE(s)) -} - -//one or more -def PLUS(r: Rexp) = SEQ(r, STAR(r)) - - -//some regular expressions -val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList) -val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList) -val LETTER = ALT(LOWERCASE, UPPERCASE) -val DIGIT = RANGE("0123456789".toList) -val NONZERODIGIT = RANGE("123456789".toList) - -val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGIT))) -val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") -val WHITESPACE = RANGE(" \n".toList) -val WHITESPACES = PLUS(WHITESPACE) - -val ALL = ALT(ALT(LETTER, DIGIT), WHITESPACE) -val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/") - - -// an example list of regular expressions -val regs: List[Rexp]= List("if", "then", "else", "+", IDENT, NUMBER, WHITESPACES, COMMENT) - - -def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s) - -def munch(r: Rexp, s: List[Char], t: List[Char]) : Option[(List[Char], List[Char])] = - s match { - case Nil if (nullable(r)) => Some(Nil, t) - case Nil => None - case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, t) - case c::s if (no_more(der (c, r))) => None - case c::s => munch(der (c, r), s, t ::: List(c)) - } - -def one_string (regs: List[Rexp], s: List[Char]) : (List[Char], List[Char]) = { - val somes = regs.map { munch(_, s, Nil) } .flatten - if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head) -} - -def tokenize (regs: List[Rexp], s: List[Char]) : List[String] = s match { - case Nil => Nil - case _ => one_string(regs, s) match { - case (rest, s) => s.mkString :: tokenize(regs, rest) - } -} - -//examples -println(tokenize(regs, "if true then then 42 else +".toList)) -println(tokenize(regs, "if+true+then+then+42+else +".toList)) -println(tokenize(regs, "ifff if 34 34".toList)) -println(tokenize(regs, "/*ifff if */ hhjj /*34 */".toList)) -println(tokenize(regs, "/* if true then */ then 42 else +".toList)) -//println(tokenize(regs, "ifff $ if 34".toList)) // causes an error because of the symbol $ diff -r e66bd5c563eb -r b5b5583a3a08 progs/rev.scala --- a/progs/rev.scala Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -def rev(r: Rexp) : Rexp = r match { - case ZERO => ZERO - case ONE => ONE - case CHAR(c) => CHAR(c) - case ALT(r1, r2) => ALT(rev(r1), rev(r2)) - case SEQ(r1, r2) => SEQ(rev(r2), rev(r1)) - case STAR(r) => STAR(rev(r)) -} diff -r e66bd5c563eb -r b5b5583a3a08 progs/token-bak.scala --- a/progs/token-bak.scala Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,145 +0,0 @@ -import scala.language.implicitConversions -import scala.language.reflectiveCalls -import scala.util._ -import scala.annotation.tailrec - -sealed abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp - -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - - -implicit def RexpOps(r: Rexp) = new { - def | (s: Rexp) = ALT(r, s) - def % = STAR(r) - def ~ (s: Rexp) = SEQ(r, s) -} - -implicit def stringOps(s: String) = new { - def | (r: Rexp) = ALT(s, r) - def | (r: String) = ALT(s, r) - def % = STAR(s) - def ~ (r: Rexp) = SEQ(s, r) - def ~ (r: String) = SEQ(s, r) -} - -def Range(s : List[Char]) : Rexp = s match { - case Nil => NULL - case c::Nil => CHAR(c) - case c::s => ALT(CHAR(c), Range(s)) -} -def RANGE(s: String) = Range(s.toList) - -def PLUS(r: Rexp) = SEQ(r, STAR(r)) - -val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") -val DIGIT = RANGE("0123456789") -val ID = SYM ~ (SYM | DIGIT).% -val NUM = PLUS(DIGIT) -val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" -val SEMI: Rexp = ";" -val OP: Rexp = ":=" | "=" | "-" | "+" | "*" | "!=" | "<" | ">" -val WHITESPACE = PLUS(RANGE(" \n")) -val RPAREN: Rexp = ")" -val LPAREN: Rexp = "(" -val BEGIN: Rexp = "{" -val END: Rexp = "}" - -//regular expressions ranked by position in the list -val regs: List[Rexp] = - List(KEYWORD, ID, OP, NUM, SEMI, LPAREN, RPAREN, BEGIN, END, WHITESPACE) - -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true -} - -def zeroable (r: Rexp) : Boolean = r match { - case NULL => true - case EMPTY => false - case CHAR(_) => false - case ALT(r1, r2) => zeroable(r1) && zeroable(r2) - case SEQ(r1, r2) => zeroable(r1) || zeroable(r2) - case STAR(_) => false -} - -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) -} - - -// calculates derivatives until all of them are zeroable -@tailrec -def munch(s: List[Char], - pos: Int, - rs: List[Rexp], - last: Option[Int]): Option[Int] = rs match { - case Nil => last - case rs if (s.length <= pos) => last - case rs => { - val ders = rs.map(der(s(pos), _)) - val rs_nzero = ders.filterNot(zeroable(_)) - val rs_nulls = ders.filter(nullable(_)) - val new_last = if (rs_nulls != Nil) Some(pos) else last - munch(s, 1 + pos, rs_nzero, new_last) - } -} - -// iterates the munching function and prints -// out the component strings -@tailrec -def tokenize(s: String, rs: List[Rexp]) : Unit = munch(s.toList, 0, rs, None) match { - case None if (s == "") => println("EOF") - case None => println(s"Lexing error: $s") - case Some(n) => { - val (head, tail) = s.splitAt(n + 1) - print(s"|${head.replaceAll("\n","RET")}|") - tokenize(tail, rs) - } -} - - -val test_prog = """ -start := XXX; -x := start; -y := start; -z := start; -while 0 < x do { - while 0 < y do { - while 0 < z do { - z := z - 1 - }; - z := start; - y := y - 1 - }; - y := start; - x := x - 1 -}; -write x; -write y; -write z -""" - -tokenize(test_prog, regs) diff -r e66bd5c563eb -r b5b5583a3a08 progs/while-tests/collatz.while --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/while-tests/collatz.while Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,8 @@ +write "Input a number "; +read n; +while n > 1 do { + if n % 2 == 0 + then n := n/2 + else n := 3*n+1; +}; +write "Yes"; diff -r e66bd5c563eb -r b5b5583a3a08 progs/while-tests/factors.while --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/while-tests/factors.while Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,14 @@ +// Find all factors of a given input number +// by J.R. Cordy August 2005 + +write "Input n please"; +read n; +write "The factors of n are"; +f := 2; +while n != 1 do { + while (n / f) * f == n do { + write f; + n := n / f + }; + f := f + 1 +} \ No newline at end of file diff -r e66bd5c563eb -r b5b5583a3a08 progs/while-tests/fib.while --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/while-tests/fib.while Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,13 @@ +write "Fib"; +read n; +minus1 := 0; +minus2 := 1; +while n > 0 do { + temp := minus2; + minus2 := minus1 + minus2; + minus1 := temp; + n := n - 1 +}; +write "Result"; +write minus2 + diff -r e66bd5c563eb -r b5b5583a3a08 progs/while-tests/loops.while --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/while-tests/loops.while Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,14 @@ +start := 1000; +x := start; +y := start; +z := start; +while 0 < x do { + while 0 < y do { + while 0 < z do { z := z - 1 }; + z := start; + y := y - 1 + }; + y := start; + x := x - 1 +} + diff -r e66bd5c563eb -r b5b5583a3a08 progs/while-tests/primes.while --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/while-tests/primes.while Thu Jul 30 13:50:54 2020 +0100 @@ -0,0 +1,15 @@ +// prints out prime numbers from +// 2 to 100 (end) + +end := 100; +n := 2; +while (n < end) do { + f := 2; + tmp := 0; + while ((f < n / 2 + 1) && (tmp == 0)) do { + if ((n / f) * f == n) then { tmp := 1 } else { skip }; + f := f + 1 + }; + if (tmp == 0) then { write(n) } else { skip }; + n := n + 1 +} \ No newline at end of file diff -r e66bd5c563eb -r b5b5583a3a08 progs/while.scala --- a/progs/while.scala Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,244 +0,0 @@ -// A parser and evaluator for the while language -// -import matcher._ -import parser._ - - -// some regular expressions -val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") -val DIGIT = RANGE("0123456789") -val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT))) -val NUM = PLUS(DIGIT) -val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false") -val SEMI: Rexp = ";" -val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">") -val WHITESPACE = PLUS(RANGE(" \n")) -val RPAREN: Rexp = ")" -val LPAREN: Rexp = "(" -val BEGIN: Rexp = "{" -val END: Rexp = "}" - -// tokens for classifying the strings that have been recognised -abstract class Token -case object T_WHITESPACE extends Token -case object T_SEMI extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token -case object T_BEGIN extends Token -case object T_END extends Token -case class T_ID(s: String) extends Token -case class T_OP(s: String) extends Token -case class T_NUM(s: String) extends Token -case class T_KWD(s: String) extends Token - -val lexing_rules: List[(Rexp, List[Char] => Token)] = - List((KEYWORD, (s) => T_KWD(s.mkString)), - (ID, (s) => T_ID(s.mkString)), - (OP, (s) => T_OP(s.mkString)), - (NUM, (s) => T_NUM(s.mkString)), - (SEMI, (s) => T_SEMI), - (LPAREN, (s) => T_LPAREN), - (RPAREN, (s) => T_RPAREN), - (BEGIN, (s) => T_BEGIN), - (END, (s) => T_END), - (WHITESPACE, (s) => T_WHITESPACE)) - -// the tokenizer -val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE)) - -// the abstract syntax trees -abstract class Stmt -abstract class AExp -abstract class BExp -type Block = List[Stmt] -case object Skip extends Stmt -case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt -case class While(b: BExp, bl: Block) extends Stmt -case class Assign(s: String, a: AExp) extends Stmt - -case class Var(s: String) extends AExp -case class Num(i: Int) extends AExp -case class Aop(o: String, a1: AExp, a2: AExp) extends AExp - -case object True extends BExp -case object False extends BExp -case class Bop(o: String, a1: AExp, a2: AExp) extends BExp - -// atomic parsers -case class TokParser(tok: Token) extends Parser[List[Token], Token] { - def parse(ts: List[Token]) = ts match { - case t::ts if (t == tok) => Set((t, ts)) - case _ => Set () - } -} -implicit def token2tparser(t: Token) = TokParser(t) - -case object NumParser extends Parser[List[Token], Int] { - def parse(ts: List[Token]) = ts match { - case T_NUM(s)::ts => Set((s.toInt, ts)) - case _ => Set () - } -} - -case object IdParser extends Parser[List[Token], String] { - def parse(ts: List[Token]) = ts match { - case T_ID(s)::ts => Set((s, ts)) - case _ => Set () - } -} - -def len(xs: List[(Int, Int)]): Int = xs match { - case Nil => 0 - case (1, x)::xs => len(xs) + 1 - case (_, x)::xs => len(xs) -} - -def fst(p: (Int, Int)): Int = p match { - case Nil => 0 - case (1, x)::xs => len(xs) + 1 - case (_, x)::xs => len(xs) -} - - - -// arithmetic expressions -lazy val AExp: Parser[List[Token], AExp] = - (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } || - (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T -lazy val T: Parser[List[Token], AExp] = - (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F -lazy val F: Parser[List[Token], AExp] = - (T_LPAREN ~> AExp <~ T_RPAREN) || - IdParser ==> Var || - NumParser ==> Num - -// boolean expressions -lazy val BExp: Parser[List[Token], BExp] = - (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Bop("=", x, z): BExp } || - (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Bop("!=", x, z): BExp } || - (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Bop("<", x, z): BExp } || - (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Bop(">", x, z): BExp } || - (T_KWD("true") ==> ((_) => True)) || - (T_KWD("false") ==> ((_) => False: BExp)) - -lazy val Stmt: Parser[List[Token], Stmt] = - (T_KWD("skip") ==> ((_) => Skip: Stmt)) || - (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } || - (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==> - { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } || - (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } - -lazy val Stmts: Parser[List[Token], Block] = - (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } || - (Stmt ==> ((s) => List(s) : Block)) - -lazy val Block: Parser[List[Token], Block] = - (T_BEGIN ~> Stmts <~ T_END) || - (Stmt ==> ((s) => List(s))) - - -// examples -val p1 = "x := 5" -val p1_toks = Tok.fromString(p1) -val p1_ast = Block.parse_all(p1_toks) -println(p1_toks) -println(p1_ast) - -val p1a = "{ x := 5; y := 8}" -val p1a_toks = Tok.fromString(p1a) -val p1a_ast = Block.parse_all(p1a_toks) -println(p1a_ast) - -val p2 = "5 = 6" -val p2_toks = Tok.fromString(p2) -val p2_ast = BExp.parse_all(p2_toks) -println(p2_ast) - -val p2a = "true" -val p2a_toks = Tok.fromString(p2a) -val p2a_ast = BExp.parse_all(p2a_toks) -println(p2a_ast) - -val p3 = "if true then skip else skip" -val p3_toks = Tok.fromString(p3) -val p3_ast = Stmt.parse_all(p3_toks) -println(p3_ast) - -val p3a = "if true then x := 5 else x := 10" -val p3a_toks = Tok.fromString(p3a) -val p3a_ast = Stmt.parse_all(p3a_toks) -println(p3a_ast) - -val p3b = "if false then x := 5 else x := 10" -val p3b_toks = Tok.fromString(p3b) -val p3b_ast = Stmt.parse_all(p3b_toks) -println(p3b_ast) - -// multiplication -val p4 = """{ x := 5; - y := 4; - r := 0; - while y > 0 do { - r := r + x; - y := y - 1 - } - }""" -val p4_toks = Tok.fromString(p4) -val p4_ast = Block.parse_all(p4_toks) -println(p4_ast) - -val p5 = """ - n := 9; - minus1 := 0; - minus2 := 1; - temp := 0; - while n > 0 do { - temp := minus2; - minus2 := minus1 + minus2; - minus1 := temp; - n := n - 1 - }; - fib_res := minus2 -""" -val p5_toks = Tok.fromString(p5) -val p5_ast = Stmts.parse_all(p5_toks) - -// interpreter -type Env = Map[String, Int] - -def eval_bexp(b: BExp, env: Env) : Boolean = b match { - case True => true - case False => false - case Bop("=", a1, a2) => eval_aexp(a1, env) == eval_aexp(a2, env) - case Bop("!=", a1, a2) => !(eval_aexp(a1, env) == eval_aexp(a2, env)) - case Bop(">", a1, a2) => eval_aexp(a1, env) > eval_aexp(a2, env) - case Bop("<", a1, a2) => eval_aexp(a1, env) < eval_aexp(a2, env) -} - -def eval_aexp(a: AExp, env : Env) : Int = a match { - case Num(i) => i - case Var(s) => env(s) - case Aop("+", a1, a2) => eval_aexp(a1, env) + eval_aexp(a2, env) - case Aop("-", a1, a2) => eval_aexp(a1, env) - eval_aexp(a2, env) - case Aop("*", a1, a2) => eval_aexp(a1, env) * eval_aexp(a2, env) -} - -def eval_stmt(s: Stmt, env: Env) : Env = s match { - case Skip => env - case Assign(x, a) => env + (x -> eval_aexp(a, env)) - case If(b, bl1, bl2) => if (eval_bexp(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env) - case While(b, bl) => - if (eval_bexp(b, env)) eval_stmt(While(b, bl), eval_bl(bl, env)) - else env -} - -def eval_bl(bl: Block, env: Env) : Env = bl match { - case Nil => env - case s::bl => eval_bl(bl, eval_stmt(s, env)) -} - -//examples -println(eval_stmt(p3a_ast.head, Map.empty)) -println(eval_stmt(p3b_ast.head, Map.empty)) -println(eval_bl(p4_ast.head, Map.empty)) -println(eval_bl(p5_ast.head, Map.empty)) diff -r e66bd5c563eb -r b5b5583a3a08 progs/while1.scala --- a/progs/while1.scala Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,221 +0,0 @@ -// A parser and evaluator for the WHILE language -// - - - - - -// some regular expressions -val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") -val DIGIT = RANGE("0123456789") -val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT))) -val NUM = PLUS(DIGIT) -val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write") -val SEMI: Rexp = ";" -val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">") -val WHITESPACE = PLUS(RANGE(" \n")) -val RPAREN: Rexp = ")" -val LPAREN: Rexp = "(" -val BEGIN: Rexp = "{" -val END: Rexp = "}" -val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/") - -// tokens for classifying the strings that have been recognised -abstract class Token -case object T_WHITESPACE extends Token -case object T_COMMENT extends Token -case object T_SEMI extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token -case object T_BEGIN extends Token -case object T_END extends Token -case class T_ID(s: String) extends Token -case class T_OP(s: String) extends Token -case class T_NUM(s: String) extends Token -case class T_KWD(s: String) extends Token - -val lexing_rules: List[(Rexp, List[Char] => Token)] = - List((KEYWORD, (s) => T_KWD(s.mkString)), - (ID, (s) => T_ID(s.mkString)), - (OP, (s) => T_OP(s.mkString)), - (NUM, (s) => T_NUM(s.mkString)), - (SEMI, (s) => T_SEMI), - (LPAREN, (s) => T_LPAREN), - (RPAREN, (s) => T_RPAREN), - (BEGIN, (s) => T_BEGIN), - (END, (s) => T_END), - (WHITESPACE, (s) => T_WHITESPACE), - (COMMENT, (s) => T_COMMENT)) - -// the tokenizer -val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT)) - -// the abstract syntax trees -abstract class Stmt -abstract class AExp -abstract class BExp -type Block = List[Stmt] -case object Skip extends Stmt -case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt -case class While(b: BExp, bl: Block) extends Stmt -case class Assign(s: String, a: AExp) extends Stmt -case class Write(s: String) extends Stmt - -case class Var(s: String) extends AExp -case class Num(i: Int) extends AExp -case class Aop(o: String, a1: AExp, a2: AExp) extends AExp - -case object True extends BExp -case object False extends BExp -case class Bop(o: String, a1: AExp, a2: AExp) extends BExp - -// atomic parsers -case class TokParser(tok: Token) extends Parser[List[Token], Token] { - def parse(ts: List[Token]) = ts match { - case t::ts if (t == tok) => Set((t, ts)) - case _ => Set () - } -} -implicit def token2tparser(t: Token) = TokParser(t) - -case object NumParser extends Parser[List[Token], Int] { - def parse(ts: List[Token]) = ts match { - case T_NUM(s)::ts => Set((s.toInt, ts)) - case _ => Set () - } -} - -case object IdParser extends Parser[List[Token], String] { - def parse(ts: List[Token]) = ts match { - case T_ID(s)::ts => Set((s, ts)) - case _ => Set () - } -} - - -// arithmetic expressions -lazy val AExp: Parser[List[Token], AExp] = - (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } || - (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T -lazy val T: Parser[List[Token], AExp] = - (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F -lazy val F: Parser[List[Token], AExp] = - (T_LPAREN ~> AExp <~ T_RPAREN) || - IdParser ==> Var || - NumParser ==> Num - -// boolean expressions -lazy val BExp: Parser[List[Token], BExp] = - (T_KWD("true") ==> ((_) => True: BExp)) || - (T_KWD("false") ==> ((_) => False: BExp)) || - (T_LPAREN ~> BExp <~ T_RPAREN) || - (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Bop("=", x, z): BExp } || - (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Bop("!=", x, z): BExp } || - (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Bop("<", x, z): BExp } || - (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Bop("<", z, x): BExp } - -lazy val Stmt: Parser[List[Token], Stmt] = - (T_KWD("skip") ==> ((_) => Skip: Stmt)) || - (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } || - (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==> - { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } || - (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } || - (T_KWD("write") ~ IdParser) ==> { case (x, y) => Write(y) } - -lazy val Stmts: Parser[List[Token], Block] = - (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } || - (Stmt ==> ((s) => List(s) : Block)) - -lazy val Block: Parser[List[Token], Block] = - (T_BEGIN ~> Stmts <~ T_END) || - (Stmt ==> ((s) => List(s))) - -// interpreter -type Env = Map[String, Int] - -def eval_bexp(b: BExp, env: Env) : Boolean = b match { - case True => true - case False => false - case Bop("=", a1, a2) => eval_aexp(a1, env) == eval_aexp(a2, env) - case Bop("!=", a1, a2) => !(eval_aexp(a1, env) == eval_aexp(a2, env)) - case Bop("<", a1, a2) => eval_aexp(a1, env) < eval_aexp(a2, env) -} - -def eval_aexp(a: AExp, env : Env) : Int = a match { - case Num(i) => i - case Var(s) => env(s) - case Aop("+", a1, a2) => eval_aexp(a1, env) + eval_aexp(a2, env) - case Aop("-", a1, a2) => eval_aexp(a1, env) - eval_aexp(a2, env) - case Aop("*", a1, a2) => eval_aexp(a1, env) * eval_aexp(a2, env) -} - -def eval_stmt(s: Stmt, env: Env) : Env = s match { - case Skip => env - case Assign(x, a) => env + (x -> eval_aexp(a, env)) - case If(b, bl1, bl2) => if (eval_bexp(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env) - case While(b, bl) => - if (eval_bexp(b, env)) eval_stmt(While(b, bl), eval_bl(bl, env)) - else env - case Write(x) => { println(env(x)); env } -} - -def eval_bl(bl: Block, env: Env) : Env = bl match { - case Nil => env - case s::bl => eval_bl(bl, eval_stmt(s, env)) -} - -def eval_prog(name: String) : Env = { - val tks = Tok.fromFile(name) - val ast = Stmts.parse_single(tks) - eval_bl(ast, Map.empty) -} - - -//examples - -//eval_prog("loops.while") -eval_prog("fib.while") - - -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - - -val test_prog = """ -start := XXX; -x := start; -y := start; -z := start; -while 0 < x do { - while 0 < y do { - while 0 < z do { - z := z - 1 - }; - z := start; - y := y - 1 - }; - y := start; - x := x - 1 -} -""" - - - -def eval_test(n: Int) : Unit = { - val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString)) - val ast = Stmts.parse_single(tks) - println(n + " " + time_needed(2, eval_bl(ast, Map.empty))) -} - -List(1, 200, 400, 600, 800, 1000, 1200, 1400, 1600).map(eval_test(_)) - - - - - - -