# HG changeset patch
# User Christian Urban
# Date 1680643869 -3600
# Node ID 2bf1516d730feb865dff8da6cbd0db2c609667d6
# Parent  15973df32613536ff076bccf42716c85bb174444
updated

diff -r 15973df32613 -r 2bf1516d730f hws/hw09.pdf
Binary file hws/hw09.pdf has changed
diff -r 15973df32613 -r 2bf1516d730f hws/hw09.tex
--- a/hws/hw09.tex	Wed Dec 21 14:33:05 2022 +0000
+++ b/hws/hw09.tex	Tue Apr 04 22:31:09 2023 +0100
@@ -2,6 +2,7 @@
 \usepackage{../style}
 \usepackage{../graphics}
 \usepackage{../langs}
+\usepackage{../grammar}
 
 \begin{document}
 
@@ -92,6 +93,25 @@
 }
 \end{lstlisting}
 
+\item As an optimisation technique, a compiler might want to detect `dead code' and
+  not generate anything for this code. Why does this optimisation technique have the
+  potential to speed up the run-time of a program? (Hint: On what CPUs are programs
+  run nowadays?)
+
+\item In an earlier question, we analysed the advantages of having a lexing phase
+  before running the parser (a lexer is definitely a good thing to have). But you
+  might wonder whether a lexer could also be implemented by a parser and some simple
+  grammar rules. Consider for example:
+
+  \begin{plstx}[margin=1cm]
+  : \meta{S\/} ::= (\meta{Kw\/}\mid \meta{Id\/}\mid \meta{Ws\/}) \cdot \meta{S\/} \;\mid\; \epsilon\\
+  : \meta{Kw\/} ::= \texttt{if} \mid \texttt{then} \mid \ldots\\
+  : \meta{Id\/} ::= (\texttt{a} \mid\ldots\mid \texttt{z}) \cdot \meta{Id\/} \;\mid\; \epsilon\\
+  : \meta{Ws\/} ::= \ldots\\
+  \end{plstx}
+
+  What is wrong with implementing a lexer in this way?
+
 \item What is the difference between a parse tree and an abstract syntax tree?
   Give some simple examples for each of them.
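To make the problem with the grammar-based "lexer" concrete, here is a small Scala sketch (the names Token, Kw, Id and the function lex are made up for illustration; it is not part of the homework). It enumerates every way the grammar above can split an input into keywords and identifiers:

sealed trait Token
case class Kw(s: String) extends Token
case class Id(s: String) extends Token

// all ways of splitting a string into Kw/Id tokens
def lex(s: String): Set[List[Token]] =
  if (s.isEmpty) Set(Nil)
  else (for {
    i <- (1 to s.length).toSet[Int]
    (pre, rest) = s.splitAt(i)
    tok <- (if (pre == "if" || pre == "then") Set[Token](Kw(pre)) else Set[Token]()) ++
           (if (pre.forall(_.isLetter)) Set[Token](Id(pre)) else Set[Token]())
    ts  <- lex(rest)
  } yield tok :: ts)

// lex("iffoo") contains List(Id(iffoo)), but also List(Kw(if), Id(foo)),
// List(Id(i), Id(f), Id(foo)), ... -- the grammar rules give no reason to
// prefer the longest match, which is exactly what a lexer must do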
diff -r 15973df32613 -r 2bf1516d730f progs/catastrophic/catastrophic9.java
--- a/progs/catastrophic/catastrophic9.java	Wed Dec 21 14:33:05 2022 +0000
+++ b/progs/catastrophic/catastrophic9.java	Tue Apr 04 22:31:09 2023 +0100
@@ -20,6 +20,7 @@
 public class catastrophic9 {
     public static void main(String[] args) {
 
+        // we always run all the tests twice in order to warm up the JVM
         for (int runs = 0; runs < 2; runs++) {
 
diff -r 15973df32613 -r 2bf1516d730f progs/compile-lexer.scala
--- a/progs/compile-lexer.scala	Wed Dec 21 14:33:05 2022 +0000
+++ b/progs/compile-lexer.scala	Tue Apr 04 22:31:09 2023 +0100
@@ -478,12 +478,6 @@
 .class public XXX.XXX
 .super java/lang/Object
 
-.method public <init>()V
-   aload_0
-   invokenonvirtual java/lang/Object/<init>()V
-   return
-.end method
-
 .method public static write(I)V
     .limit locals 5
     .limit stack 5
 
diff -r 15973df32613 -r 2bf1516d730f progs/compile_arrays.scala
--- a/progs/compile_arrays.scala	Wed Dec 21 14:33:05 2022 +0000
+++ b/progs/compile_arrays.scala	Tue Apr 04 22:31:09 2023 +0100
@@ -34,17 +34,11 @@
 
 // compiler headers needed for the JVM
-// (contains an init method, as well as methods for read and write)
+// (contains methods for read and write)
 val beginning = """
 .class public XXX.XXX
 .super java/lang/Object
 
-.method public <init>()V
-   aload_0
-   invokenonvirtual java/lang/Object/<init>()V
-   return
-.end method
-
 .method public static write(I)V
     .limit locals 1
     .limit stack 2
 
diff -r 15973df32613 -r 2bf1516d730f progs/parser-combinators/comb1-2.sc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/parser-combinators/comb1-2.sc	Tue Apr 04 22:31:09 2023 +0100
@@ -0,0 +1,241 @@
+// Parser Combinators: Simple Version
+//====================================
+//
+// Call with Ammonite (Scala 2.13.10)
+//
+//     amm comb1-2.sc
+
+
+// Note, in the lectures I did not show the implicit type bound
+// I : IsSeq, which means that the input type 'I' needs to be
+// a sequence.
+
+type IsSeq[A] = A => Seq[_]
+
+abstract class Parser[I : IsSeq, T]{
+  def parse(in: I): Set[(T, I)]
+
+  def parse_all(in: I) : Set[T] =
+    for ((hd, tl) <- parse(in);
+        if tl.isEmpty) yield hd
+}
+
+// parser combinators
+
+// alternative parser
+class AltParser[I : IsSeq, T](p: => Parser[I, T],
+                              q: => Parser[I, T]) extends Parser[I, T] {
+  def parse(in: I) = p.parse(in) ++ q.parse(in)
+}
+
+// sequence parser
+class SeqParser[I : IsSeq, T, S](p: => Parser[I, T],
+                                 q: => Parser[I, S]) extends Parser[I, (T, S)] {
+  def parse(in: I) =
+    for ((hd1, tl1) <- p.parse(in);
+         (hd2, tl2) <- q.parse(tl1)) yield ((hd1, hd2), tl2)
+}
+
+// map parser
+class MapParser[I : IsSeq, T, S](p: => Parser[I, T],
+                                 f: T => S) extends Parser[I, S] {
+  def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)
+}
+
+
+
+// an example of an atomic parser for characters
+case class CharParser(c: Char) extends Parser[String, Char] {
+  def parse(in: String) =
+    if (in != "" && in.head == c) Set((c, in.tail)) else Set()
+}
+
+CharParser('a').parse("abc")
+
+// an atomic parser for parsing strings according to a regex
+import scala.util.matching.Regex
+
+case class RegexParser(reg: Regex) extends Parser[String, String] {
+  def parse(in: String) = reg.findPrefixMatchOf(in) match {
+    case None => Set()
+    case Some(m) => Set((m.matched, m.after.toString))
+  }
+}
+
+// atomic parsers for numbers and "verbatim" strings
+val NumParser = RegexParser("[0-9]+".r)
+def StrParser(s: String) = RegexParser(Regex.quote(s).r)
+
+NumParser.parse("a123a123bc")
+StrParser("else").parse("elsethen")
+
+
+// NumParserInt transforms a "string integer" into a proper Int
+// (needs "new" because MapParser is not a case class)
+
+val NumParserInt = new MapParser(NumParser, (s: String) => s.toInt)
+NumParserInt.parse("123abc")
+
+// the following string interpolation allows us to write
+// StrParser(_some_string_) more conveniently as
+//
+// p"<_some_string_>"
+
+
+implicit def parser_interpolation(sc: StringContext) = new {
+  def p(args: Any*) = StrParser(sc.s(args:_*))
+}
+
+(p"else").parse("elsethen")
+
+// more convenient syntax for parser combinators
+
+implicit def ParserOps[I : IsSeq, T](p: Parser[I, T]) = new {
+  def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)
+  def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
+  def map[S](f: => T => S) = new MapParser[I, T, S](p, f)
+}
+
+// example
+def toU(s: String) = s.map(_.toUpper)
+(p"ELSE").map(toU(_)).parse("ELSEifthen")
+
+// these implicits allow us to use an infix notation for
+// sequences and alternatives; we also can write the usual
+// map for a MapParser
+
+
+// with this NumParserInt can now be written more conveniently
+// as:
+
+val NumParserInt2 = NumParser.map(_.toInt)
+
+
+
+// A parser for palindromes (just returns them as string)
+lazy val Pal : Parser[String, String] = {
+  (p"a" ~ Pal ~ p"a").map{ case ((x, y), z) => s"$x$y$z" } ||
+  (p"b" ~ Pal ~ p"b").map{ case ((x, y), z) => s"$x$y$z" } ||
+  p"a" || p"b" || p""
+}
+
+// examples
+Pal.parse_all("abaaaba")
+Pal.parse("abaaaba")
+
+println("Palindrome: " + Pal.parse_all("abaaaba"))
+
+// A parser for well-nested parentheses
+//
+//   P ::= ( P ) P | epsilon
+//
+//   (transforms '(' -> '{' , ')' -> '}' )
+lazy val P : Parser[String, String] = {
+  (p"(" ~ P ~ p")" ~ P).map{ case (((_, x), _), y) => "{" + x + "}" + y } ||
+  p""
+}
+
+println(P.parse_all("(((()()))())"))
+println(P.parse_all("(((()()))()))"))
+println(P.parse_all(")("))
+println(P.parse_all("()"))
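+
+// For comparison, a variant of P (an extra example, not in the
+// original script) that computes the maximal nesting depth instead
+// of transforming the brackets:
+lazy val Depth : Parser[String, Int] = {
+  (p"(" ~ Depth ~ p")" ~ Depth).map{ case (((_, x), _), y) => (x + 1).max(y) } ||
+  p"".map{ _ => 0 }
+}
+
+println(Depth.parse_all("(((()()))())"))   // => Set(4)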
+// A parser for arithmetic expressions (Terms and Factors)
+
+lazy val E: Parser[String, Int] = {
+  (T ~ p"+" ~ E).map{ case ((x, _), z) => x + z } ||
+  (T ~ p"-" ~ E).map{ case ((x, _), z) => x - z } || T }
+lazy val T: Parser[String, Int] = {
+  (F ~ p"*" ~ T).map{ case ((x, _), z) => x * z } || F }
+lazy val F: Parser[String, Int] = {
+  (p"(" ~ E ~ p")").map{ case ((_, y), _) => y } || NumParserInt }
+
+println(E.parse_all("1+3+4"))
+println(E.parse("1+3+4"))
+println(E.parse_all("4*2+3"))
+println(E.parse_all("4*(2+3)"))
+println(E.parse_all("(4)*((2+3))"))
+println(E.parse_all("4/2+3"))
+println(E.parse("1 + 2 * 3"))
+println(E.parse_all("(1+2)+3"))
+println(E.parse_all("1+2+3"))
+
+
+// with parser combinators (and other parsing algorithms)
+// no left-recursion is allowed, otherwise the parser will loop
+
+lazy val EL: Parser[String, Int] =
+  ((EL ~ p"+" ~ EL).map{ case ((x, y), z) => x + z} ||
+   (EL ~ p"*" ~ EL).map{ case ((x, y), z) => x * z} ||
+   (p"(" ~ EL ~ p")").map{ case ((x, y), z) => y} ||
+   NumParserInt)
+
+// this will run forever:
+//println(EL.parse_all("1+2+3"))
+
+
+// non-ambiguous vs ambiguous grammars
+
+// ambiguous
+lazy val S : Parser[String, String] =
+  (p"1" ~ S ~ S).map{ case ((x, y), z) => x + y + z } || p""
+
+//println(time(S.parse("1" * 10)))
+//println(time(S.parse_all("1" * 10)))
+
+// non-ambiguous
+lazy val U : Parser[String, String] =
+  (p"1" ~ U).map{ case (x, y) => x + y } || p""
+
+//println(time(U.parse("1" * 10)))
+//println(time(U.parse_all("1" * 10)))
+println(U.parse("1" * 25))
+
+U.parse("11")
+U.parse("11111")
+U.parse("11011")
+
+U.parse_all("1" * 100)
+U.parse_all("1" * 100 + "0")
+
+// you can see the difference in the second example
+//S.parse_all("1" * 100)          // succeeds
+//S.parse_all("1" * 100 + "0")    // fails
+
+
+// A variant which counts how many 1s are parsed
+lazy val UCount : Parser[String, Int] =
+  (p"1" ~ UCount).map{ case (_, y) => y + 1 } || p"".map{ _ => 0 }
+
+println(UCount.parse("11111"))
+println(UCount.parse_all("11111"))
+
+// Two single character parsers
+lazy val One : Parser[String, String] = p"a"
+lazy val Two : Parser[String, String] = p"b"
+
+One.parse("a")
+One.parse("aaa")
+
+// note how the pairs nest to the left with sequence parsers
+(One ~ One).parse("aaa")
+(One ~ One ~ One).parse("aaa")
+(One ~ One ~ One ~ One).parse("aaaa")
+
+(One || Two).parse("aaa")
+
+
+
+// a problem with the arithmetic expression parser: it
+// gets very slow with deeply nested parentheses
+
+println("Runtime problem")
+println(E.parse("1"))
+println(E.parse("(1)"))
+println(E.parse("((1))"))
+//println(E.parse("(((1)))"))
+//println(E.parse("((((1))))"))
+//println(E.parse("((((((1))))))"))
+//println(E.parse("(((((((1)))))))"))
+//println(E.parse("((((((((1))))))))"))
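+
+// a small timing helper, as assumed by the commented-out time(...)
+// tests above (no such function is defined in this script)
+def time[T](code: => T) : T = {
+  val start = System.nanoTime()
+  val result = code
+  println(f"${(System.nanoTime() - start) / 1.0e9}%.4f secs")
+  result
+}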
diff -r 15973df32613 -r 2bf1516d730f progs/parser-combinators/comb1.sc
--- a/progs/parser-combinators/comb1.sc	Wed Dec 21 14:33:05 2022 +0000
+++ b/progs/parser-combinators/comb1.sc	Tue Apr 04 22:31:09 2023 +0100
@@ -6,39 +6,37 @@
 //   amm comb1.sc
 
 
-// Note, in the lectures I did not show the implicit type constraint
-// I : IsSeq, which means that the input type 'I' needs to be
-// a sequence.
+// Note, in the lectures I did not show the type bound
+// using is: I => Seq[_], which means that the input
+// type 'I' needs to be a sequence.
 
-type IsSeq[A] = A => Seq[_]
-
-abstract class Parser[I : IsSeq, T]{
+abstract class Parser[I, T](using is: I => Seq[_]) {
   def parse(in: I): Set[(T, I)]
 
   def parse_all(in: I) : Set[T] =
     for ((hd, tl) <- parse(in);
-        if tl.isEmpty) yield hd
+        if is(tl).isEmpty) yield hd
 }
 
 // parser combinators
 
 // alternative parser
-class AltParser[I : IsSeq, T](p: => Parser[I, T],
-                              q: => Parser[I, T]) extends Parser[I, T] {
+class AltParser[I, T](p: => Parser[I, T],
+                      q: => Parser[I, T])(using I => Seq[_]) extends Parser[I, T] {
   def parse(in: I) = p.parse(in) ++ q.parse(in)
 }
 
 // sequence parser
-class SeqParser[I : IsSeq, T, S](p: => Parser[I, T],
-                                 q: => Parser[I, S]) extends Parser[I, (T, S)] {
+class SeqParser[I, T, S](p: => Parser[I, T],
+                         q: => Parser[I, S])(using I => Seq[_]) extends Parser[I, (T, S)] {
   def parse(in: I) =
     for ((hd1, tl1) <- p.parse(in);
          (hd2, tl2) <- q.parse(tl1)) yield ((hd1, hd2), tl2)
 }
 
 // map parser
-class MapParser[I : IsSeq, T, S](p: => Parser[I, T],
-                                 f: T => S) extends Parser[I, S] {
+class MapParser[I, T, S](p: => Parser[I, T],
+                         f: T => S)(using I => Seq[_]) extends Parser[I, S] {
   def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)
 }
 
@@ -50,7 +48,7 @@
   if (in != "" && in.head == c) Set((c, in.tail)) else Set()
 }
 
-CharParser('c').parse("abc")
+CharParser('a').parse("abc")
 
 // an atomic parser for parsing strings according to a regex
 import scala.util.matching.Regex
 
@@ -67,7 +65,7 @@
 def StrParser(s: String) = RegexParser(Regex.quote(s).r)
 
 NumParser.parse("a123a123bc")
-StrParser("else").parse("eelsethen")
+StrParser("else").parse("elsethen")
 
 
 // NumParserInt transforms a "string integer" into a proper Int
 
@@ -81,23 +79,22 @@
 //
 // p"<_some_string_>"
 
+extension (sc: StringContext)
+  def p(args: Any*) = StrParser(sc.s(args:_*))
 
-implicit def parser_interpolation(sc: StringContext) = new {
-  def p(args: Any*) = StrParser(sc.s(args:_*))
-}
 
 (p"else").parse("elsethen")
 
 // more convenient syntax for parser combinators
 
-implicit def ParserOps[I : IsSeq, T](p: Parser[I, T]) = new {
+extension [I, T](p: Parser[I, T])(using I => Seq[_]) {
   def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)
   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
-  def map[S](f: => T => S) = new MapParser[I, T, S](p, f)
+  def mapp[S](f: => T => S) = new MapParser[I, T, S](p, f)
}
 
 def toU(s: String) = s.map(_.toUpper)
-(p"ELSE").map(toU(_)).parse("ELSEifthen")
+(p"ELSE").mapp(toU(_)).parse("ELSEifthen")
 
 // these implicits allow us to use an infix notation for
 // sequences and alternatives; we also can write the usual
 // map for a MapParser
 
@@ -110,12 +107,10 @@
-val NumParserInt2 = NumParser.map(_.toInt)
-
-
+val NumParserInt2 = NumParser.mapp(_.toInt)
 
 // A parser for palindromes (just returns them as string)
 lazy val Pal : Parser[String, String] = {
-  (p"a" ~ Pal ~ p"a").map{ case ((x, y), z) => s"$x$y$z" } ||
-  (p"b" ~ Pal ~ p"b").map{ case ((x, y), z) => s"$x$y$z" } ||
+  (p"a" ~ Pal ~ p"a").mapp{ case ((x, y), z) => s"$x$y$z" } ||
+  (p"b" ~ Pal ~ p"b").mapp{ case ((x, y), z) => s"$x$y$z" } ||
   p"a" || p"b" || p""
 }
 
@@ -131,7 +126,7 @@
 //
 // (transforms '(' -> '{' , ')' -> '}' )
 lazy val P : Parser[String, String] = {
-  (p"(" ~ P ~ p")" ~ P).map{ case (((_, x), _), y) => "{" + x + "}" + y } ||
+  (p"(" ~ P ~ p")" ~ P).mapp{ case (((_, x), _), y) => "{" + x + "}" + y } ||
   p""
 }
 
@@ -233,13 +228,28 @@
 println(E.parse("1"))
 println(E.parse("(1)"))
 println(E.parse("((1))"))
-//println(E.parse("(((1)))"))
-//println(E.parse("((((1))))"))
+println(E.parse("(((1)))"))
+println(E.parse("((((1))))"))
 //println(E.parse("((((((1))))))"))
 //println(E.parse("(((((((1)))))))"))
-//println(E.parse("((((((((1)))))))"))
+//println(E.parse("((((((((1))))))))"))
 
+// faster, because the + and - alternatives are merged into one
+lazy val E2: Parser[String, Int] = {
+  (T2 ~ (p"+" || p"-") ~ E2).mapp[Int]{ case ((x, y), z) => if (y == "+") x + z else x - z} || T2 }
+lazy val T2: Parser[String, Int] = {
+  (F2 ~ p"*" ~ T2).mapp[Int]{ case ((x, _), z) => x * z } || F2 }
+lazy val F2: Parser[String, Int] = {
+  (p"(" ~ E2 ~ p")").mapp[Int]{ case ((_, y), _) => y } || NumParserInt }
 
-// runs with amm2 and amm3
+println("Runtime problem")
+println(E2.parse("1"))
+println(E2.parse("(1)"))
+println(E2.parse("((1))"))
+println(E2.parse("(((1)))"))
+println(E2.parse("((((1))))"))
+//println(E2.parse("((((((1))))))"))
+//println(E2.parse("(((((((1)))))))"))
+//println(E2.parse("((((((((1))))))))"))
\ No newline at end of file
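The conversion above replaces Scala 2 implicits with Scala 3 using-clauses and extension methods. Reduced to a self-contained sketch (Show, Box2 and Box3 are made-up names, not from the course files), the two ways of demanding the evidence are equivalent:

trait Show[A] { def show(a: A): String }
given Show[Int] with { def show(a: Int) = a.toString }

class Box2[A : Show](a: A) {              // Scala 2-style context bound
  def dump = summon[Show[A]].show(a)
}

class Box3[A](a: A)(using s: Show[A]) {   // Scala 3 using-clause
  def dump = s.show(a)
}

// Box2(42).dump and Box3(42).dump both yield "42" via the same Show[Int]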
//println(E.parse("(((((((1)))))))")) -//println(E.parse("((((((((1)))))))")) +//println(E.parse("((((((((1))))))))")) +// faster because of merge +lazy val E2: Parser[String, Int] = { + (T2 ~ (p"+" || p"-") ~ E2).mapp[Int]{ case ((x, y), z) => if (y == "+") x + z else x - z} || T2 } +lazy val T2: Parser[String, Int] = { + (F2 ~ p"*" ~ T2).mapp[Int]{ case ((x, _), z) => x * z } || F2 } +lazy val F2: Parser[String, Int] = { + (p"(" ~ E2 ~ p")").mapp[Int]{ case ((_, y), _) => y } || NumParserInt } -// runs with amm2 and amm3 +println("Runtime problem") +println(E2.parse("1")) +println(E2.parse("(1)")) +println(E2.parse("((1))")) +println(E2.parse("(((1)))")) +println(E2.parse("((((1))))")) +//println(E2.parse("((((((1))))))")) +//println(E2.parse("(((((((1)))))))")) +//println(E2.parse("((((((((1))))))))")) \ No newline at end of file diff -r 15973df32613 -r 2bf1516d730f progs/parser-combinators/comb2-2.sc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/parser-combinators/comb2-2.sc Tue Apr 04 22:31:09 2023 +0100 @@ -0,0 +1,280 @@ +// Parser Combinators: +// Simple Version for WHILE-language +//==================================== +// +// with some added convenience for +// map-parsers and grammar rules +// +// call with +// +// amm comb2.sc + + +// more convenience for the map parsers later on; +// it allows writing nested patterns as +// case x ~ y ~ z => ... + + + +case class ~[+A, +B](x: A, y: B) + +// constraint for the input +type IsSeq[A] = A => Seq[_] + + +abstract class Parser[I : IsSeq, T]{ + def parse(in: I): Set[(T, I)] + + def parse_all(in: I) : Set[T] = + for ((hd, tl) <- parse(in); + if tl.isEmpty) yield hd +} + +// parser combinators + +// sequence parser +class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], + q: => Parser[I, S]) extends Parser[I, ~[T, S]] { + def parse(in: I) = + for ((hd1, tl1) <- p.parse(in); + (hd2, tl2) <- q.parse(tl1)) yield (new ~(hd1, hd2), tl2) +} + +// alternative parser +class AltParser[I : IsSeq, T](p: => Parser[I, T], + q: => Parser[I, T]) extends Parser[I, T] { + def parse(in: I) = p.parse(in) ++ q.parse(in) +} + +// map parser +class MapParser[I : IsSeq, T, S](p: => Parser[I, T], + f: T => S) extends Parser[I, S] { + def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl) +} + + + +// atomic parser for (particular) strings +case class StrParser(s: String) extends Parser[String, String] { + def parse(sb: String) = { + val (prefix, suffix) = sb.splitAt(s.length) + if (prefix == s) Set((prefix, suffix)) else Set() + } +} + +// atomic parser for identifiers (variable names) +case object IdParser extends Parser[String, String] { + val reg = "[a-z][a-z,0-9]*".r + def parse(sb: String) = reg.findPrefixOf(sb) match { + case None => Set() + case Some(s) => Set(sb.splitAt(s.length)) + } +} + + +// atomic parser for numbers (transformed into ints) +case object NumParser extends Parser[String, Int] { + val reg = "[0-9]+".r + def parse(sb: String) = reg.findPrefixOf(sb) match { + case None => Set() + case Some(s) => { + val (hd, tl) = sb.splitAt(s.length) + Set((hd.toInt, tl)) + } + } +} + +// the following string interpolation allows us to write +// StrParser(_some_string_) more conveniently as +// +// p"<_some_string_>" + +implicit def parser_interpolation(sc: StringContext) = new { + def p(args: Any*) = StrParser(sc.s(args:_*)) +} + +// more convenient syntax for parser combinators +implicit def ParserOps[I : IsSeq, T](p: Parser[I, T]) = new { + def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q) + def ~[S] (q : => Parser[I, S]) = 
+
+
+// the abstract syntax trees for the WHILE language
+abstract class Stmt
+abstract class AExp
+abstract class BExp
+
+type Block = List[Stmt]
+
+case object Skip extends Stmt
+case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt
+case class While(b: BExp, bl: Block) extends Stmt
+case class Assign(s: String, a: AExp) extends Stmt
+case class Write(s: String) extends Stmt
+
+case class Var(s: String) extends AExp
+case class Num(i: Int) extends AExp
+case class Aop(o: String, a1: AExp, a2: AExp) extends AExp
+
+case object True extends BExp
+case object False extends BExp
+case class Bop(o: String, a1: AExp, a2: AExp) extends BExp
+case class And(b1: BExp, b2: BExp) extends BExp
+case class Or(b1: BExp, b2: BExp) extends BExp
+
+
+// arithmetic expressions
+lazy val AExp: Parser[String, AExp] =
+  (Te ~ p"+" ~ AExp).map[AExp]{ case x ~ _ ~ z => Aop("+", x, z) } ||
+  (Te ~ p"-" ~ AExp).map[AExp]{ case x ~ _ ~ z => Aop("-", x, z) } || Te
+lazy val Te: Parser[String, AExp] =
+  (Fa ~ p"*" ~ Te).map[AExp]{ case x ~ _ ~ z => Aop("*", x, z) } ||
+  (Fa ~ p"/" ~ Te).map[AExp]{ case x ~ _ ~ z => Aop("/", x, z) } || Fa
+lazy val Fa: Parser[String, AExp] =
+  (p"(" ~ AExp ~ p")").map{ case _ ~ y ~ _ => y } ||
+  IdParser.map(Var) ||
+  NumParser.map(Num)
+
+// boolean expressions with some simple nesting
+lazy val BExp: Parser[String, BExp] =
+  (AExp ~ p"==" ~ AExp).map[BExp]{ case x ~ _ ~ z => Bop("==", x, z) } ||
+  (AExp ~ p"!=" ~ AExp).map[BExp]{ case x ~ _ ~ z => Bop("!=", x, z) } ||
+  (AExp ~ p"<" ~ AExp).map[BExp]{ case x ~ _ ~ z => Bop("<", x, z) } ||
+  (AExp ~ p">" ~ AExp).map[BExp]{ case x ~ _ ~ z => Bop(">", x, z) } ||
+  (p"(" ~ BExp ~ p")" ~ p"&&" ~ BExp).map[BExp]{ case _ ~ y ~ _ ~ _ ~ v => And(y, v) } ||
+  (p"(" ~ BExp ~ p")" ~ p"||" ~ BExp).map[BExp]{ case _ ~ y ~ _ ~ _ ~ v => Or(y, v) } ||
+  (p"true".map[BExp]{ _ => True }) ||
+  (p"false".map[BExp]{ _ => False }) ||
+  (p"(" ~ BExp ~ p")").map[BExp]{ case _ ~ x ~ _ => x }
+
+// a single statement
+lazy val Stmt: Parser[String, Stmt] =
+  ((p"skip".map[Stmt]{_ => Skip }) ||
+   (IdParser ~ p":=" ~ AExp).map[Stmt]{ case x ~ _ ~ z => Assign(x, z) } ||
+   (p"write(" ~ IdParser ~ p")").map[Stmt]{ case _ ~ y ~ _ => Write(y) } ||
+   (p"if" ~ BExp ~ p"then" ~ Block ~ p"else" ~ Block)
+     .map[Stmt]{ case _ ~ y ~ _ ~ u ~ _ ~ w => If(y, u, w) } ||
+   (p"while" ~ BExp ~ p"do" ~ Block).map[Stmt]{ case _ ~ y ~ _ ~ w => While(y, w) })
+
+
+// statements
+lazy val Stmts: Parser[String, Block] =
+  (Stmt ~ p";" ~ Stmts).map[Block]{ case x ~ _ ~ z => x :: z } ||
+  (Stmt.map[Block]{ s => List(s) })
+
+// blocks (enclosed in curly braces)
+lazy val Block: Parser[String, Block] =
+  ((p"{" ~ Stmts ~ p"}").map{ case _ ~ y ~ _ => y } ||
+   (Stmt.map(s => List(s))))
+
+
+// Examples
+Stmt.parse_all("x2:=5+3")
+Block.parse_all("{x:=5;y:=8}")
+Block.parse_all("if(false)then{x:=5}else{x:=10}")
+
+
+val fib = """n := 10;
+             minus1 := 0;
+             minus2 := 1;
+             temp := 0;
+             while (n > 0) do {
+               temp := minus2;
+               minus2 := minus1 + minus2;
+               minus1 := temp;
+               n := n - 1
+             };
+             result := minus2""".replaceAll("\\s+", "")
+
+Stmts.parse_all(fib)
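+
+// for illustration (an extra example): the AST produced for a
+// small conditional
+println(Block.parse_all("if(true)then{x:=1}else{x:=2}"))
+// => Set(List(If(True,List(Assign(x,Num(1))),List(Assign(x,Num(2))))))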
+
+
+// an interpreter for the WHILE language
+type Env = Map[String, Int]
+
+def eval_aexp(a: AExp, env: Env) : Int = a match {
+  case Num(i) => i
+  case Var(s) => env(s)
+  case Aop("+", a1, a2) => eval_aexp(a1, env) + eval_aexp(a2, env)
+  case Aop("-", a1, a2) => eval_aexp(a1, env) - eval_aexp(a2, env)
+  case Aop("*", a1, a2) => eval_aexp(a1, env) * eval_aexp(a2, env)
+  case Aop("/", a1, a2) => eval_aexp(a1, env) / eval_aexp(a2, env)
+}
+
+def eval_bexp(b: BExp, env: Env) : Boolean = b match {
+  case True => true
+  case False => false
+  case Bop("==", a1, a2) => eval_aexp(a1, env) == eval_aexp(a2, env)
+  case Bop("!=", a1, a2) => !(eval_aexp(a1, env) == eval_aexp(a2, env))
+  case Bop(">", a1, a2) => eval_aexp(a1, env) > eval_aexp(a2, env)
+  case Bop("<", a1, a2) => eval_aexp(a1, env) < eval_aexp(a2, env)
+  case And(b1, b2) => eval_bexp(b1, env) && eval_bexp(b2, env)
+  case Or(b1, b2) => eval_bexp(b1, env) || eval_bexp(b2, env)
+}
+
+def eval_stmt(s: Stmt, env: Env) : Env = s match {
+  case Skip => env
+  case Assign(x, a) => env + (x -> eval_aexp(a, env))
+  case If(b, bl1, bl2) => if (eval_bexp(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env)
+  case While(b, bl) =>
+    if (eval_bexp(b, env)) eval_stmt(While(b, bl), eval_bl(bl, env))
+    else env
+  case Write(x) => { println(env(x)) ; env }
+}
+
+def eval_bl(bl: Block, env: Env) : Env = bl match {
+  case Nil => env
+  case s::bl => eval_bl(bl, eval_stmt(s, env))
+}
+
+def eval(bl: Block) : Env = eval_bl(bl, Map())
+
+// parse + evaluate the fib program; then look up what is
+// stored under the variable "result"
+println(eval(Stmts.parse_all(fib).head)("result"))
+
+
+// more examples
+
+// calculate and print all factors bigger
+// than 1 and smaller than n
+println("Factors")
+
+val factors =
+  """n := 12;
+     f := 2;
+     while (f < n / 2 + 1) do {
+       if ((n / f) * f == n) then { write(f) } else { skip };
+       f := f + 1
+     }""".replaceAll("\\s+", "")
+
+println(eval(Stmts.parse_all(factors).head))
+
+
+// calculate all prime numbers up to a number
+println("Primes")
+
+val primes =
+  """end := 100;
+     n := 2;
+     while (n < end) do {
+       f := 2;
+       tmp := 0;
+       while ((f < n / 2 + 1) && (tmp == 0)) do {
+         if ((n / f) * f == n) then { tmp := 1 } else { skip };
+         f := f + 1
+       };
+       if (tmp == 0) then { write(n) } else { skip };
+       n := n + 1
+     }""".replaceAll("\\s+", "")
+
+println(eval(Stmts.parse_all(primes).head))
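+
+// one more tiny end-to-end test (an extra example): parse and run a
+// two-statement program and inspect the final environment
+val tiny = """x := 3; y := x * x""".replaceAll("\\s+", "")
+println(eval(Stmts.parse_all(tiny).head))
+// prints: Map(x -> 3, y -> 9)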
+
+
+
+
+
+// runs with amm2 and amm3
diff -r 15973df32613 -r 2bf1516d730f progs/parser-combinators/comb2.sc
--- a/progs/parser-combinators/comb2.sc	Wed Dec 21 14:33:05 2022 +0000
+++ b/progs/parser-combinators/comb2.sc	Tue Apr 04 22:31:09 2023 +0100
@@ -18,37 +18,33 @@
 
 case class ~[+A, +B](x: A, y: B)
 
-// constraint for the input
-type IsSeq[A] = A => Seq[_]
-
-
-abstract class Parser[I : IsSeq, T]{
-  def parse(in: I): Set[(T, I)]
+abstract class Parser[I, T](using is: I => Seq[_]) {
+  def parse(in: I): Set[(T, I)]
 
   def parse_all(in: I) : Set[T] =
     for ((hd, tl) <- parse(in);
-        if tl.isEmpty) yield hd
+        if is(tl).isEmpty) yield hd
 }
 
 // parser combinators
 
+// alternative parser
+class AltParser[I, T](p: => Parser[I, T],
+                      q: => Parser[I, T])(using I => Seq[_]) extends Parser[I, T] {
+  def parse(in: I) = p.parse(in) ++ q.parse(in)
+}
+
 // sequence parser
-class SeqParser[I : IsSeq, T, S](p: => Parser[I, T],
-                                 q: => Parser[I, S]) extends Parser[I, ~[T, S]] {
+class SeqParser[I, T, S](p: => Parser[I, T],
+                         q: => Parser[I, S])(using I => Seq[_]) extends Parser[I, ~[T, S]] {
   def parse(in: I) =
     for ((hd1, tl1) <- p.parse(in);
          (hd2, tl2) <- q.parse(tl1)) yield (new ~(hd1, hd2), tl2)
 }
 
-// alternative parser
-class AltParser[I : IsSeq, T](p: => Parser[I, T],
-                              q: => Parser[I, T]) extends Parser[I, T] {
-  def parse(in: I) = p.parse(in) ++ q.parse(in)
-}
-
 // map parser
-class MapParser[I : IsSeq, T, S](p: => Parser[I, T],
-                                 f: T => S) extends Parser[I, S] {
+class MapParser[I, T, S](p: => Parser[I, T],
+                         f: T => S)(using I => Seq[_]) extends Parser[I, S] {
   def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)
 }
 
@@ -89,19 +85,20 @@
 //
 // p"<_some_string_>"
 
-implicit def parser_interpolation(sc: StringContext) = new {
-  def p(args: Any*) = StrParser(sc.s(args:_*))
-}
+extension (sc: StringContext)
+  def p(args: Any*) = StrParser(sc.s(args:_*))
+
 
 // more convenient syntax for parser combinators
-implicit def ParserOps[I : IsSeq, T](p: Parser[I, T]) = new {
+extension [I, T](p: Parser[I, T])(using I => Seq[_]) {
   def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)
   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
-  def map[S](f: => T => S) = new MapParser[I, T, S](p, f)
+  def mapp[S](f: => T => S) = new MapParser[I, T, S](p, f)
 }
 
+
 // the abstract syntax trees for the WHILE language
 abstract class Stmt
 abstract class AExp
 
@@ -128,47 +125,47 @@
 
 // arithmetic expressions
 lazy val AExp: Parser[String, AExp] =
-  (Te ~ p"+" ~ AExp).map[AExp]{ case x ~ _ ~ z => Aop("+", x, z) } ||
-  (Te ~ p"-" ~ AExp).map[AExp]{ case x ~ _ ~ z => Aop("-", x, z) } || Te
+  (Te ~ p"+" ~ AExp).mapp[AExp]{ case x ~ _ ~ z => Aop("+", x, z) } ||
+  (Te ~ p"-" ~ AExp).mapp[AExp]{ case x ~ _ ~ z => Aop("-", x, z) } || Te
 lazy val Te: Parser[String, AExp] =
-  (Fa ~ p"*" ~ Te).map[AExp]{ case x ~ _ ~ z => Aop("*", x, z) } ||
-  (Fa ~ p"/" ~ Te).map[AExp]{ case x ~ _ ~ z => Aop("/", x, z) } || Fa
+  (Fa ~ p"*" ~ Te).mapp[AExp]{ case x ~ _ ~ z => Aop("*", x, z) } ||
+  (Fa ~ p"/" ~ Te).mapp[AExp]{ case x ~ _ ~ z => Aop("/", x, z) } || Fa
 lazy val Fa: Parser[String, AExp] =
-  (p"(" ~ AExp ~ p")").map{ case _ ~ y ~ _ => y } ||
-  IdParser.map(Var) ||
-  NumParser.map(Num)
+  (p"(" ~ AExp ~ p")").mapp{ case _ ~ y ~ _ => y } ||
+  IdParser.mapp(Var) ||
+  NumParser.mapp(Num)
 
 // boolean expressions with some simple nesting
 lazy val BExp: Parser[String, BExp] =
-  (AExp ~ p"==" ~ AExp).map[BExp]{ case x ~ _ ~ z => Bop("==", x, z) } ||
-  (AExp ~ p"!=" ~ AExp).map[BExp]{ case x ~ _ ~ z => Bop("!=", x, z) } ||
-  (AExp ~ p"<" ~ AExp).map[BExp]{ case x ~ _ ~ z => Bop("<", x, z) } ||
-  (AExp ~ p">" ~ AExp).map[BExp]{ case x ~ _ ~ z => Bop(">", x, z) } ||
-  (p"(" ~ BExp ~ p")" ~ p"&&" ~ BExp).map[BExp]{ case _ ~ y ~ _ ~ _ ~ v => And(y, v) } ||
-  (p"(" ~ BExp ~ p")" ~ p"||" ~ BExp).map[BExp]{ case _ ~ y ~ _ ~ _ ~ v => Or(y, v) } ||
-  (p"true".map[BExp]{ _ => True }) ||
-  (p"false".map[BExp]{ _ => False }) ||
-  (p"(" ~ BExp ~ p")").map[BExp]{ case _ ~ x ~ _ => x }
+  (AExp ~ p"==" ~ AExp).mapp[BExp]{ case x ~ _ ~ z => Bop("==", x, z) } ||
+  (AExp ~ p"!=" ~ AExp).mapp[BExp]{ case x ~ _ ~ z => Bop("!=", x, z) } ||
+  (AExp ~ p"<" ~ AExp).mapp[BExp]{ case x ~ _ ~ z => Bop("<", x, z) } ||
+  (AExp ~ p">" ~ AExp).mapp[BExp]{ case x ~ _ ~ z => Bop(">", x, z) } ||
+  (p"(" ~ BExp ~ p")" ~ p"&&" ~ BExp).mapp[BExp]{ case _ ~ y ~ _ ~ _ ~ v => And(y, v) } ||
+  (p"(" ~ BExp ~ p")" ~ p"||" ~ BExp).mapp[BExp]{ case _ ~ y ~ _ ~ _ ~ v => Or(y, v) } ||
+  (p"true".mapp[BExp]{ _ => True }) ||
+  (p"false".mapp[BExp]{ _ => False }) ||
+  (p"(" ~ BExp ~ p")").mapp[BExp]{ case _ ~ x ~ _ => x }
 
 // a single statement
 lazy val Stmt: Parser[String, Stmt] =
-  ((p"skip".map[Stmt]{_ => Skip }) ||
-   (IdParser ~ p":=" ~ AExp).map[Stmt]{ case x ~ _ ~ z => Assign(x, z) } ||
-   (p"write(" ~ IdParser ~ p")").map[Stmt]{ case _ ~ y ~ _ => Write(y) } ||
+  ((p"skip".mapp[Stmt]{_ => Skip }) ||
+   (IdParser ~ p":=" ~ AExp).mapp[Stmt]{ case x ~ _ ~ z => Assign(x, z) } ||
+   (p"write(" ~ IdParser ~ p")").mapp[Stmt]{ case _ ~ y ~ _ => Write(y) } ||
p"else" ~ Block) - .map[Stmt]{ case _ ~ y ~ _ ~ u ~ _ ~ w => If(y, u, w) } || - (p"while" ~ BExp ~ p"do" ~ Block).map[Stmt]{ case _ ~ y ~ _ ~ w => While(y, w) }) + .mapp[Stmt]{ case _ ~ y ~ _ ~ u ~ _ ~ w => If(y, u, w) } || + (p"while" ~ BExp ~ p"do" ~ Block).mapp[Stmt]{ case _ ~ y ~ _ ~ w => While(y, w) }) // statements lazy val Stmts: Parser[String, Block] = - (Stmt ~ p";" ~ Stmts).map[Block]{ case x ~ _ ~ z => x :: z } || - (Stmt.map[Block]{ s => List(s) }) + (Stmt ~ p";" ~ Stmts).mapp[Block]{ case x ~ _ ~ z => x :: z } || + (Stmt.mapp[Block]{ s => List(s) }) // blocks (enclosed in curly braces) lazy val Block: Parser[String, Block] = - ((p"{" ~ Stmts ~ p"}").map{ case _ ~ y ~ _ => y } || - (Stmt.map(s => List(s)))) + ((p"{" ~ Stmts ~ p"}").mapp{ case _ ~ y ~ _ => y } || + (Stmt.mapp(s => List(s)))) // Examples @@ -273,8 +270,3 @@ println(eval(Stmts.parse_all(primes).head)) - - - - -// runs with amm2 and amm3 diff -r 15973df32613 -r 2bf1516d730f slides/slides01.pdf Binary file slides/slides01.pdf has changed diff -r 15973df32613 -r 2bf1516d730f slides/slides01.tex --- a/slides/slides01.tex Wed Dec 21 14:33:05 2022 +0000 +++ b/slides/slides01.tex Tue Apr 04 22:31:09 2023 +0100 @@ -50,7 +50,188 @@ %A physical explanation the \emph{dynamic matrix}\\ %lots of text %\end{mybox3} -%\end{frame} +% \end{frame} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[t] +\frametitle{% + \begin{tabular}{@ {}c@ {}} + \\[-3mm] + \LARGE Lunch with a Lecturer (29 March)\\[5mm] + \end{tabular}} + + I teach CFL (compilers) and PEP (Scala)\bigskip + + \begin{itemize} + \item did undergraduate in Germany + \item Master in St Andrews + \item PhD in Cambridge + \end{itemize}\bigskip\bigskip + + use mainly the Isabelle theorem prover in my work (see 6CCS3VER) + + write code in functional programming languages (Scala, SML, Ocaml, Haskell) +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Why Bother with Regexes?} + +\begin{columns}[t,onlytextwidth] +\begin{column}{1.8cm} +\mbox{} +\end{column} +\begin{column}{.5\textwidth} +\small{}Ruby, Python, Java 8\medskip\\ +\begin{tikzpicture}\footnotesize +\begin{axis}[ + xlabel={$n$}, + x label style={at={(1.05,0.0)}}, + ylabel={time in secs}, + enlargelimits=false, + xtick={0,5,...,30}, + xmax=33, + ymax=35, + ytick={0,5,...,30}, + scaled ticks=false, + axis lines=left, + width=\textwidth, + height=4cm, + legend entries={Python,Ruby}, + legend pos=north west, + legend cell align=left] +\addplot[blue,mark=*, mark options={fill=white}] table {re-python.data}; +\addplot[brown,mark=triangle*, mark options={fill=white}] table {re-ruby.data}; +\end{axis} +\end{tikzpicture} +\begin{tikzpicture}\footnotesize +\begin{axis}[ + xlabel={$n$}, + x label style={at={(1.05,0.0)}}, + ylabel={time in secs}, + enlargelimits=false, + xtick={0,5,...,30}, + xmax=33, + ymax=35, + ytick={0,5,...,30}, + scaled ticks=false, + axis lines=left, + width=\textwidth, + height=4cm, + legend entries={Python, Java 8, JavaScript, Swift}, + legend pos=north west, + legend cell align=left] +\addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data}; +\addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data}; +\addplot[red,mark=*, mark options={fill=white}] table {re-js.data}; +\addplot[magenta,mark=*, mark options={fill=white}] table {re-swift.data}; +\end{axis} +\end{tikzpicture} +% +\end{column} 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c,fragile]
+  \frametitle{Incidents}
+
+  \begin{itemize}
+  \item a global outage on 2 July 2019 at \textbf{Cloudflare}
+    (first one for six years)\medskip
+
+    \begin{center}\small\color{blue}
+    \begin{verbatim}
+  (?:(?:\"|'|\]|\}|\\|\d|(?:nan|infinity|true|false|
+  null|undefined|symbol|math)|\`|\-|\+)+[)]*;?((?:\s
+  |-|~|!|{}|\|\||\+)*.*(?:.*=.*)))
+    \end{verbatim}
+    \end{center}\bigskip\bigskip\bigskip\bigskip\bigskip\bigskip\bigskip
+
+  \item on 20 July 2016 the \textbf{Stack Exchange} webpage went down
+    because of an evil regular expression
+    \here{https://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016}
+  \end{itemize}
+
+  \begin{textblock}{6}(6,7.6)
+  \includegraphics[scale=0.14]{../pics/cloudflare.png}\\
+  \footnotesize
+  It serves more web traffic than Twitter, Amazon, Apple,
+  Instagram, Bing \& Wikipedia combined.
+  \here{https://blog.cloudflare.com/details-of-the-cloudflare-outage-on-july-2-2019/}
+  \end{textblock}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}[c]
+
+\frametitle{}
+\begin{mybox3}{}\it
+  ``This conversation is interesting to me, and I've researched it a little bit... I also disagree with Dr. Urban on the cost/benefit of non-GC languages...[..]\\
+
+  But regardless, Scala is a lot slower than, say, C or Rust. To say it's not is basically wrong (imo)....[..]
+  ''\\
+\mbox{}\hfill-- Oliver Iliffe, discussion this year in PEP
+\end{mybox3}\pause
+
+\end{frame}
+
+\begin{frame}<1-10>
+\end{frame}
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}[t]