author | Christian Urban <christian.urban@kcl.ac.uk> |
Fri, 17 Nov 2023 20:06:43 +0000 | |
changeset 955 | 47acfd7f9096 |
parent 954 | eda0ccf56c72 |
child 956 | ae9782e62bdd |
--- a/hws/hw06.tex Sat Nov 11 10:08:33 2023 +0000 +++ b/hws/hw06.tex Fri Nov 17 20:06:43 2023 +0000 @@ -27,6 +27,11 @@ Observe the maximal munch rule and priorities of your regular expressions that make the process of lexing unambiguous.) +\solution{ + a and b are ok; c is not matched because x4x is not an identifier according to the rules. +} + + \item Suppose the grammar \begin{plstx}[margin=1cm] @@ -43,6 +48,38 @@ of precedences of the operators? \solution{ + \begin{tikzpicture}[level distance=10mm, black,sibling distance=10mm] + \node {\meta{E}} + child[sibling distance=20mm] {node {\meta{F}} + child {node {\meta{T}} + child[sibling distance=5mm] {node {(}} + child[sibling distance=5mm] {node {\meta{E}} + child {node {\meta{F}} + child {node {\meta{T}} child {node {3}}} + child {node {+}} + child {node {\meta{T}} child {node {3}}} + } + } + child[sibling distance=5mm] {node {)}} + } + child {node {+}} + child {node {\meta{T}} + child[sibling distance=5mm] {node {(}} + child[sibling distance=5mm] {node {\meta{E}} + child {node {\meta{F}} + child {node {\meta{T}} child {node {2}}} + } + child {node {*}} + child {node {\meta{F}} + child {node {\meta{T}} child {node {3}}} + } + } + child[sibling distance=5mm] {node {)}} + } + }; +\end{tikzpicture} + + For the second part "1+2*3" will be parsed as (1+2)*3, meaning + and - bind tighter than * and $\backslash$ } @@ -76,7 +113,16 @@ and (ii) give a sample string involving all rules given in 1.-4.~that can be parsed by this grammar. +\solution{ +The simplest grammar is + +\begin{plstx}[margin=1cm] + :\meta{B} ::= true \;|\; false \;|\;\meta{B} \cdot $\wedge$ \cdot \meta{B} \;|\; \meta{B} \cdot $\vee$ \cdot \meta{B} + \;|\; $\neg$\meta{B} \;|\; (\cdot\meta{B}\cdot)\\ +\end{plstx} +This is unfortunately a left-recursive grammar. Giving a not left-recursive one is a bit more involved. + } \item Parsing combinators consist of atomic parsers, alternative parsers, sequence parsers and semantic actions. What is the purpose
--- a/progs/fun/fun.sc Sat Nov 11 10:08:33 2023 +0000 +++ b/progs/fun/fun.sc Fri Nov 17 20:06:43 2023 +0000 @@ -74,7 +74,7 @@ import scala.language.reflectiveCalls // convenience for code-generation (string interpolations) -implicit def sring_inters(sc: StringContext) = new { +extension (sc: StringContext) { def i(args: Any*): String = " " ++ sc.s(args:_*) ++ "\n" // instructions def l(args: Any*): String = sc.s(args:_*) ++ ":\n" // labels def m(args: Any*): String = sc.s(args:_*) ++ "\n" // methods
--- a/progs/fun/fun_parser.sc Sat Nov 11 10:08:33 2023 +0000 +++ b/progs/fun/fun_parser.sc Fri Nov 17 20:06:43 2023 +0000 @@ -4,7 +4,7 @@ // call with // // amm fun_parser.sc fact.fun -// + // amm fun_parser.sc defs.fun // // this will generate a parse-tree from a list @@ -19,49 +19,66 @@ // Parser combinators // type parameter I needs to be of Seq-type // -abstract class Parser[I, T](implicit ev: I => Seq[_]) { +type IsSeq[I] = I => Seq[_] + +/* +abstract class Parser[I, T](using is: I => Seq[_]) { + def parse(in: I): Set[(T, I)] + + def parse_all(in: I) : Set[T] = + for ((hd, tl) <- parse(in); + if is(tl).isEmpty) yield hd +} +*/ + + +abstract class Parser[I, T](using is: I => Seq[_]) { def parse(ts: I): Set[(T, I)] def parse_single(ts: I) : T = - parse(ts).partition(_._2.isEmpty) match { + parse(ts).partition(p => is(p._2).isEmpty) match { case (good, _) if !good.isEmpty => good.head._1 - case (_, err) => { - println (s"Parse Error\n${err.minBy(_._2.length)}") ; sys.exit(-1) } + case (_, err) => { println (s"Parse Error\n${err.minBy(p => is(p._2).length)}") ; sys.exit(-1) } } } // convenience for writing grammar rules case class ~[+A, +B](_1: A, _2: B) -class SeqParser[I, T, S](p: => Parser[I, T], - q: => Parser[I, S])(implicit ev: I => Seq[_]) extends Parser[I, ~[T, S]] { - def parse(sb: I) = - for ((head1, tail1) <- p.parse(sb); - (head2, tail2) <- q.parse(tail1)) yield (new ~(head1, head2), tail2) +// parser combinators + +// alternative parser +class AltParser[I : IsSeq, T](p: => Parser[I, T], + q: => Parser[I, T]) extends Parser[I, T] { + def parse(in: I) = p.parse(in) ++ q.parse(in) } -class AltParser[I, T](p: => Parser[I, T], - q: => Parser[I, T])(implicit ev: I => Seq[_]) extends Parser[I, T] { - def parse(sb: I) = p.parse(sb) ++ q.parse(sb) +// sequence parser +class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], + q: => Parser[I, S]) extends Parser[I, ~[T, S]] { + def parse(in: I) = + for ((hd1, tl1) <- p.parse(in); + (hd2, tl2) <- q.parse(tl1)) yield (new ~(hd1, hd2), tl2) } -class FunParser[I, T, S](p: => Parser[I, T], - f: T => S)(implicit ev: I => Seq[_]) extends Parser[I, S] { - def parse(sb: I) = - for ((head, tail) <- p.parse(sb)) yield (f(head), tail) +// map parser +class MapParser[I : IsSeq, T, S](p: => Parser[I, T], + f: T => S) extends Parser[I, S] { + def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl) } -// convenient combinators -implicit def ParserOps[I, T](p: Parser[I, T])(implicit ev: I => Seq[_]) = new { - def || (q : => Parser[I, T]) = new AltParser[I, T](p, q) - def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f) + + +// more convenient syntax for parser combinators +extension [I : IsSeq, T](p: Parser[I, T]) { + def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q) def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q) + def map[S](f: => T => S) = new MapParser[I, T, S](p, f) } -def ListParser[I, T, S](p: => Parser[I, T], - q: => Parser[I, S])(implicit ev: I => Seq[_]): Parser[I, List[T]] = { - (p ~ q ~ ListParser(p, q)) ==> { case x ~ _ ~ z => x :: z : List[T] } || - (p ==> ((s) => List(s))) +def ListParser[I, T, S](p: => Parser[I, T], q: => Parser[I, S])(using is: I => Seq[_]): Parser[I, List[T]] = { + (p ~ q ~ ListParser(p, q)).map{ case (x:T) ~ (y:S) ~ (z:List[T]) => x :: z } || + (p.map[List[T]]{s => List(s)}) } case class TokParser(tok: Token) extends Parser[List[Token], Token] { @@ -71,11 +88,12 @@ } } -implicit def token2tparser(t: Token) = TokParser(t) +implicit def token2tparser(t: Token) : Parser[List[Token], Token] = TokParser(t) + -implicit def TokOps(t: Token) = new { +extension (t: Token) { def || (q : => Parser[List[Token], Token]) = new AltParser[List[Token], Token](t, q) - def ==>[S] (f: => Token => S) = new FunParser[List[Token], Token, S](t, f) + def map[S] (f: => Token => S) = new MapParser[List[Token], Token, S](t, f) def ~[S](q : => Parser[List[Token], S]) = new SeqParser[List[Token], Token, S](t, q) } @@ -118,41 +136,39 @@ // arithmetic expressions lazy val Exp: Parser[List[Token], Exp] = - (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Exp ~ T_KWD("else") ~ Exp) ==> - { case _ ~ x ~ _ ~ y ~ _ ~ z => If(x, y, z): Exp } || - (M ~ T_SEMI ~ Exp) ==> { case x ~ _ ~ y => Sequence(x, y): Exp } || M + (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Exp ~ T_KWD("else") ~ Exp).map{ case _ ~ x ~ _ ~ y ~ _ ~ z => If(x, y, z): Exp } || + (M ~ T_SEMI ~ Exp).map{ case x ~ _ ~ y => Sequence(x, y): Exp } || M lazy val M: Parser[List[Token], Exp] = - (T_KWD("write") ~ L) ==> { case _ ~ y => Write(y): Exp } || L + (T_KWD("write") ~ L).map{ case _ ~ y => Write(y): Exp } || L lazy val L: Parser[List[Token], Exp] = - (T ~ T_OP("+") ~ Exp) ==> { case x ~ _ ~ z => Aop("+", x, z): Exp } || - (T ~ T_OP("-") ~ Exp) ==> { case x ~ _ ~ z => Aop("-", x, z): Exp } || T + (T ~ T_OP("+") ~ Exp).map{ case x ~ _ ~ z => Aop("+", x, z): Exp } || + (T ~ T_OP("-") ~ Exp).map{ case x ~ _ ~ z => Aop("-", x, z): Exp } || T lazy val T: Parser[List[Token], Exp] = - (F ~ T_OP("*") ~ T) ==> { case x ~ _ ~ z => Aop("*", x, z): Exp } || - (F ~ T_OP("/") ~ T) ==> { case x ~ _ ~ z => Aop("/", x, z): Exp } || - (F ~ T_OP("%") ~ T) ==> { case x ~ _ ~ z => Aop("%", x, z): Exp } || F + (F ~ T_OP("*") ~ T).map{ case x ~ _ ~ z => Aop("*", x, z): Exp } || + (F ~ T_OP("/") ~ T).map{ case x ~ _ ~ z => Aop("/", x, z): Exp } || + (F ~ T_OP("%") ~ T).map{ case x ~ _ ~ z => Aop("%", x, z): Exp } || F lazy val F: Parser[List[Token], Exp] = - (IdParser ~ T_LPAREN ~ ListParser(Exp, T_COMMA) ~ T_RPAREN) ==> + (IdParser ~ T_LPAREN ~ ListParser(Exp, T_COMMA) ~ T_RPAREN).map { case x ~ _ ~ z ~ _ => Call(x, z): Exp } || - (T_LPAREN ~ Exp ~ T_RPAREN) ==> { case _ ~ y ~ _ => y: Exp } || - IdParser ==> { case x => Var(x): Exp } || - NumParser ==> { case x => Num(x): Exp } + (T_LPAREN ~ Exp ~ T_RPAREN).map{ case _ ~ y ~ _ => y: Exp } || + IdParser.map{ case x => Var(x): Exp } || + NumParser.map{ case x => Num(x): Exp } // boolean expressions lazy val BExp: Parser[List[Token], BExp] = - (Exp ~ T_OP("==") ~ Exp) ==> { case x ~ _ ~ z => Bop("==", x, z): BExp } || - (Exp ~ T_OP("!=") ~ Exp) ==> { case x ~ _ ~ z => Bop("!=", x, z): BExp } || - (Exp ~ T_OP("<") ~ Exp) ==> { case x ~ _ ~ z => Bop("<", x, z): BExp } || - (Exp ~ T_OP(">") ~ Exp) ==> { case x ~ _ ~ z => Bop("<", z, x): BExp } || - (Exp ~ T_OP("<=") ~ Exp) ==> { case x ~ _ ~ z => Bop("<=", x, z): BExp } || - (Exp ~ T_OP("=>") ~ Exp) ==> { case x ~ _ ~ z => Bop("<=", z, x): BExp } + (Exp ~ T_OP("==") ~ Exp).map{ case x ~ _ ~ z => Bop("==", x, z): BExp } || + (Exp ~ T_OP("!=") ~ Exp).map{ case x ~ _ ~ z => Bop("!=", x, z): BExp } || + (Exp ~ T_OP("<") ~ Exp) .map{ case x ~ _ ~ z => Bop("<", x, z): BExp } || + (Exp ~ T_OP(">") ~ Exp) .map{ case x ~ _ ~ z => Bop("<", z, x): BExp } || + (Exp ~ T_OP("<=") ~ Exp).map{ case x ~ _ ~ z => Bop("<=", x, z): BExp } || + (Exp ~ T_OP("=>") ~ Exp).map{ case x ~ _ ~ z => Bop("<=", z, x): BExp } lazy val Defn: Parser[List[Token], Decl] = - (T_KWD("def") ~ IdParser ~ T_LPAREN ~ ListParser(IdParser, T_COMMA) ~ T_RPAREN ~ T_OP("=") ~ Exp) ==> - { case _ ~ y ~ _ ~ w ~ _ ~ _ ~ r => Def(y, w, r): Decl } + (T_KWD("def") ~ IdParser ~ T_LPAREN ~ ListParser(IdParser, T_COMMA) ~ T_RPAREN ~ T_OP("=") ~ Exp).map{ case _ ~ y ~ _ ~ w ~ _ ~ _ ~ r => Def(y, w, r): Decl } lazy val Prog: Parser[List[Token], List[Decl]] = - (Defn ~ T_SEMI ~ Prog) ==> { case x ~ _ ~ z => x :: z : List[Decl] } || - (Exp ==> ((s) => List(Main(s)) : List[Decl])) + (Defn ~ T_SEMI ~ Prog).map{ case x ~ _ ~ z => x :: z : List[Decl] } || + (Exp.map((s) => List(Main(s)) : List[Decl]))
--- a/progs/fun/fun_tokens.sc Sat Nov 11 10:08:33 2023 +0000 +++ b/progs/fun/fun_tokens.sc Fri Nov 17 20:06:43 2023 +0000 @@ -8,7 +8,7 @@ // amm fun_tokens.sc defs.fun // - + import scala.language.implicitConversions import scala.language.reflectiveCalls @@ -40,20 +40,16 @@ implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) -implicit def RexpOps(r: Rexp) = new { +extension (s: String) { + def $ (r: Rexp) = RECD(s, r) +} + +extension (r: Rexp) { def | (s: Rexp) = ALT(r, s) def % = STAR(r) def ~ (s: Rexp) = SEQ(r, s) } -implicit def stringOps(s: String) = new { - def | (r: Rexp) = ALT(s, r) - def | (r: String) = ALT(s, r) - def % = STAR(s) - def ~ (r: Rexp) = SEQ(s, r) - def ~ (r: String) = SEQ(s, r) - def $ (r: Rexp) = RECD(s, r) -} def nullable (r: Rexp) : Boolean = r match { case ZERO => false @@ -81,7 +77,7 @@ // extracts a string from value def flatten(v: Val) : String = v match { case Empty => "" - case Chr(c) => c.toString + case Chr(c) => c.toString case Left(v) => flatten(v) case Right(v) => flatten(v) case Sequ(v1, v2) => flatten(v1) + flatten(v2)
--- a/progs/lexer/lexer.sc Sat Nov 11 10:08:33 2023 +0000 +++ b/progs/lexer/lexer.sc Fri Nov 17 20:06:43 2023 +0000 @@ -1,4 +1,4 @@ -// A simple lexer inspired by work of Sulzmann & Lu +// A simple lexer inspired by work of Sulzmann $ Lu //================================================== // // Call the test cases with @@ -43,10 +43,9 @@ implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) -// to use & for records, instead of $ which had -// its precedence be changed in Scala 3 + extension (s: String) { - def & (r: Rexp) = RECD(s, r) + def $ (r: Rexp) = RECD(s, r) } extension (r: Rexp) { @@ -60,7 +59,7 @@ case ONE => true case CHAR(_) => false case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case SEQ(r1, r2) => nullable(r1) $$ nullable(r2) case STAR(_) => true case RECD(_, r1) => nullable(r1) } @@ -209,14 +208,14 @@ val STRING: Rexp = "\"" ~ SYM.% ~ "\"" -val WHILE_REGS = (("k" & KEYWORD) | - ("i" & ID) | - ("o" & OP) | - ("n" & NUM) | - ("s" & SEMI) | - ("str" & STRING) | - ("p" & (LPAREN | RPAREN)) | - ("w" & WHITESPACE)).% +val WHILE_REGS = (("k" $ KEYWORD) | + ("i" $ ID) | + ("o" $ OP) | + ("n" $ NUM) | + ("s" $ SEMI) | + ("str" $ STRING) | + ("p" $ (LPAREN | RPAREN)) | + ("w" $ WHITESPACE)).% // Two Simple While Tests
--- a/progs/parser-combinators/comb2.sc Sat Nov 11 10:08:33 2023 +0000 +++ b/progs/parser-combinators/comb2.sc Fri Nov 17 20:06:43 2023 +0000 @@ -9,15 +9,13 @@ // // amm comb2.sc - // more convenience for the map parsers later on; // it allows writing nested patterns as // case x ~ y ~ z => ... + case class ~[+A, +B](x: A, y: B) -val a = (1, "2") -val v = new ~(1, "2") type IsSeq[I] = I => Seq[_] @@ -100,8 +98,6 @@ } - - // the abstract syntax trees for the WHILE language abstract class Stmt abstract class AExp @@ -172,10 +168,11 @@ // Examples -Stmt.parse_all("x2:=5+3") -Block.parse_all("{x:=5;y:=8}") -Block.parse_all("if(false)then{x:=5}else{x:=10}") - +println(BExp.parse_all("5+3")) +println(Stmt.parse_all("5==3")) +println(Stmt.parse_all("x2:=5+3")) +println(Block.parse_all("{x:=5;y:=8}")) +println(Block.parse_all("if(false)then{x:=5}else{x:=10}")) val fib = """n := 10; minus1 := 0; @@ -189,7 +186,8 @@ }; result := minus2""".replaceAll("\\s+", "") -Stmts.parse_all(fib) +println("fib testcase:") +println(Stmts.parse_all(fib)) // an interpreter for the WHILE language @@ -272,4 +270,3 @@ }""".replaceAll("\\s+", "") println(eval(Stmts.parse_all(primes).head)) -
--- a/slides/slides07.tex Sat Nov 11 10:08:33 2023 +0000 +++ b/slides/slides07.tex Fri Nov 17 20:06:43 2023 +0000 @@ -12,6 +12,7 @@ \begin{document} + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[t] \frametitle{% @@ -21,52 +22,14 @@ \LARGE Formal Languages\\[-5mm] \end{tabular}} - \normalsize \begin{center} \begin{tabular}{ll} Email: & christian.urban at kcl.ac.uk\\ - Office Hour: & Thurdays 15 -- 16\\ - Location: & N7.07 (North Wing, Bush House)\\ - Slides \& Progs: & KEATS\\ - Pollev: & \texttt{\alert{https://pollev.com/cfltutoratki576}}\\ - \end{tabular} - \end{center} - - \begin{center} - \begin{tikzpicture} - \node[drop shadow,fill=white,inner sep=0pt] - {\footnotesize\rowcolors{1}{capri!10}{white} - \begin{tabular}{|p{4.8cm}|p{4.8cm}|}\hline - 1 Introduction, Languages & 6 While-Language \\ - 2 Regular Expressions, Derivatives & 7 Compilation, JVM \\ - 3 Automata, Regular Languages & \cellcolor{blue!50} 8 Compiling Functional Languages \\ - 4 Lexing, Tokenising & 9 Optimisations \\ - 5 Grammars, Parsing & 10 LLVM \\ \hline - \end{tabular}% - }; - \end{tikzpicture} - \end{center} -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[t] -\frametitle{% - \begin{tabular}{@ {}c@ {}} - \\[-3mm] - \LARGE Compilers and \\[-2mm] - \LARGE Formal Languages\\[3mm] - \end{tabular}} - - \normalsize - \begin{center} - \begin{tabular}{ll} - Email: & christian.urban at kcl.ac.uk\\ - %Office Hours: & Thursdays 12 -- 14\\ - %Location: & N7.07 (North Wing, Bush House)\\ - Slides \& Progs: & KEATS (also homework is there)\\ + Office Hour: & Thurdays 15 -- 16\\ + Location: & N7.07 (North Wing, Bush House)\\ + Slides \& Progs: & KEATS (also homework is there)\\ + Pollev: & \texttt{\alert{https://pollev.com/cfltutoratki576}}\\ \end{tabular} \end{center}