# HG changeset patch # User Christian Urban # Date 1353488651 0 # Node ID e6868bd2942b6dca7d72b67469e499b40ee84df8 # Parent cc3f7908b9428a5a7e4bc92b8594cc476df8dd48 tuned diff -r cc3f7908b942 -r e6868bd2942b app7.scala --- a/app7.scala Wed Nov 21 07:28:28 2012 +0000 +++ b/app7.scala Wed Nov 21 09:04:11 2012 +0000 @@ -9,6 +9,8 @@ new AltParser(this, right) def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f) + def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = + new SeqParser(this, right) } diff -r cc3f7908b942 -r e6868bd2942b parser4.scala --- a/parser4.scala Wed Nov 21 07:28:28 2012 +0000 +++ b/parser4.scala Wed Nov 21 09:04:11 2012 +0000 @@ -1,6 +1,5 @@ // parser combinators with input type I and return type T -// and memoisation case class SubString(s: String, l: Int, h: Int) { def low = l @@ -23,8 +22,6 @@ def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right) def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f) def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right) - def ~>[S] (right : => Parser[S]) : Parser[S] = this ~ right ==> (_._2) - def <~[S] (right : => Parser[S]) : Parser[T] = this ~ right ==> (_._1) } class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] { @@ -70,6 +67,7 @@ } } +// ambigous grammar lazy val E: Parser[Int] = new CHECK("E", (E ~ "+" ~ E) ==> { case ((x, y), z) => x + z} || (E ~ "*" ~ E) ==> { case ((x, y), z) => x * z} || @@ -79,33 +77,6 @@ "2" ==> { (s) => 2 } || "3" ==> { (s) => 3 }) -println("foo " + E.parse_all("1+2*3")) +println(E.parse_all("1+2*3")) -// ambiguous grammar - -lazy val S: Parser[String] = - new CHECK("S", ("1" ~ S ~ S) ==> { case ((x, y), z) => "1" + y + z} || "") - -lazy val S2: Parser[String] = - new CHECK("S2", (S2 ~ S2 ~ S2) ==> { case ((x, y), z) => x + y + z} || "1" || "") - -def test2(i: Int) = { - val result = E.parse_all("1" * i) - print(result.size + " " + result + " ") -} - -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - - -for (i <- 1 to 10) { - print(i + " ") - print("%.5f".format(time_needed(1, test2(i)))) - print("\n") -} - diff -r cc3f7908b942 -r e6868bd2942b slides08.pdf Binary file slides08.pdf has changed diff -r cc3f7908b942 -r e6868bd2942b slides08.tex --- a/slides08.tex Wed Nov 21 07:28:28 2012 +0000 +++ b/slides08.tex Wed Nov 21 09:04:11 2012 +0000 @@ -151,7 +151,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \mode{ \begin{frame}[c] -\frametitle{\begin{tabular}{c}Building a Web Browser\end{tabular}} +\frametitle{\begin{tabular}{c}Building a ``Web Browser''\end{tabular}} Using a lexer: assume the following regular expressions @@ -171,11 +171,11 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \mode{ \begin{frame}[c] -\frametitle{\begin{tabular}{c}Interpreting a List of Token\end{tabular}} +\frametitle{\begin{tabular}{c}Interpreting a List of Tokens\end{tabular}} \begin{itemize} -\item text should be formatted consistently up to a specified width, say 60 characters -\item potential linebreaks are inserted by the formatter +\item the text should be formatted consistently up to a specified width, say 60 characters +\item potential linebreaks are inserted by the formatter (not the input) \item repeated whitespaces are ``condensed'' to a single whitepace \item \bl{$<\!p\!>$} \bl{$<\!\slash{}p\!>$} start/end paragraph \item \bl{$<\!b\!>$} \bl{$<\!\slash{}b\!>$} start/end bold @@ -190,7 +190,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \mode{ \begin{frame}[c] -\frametitle{\begin{tabular}{c}Interpreting a List of Token\end{tabular}} +\frametitle{\begin{tabular}{c}Interpreting a List of Tokens\end{tabular}} The lexer cannot prevent errors like @@ -256,9 +256,9 @@ Sequence parser (code \bl{$p\sim q$})\bigskip \begin{itemize} -\item apply \bl{$p$} first producing a set of pairs +\item apply first \bl{$p$} producing a set of pairs \item then apply \bl{$q$} to the unparsed parts -\item the combine the results:\\ \mbox{}\;\;((input$_1$, input$_2$), unparsed part) +\item then combine the results:\\ \mbox{}\;\;((output$_1$, output$_2$), unparsed part) \end{itemize} \begin{center} @@ -282,15 +282,16 @@ \begin{itemize} \item apply \bl{$p$} producing a set of pairs -\item then apply the function \bl{$f$} to each fist component +\item then apply the function \bl{$f$} to each first component \end{itemize} \begin{center} \begin{tabular}{l} \large \bl{$\{(f(o_1), u_1) \;|\; (o_1, u_1) \in p(\text{input})\}$} \end{tabular} -\end{center} +\end{center}\bigskip\bigskip\pause +\bl{$f$} is the semantic action (``what to do with the parsed input'') \end{frame}} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -396,7 +397,7 @@ \bl{$(ntp \sim ntp) \Longrightarrow f$} \end{center} -where \bl{$f$} is the semantic action (what to do with the pair) +where \bl{$f$} is the semantic action (``what to do with the pair'') \end{itemize} @@ -478,6 +479,39 @@ \end{frame}} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Scannerless Parsers\end{tabular}} + +\begin{itemize} +\item input: \alert{string} +\item output: set of (output\_type, \alert{string}) +\end{itemize}\bigskip + +but lexers are better when whitespaces or comments need to be filtered out + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Successful Parses\end{tabular}} + +\begin{itemize} +\item input: string +\item output: \alert{set of} (output\_type, string) +\end{itemize}\bigskip + +a parse is successful whenever the input has been +fully ``consumed'' (that is the second component is empty) + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \mode{ \begin{frame}[c] @@ -578,7 +612,7 @@ \begin{itemize} \item we record when we recursively called a parser\bigskip \item whenever the is a recursion, the parser must have consumed something --- so -I can decrease the input string/list of token by one (at the end) +we can decrease the input string/list of token by one (at the end) \end{itemize} \end{frame}} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -590,15 +624,17 @@ \begin{center} -\bl{\begin{tabular}{lcl} +\bl{\begin{tabular}{@{}lcl@{}} $Stmt$ & $\rightarrow$ & $\text{skip}$\\ & $|$ & $Id := AExp$\\ & $|$ & $\text{if}\; B\!Exp \;\text{then}\; Block \;\text{else}\; Block$\\ - & $|$ & $\text{while}\; B\!Exp \;\text{do}\; Block$\bigskip\\ + & $|$ & $\text{while}\; B\!Exp \;\text{do}\; Block$\medskip\\ $Stmt$ & $\rightarrow$ & $Stmt \;\text{;}\; Stmts$\\ - & $|$ & $Stmt$\bigskip\\ + & $|$ & $Stmt$\medskip\\ $Block$ & $\rightarrow$ & $\{ Stmts \}$\\ - & $|$ & $Stmt$\\ + & $|$ & $Stmt$\medskip\\ +$AExp$ & $\rightarrow$ & \ldots\\ +$BExp$ & $\rightarrow$ & \ldots\\ \end{tabular}} \end{center} diff -r cc3f7908b942 -r e6868bd2942b while.scala --- a/while.scala Wed Nov 21 07:28:28 2012 +0000 +++ b/while.scala Wed Nov 21 09:04:11 2012 +0000 @@ -1,12 +1,13 @@ //:load matcher.scala +//:load parser3.scala // some regular expressions -val SYM = RANGE("""ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_""") +val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") val DIGIT = RANGE("0123456789") val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT))) val NUM = PLUS(DIGIT) -val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "begin", "end", "true", "false") +val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false") val SEMI: Rexp = ";" val OP: Rexp = ALTS(":=", "=", "+", "-", "*") val WHITESPACE = PLUS(RANGE(" \n")) @@ -15,7 +16,7 @@ val BEGIN: Rexp = "{" val END: Rexp = "}" -// for classifying the strings that have been recognised +// tokens for classifying the strings that have been recognised abstract class Token case object T_WHITESPACE extends Token case object T_SEMI extends Token @@ -28,7 +29,6 @@ case class T_NUM(s: String) extends Token case class T_KWD(s: String) extends Token - val lexing_rules: List[Rule[Token]] = List((KEYWORD, (s) => T_KWD(s.mkString)), (ID, (s) => T_ID(s.mkString)), @@ -53,14 +53,16 @@ case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt case class While(b: BExp, bl: Block) extends Stmt case class Assign(s: String, a: AExp) extends Stmt + case class Var(s: String) extends AExp case class Num(i: Int) extends AExp case class Aop(o: String, a1: AExp, a2: AExp) extends AExp + case object True extends BExp case object False extends BExp case class Bop(o: String, a1: AExp, a2: AExp) extends BExp - +// atomic parsers case class TokParser(tok: Token) extends Parser[List[Token], Token] { def parse(ts: List[Token]) = ts match { case t::ts if (t == tok) => Set((t, ts)) @@ -91,12 +93,12 @@ (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F lazy val F: Parser[List[Token], AExp] = (T_LPAREN ~> AExp <~ T_RPAREN) || - IdParser ==> ((s) => Var(s)) || - NumParser ==> ((i) => Num(i)) + IdParser ==> Var || + NumParser ==> Num lazy val BExp: Parser[List[Token], BExp] = (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Bop("=", x, z): BExp } || - (T_KWD("true") ==> ((_) => True: BExp)) || + (T_KWD("true") ==> ((_) => True)) || (T_KWD("false") ==> ((_) => False: BExp)) lazy val Stmt: Parser[List[Token], Stmt] = @@ -113,43 +115,58 @@ (T_BEGIN ~> Stmts <~ T_END) || (Stmt ==> ((s) => List(s))) + +// examples val p1 = "x := 5" val p1_toks = Tok.fromString(p1) val p1_ast = Block.parse_all(p1_toks) println(p1_toks) println(p1_ast) +val p1a = "{ x := 5; y := 8}" +val p1a_toks = Tok.fromString(p1a) +val p1a_ast = Block.parse_all(p1a_toks) +println(p1a_ast) + val p2 = "5 = 6" val p2_toks = Tok.fromString(p2) val p2_ast = BExp.parse_all(p2_toks) -println(p2_toks) println(p2_ast) val p2a = "true" val p2a_toks = Tok.fromString(p2a) val p2a_ast = BExp.parse_all(p2a_toks) -println(p2a_toks) println(p2a_ast) val p3 = "if true then skip else skip" val p3_toks = Tok.fromString(p3) val p3_ast = Stmt.parse_all(p3_toks) -println(p3_toks) println(p3_ast) val p3a = "if true then x := 5 else x := 10" val p3a_toks = Tok.fromString(p3a) val p3a_ast = Stmt.parse_all(p3a_toks) -println(p3a_toks) println(p3a_ast) val p3b = "if false then x := 5 else x := 10" val p3b_toks = Tok.fromString(p3b) val p3b_ast = Stmt.parse_all(p3b_toks) -println(p3b_toks) println(p3b_ast) +val p4 = """{ x := 5; + y := 3; + r := 0; + while y = 0 do { + r := r + x; + y := y - 1 + } + }""" +val p4_toks = Tok.fromString(p4) +val p4_ast = Block.parse_all(p4_toks) +println(p4_ast) + +// interpreter type Env = Map[String, Int] def eval_bexp(b: BExp, env: Env) : Boolean = b match {