--- a/app7.scala Wed Nov 21 07:28:28 2012 +0000
+++ b/app7.scala Wed Nov 21 09:04:11 2012 +0000
@@ -9,6 +9,8 @@
new AltParser(this, right)
def ==>[S] (f: => T => S) : Parser [I, S] =
new FunParser(this, f)
+ def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] =
+ new SeqParser(this, right)
}
--- a/parser4.scala Wed Nov 21 07:28:28 2012 +0000
+++ b/parser4.scala Wed Nov 21 09:04:11 2012 +0000
@@ -1,6 +1,5 @@
// parser combinators with input type I and return type T
-// and memoisation
case class SubString(s: String, l: Int, h: Int) {
def low = l
@@ -23,8 +22,6 @@
def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right)
def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f)
def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right)
- def ~>[S] (right : => Parser[S]) : Parser[S] = this ~ right ==> (_._2)
- def <~[S] (right : => Parser[S]) : Parser[T] = this ~ right ==> (_._1)
}
class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] {
@@ -70,6 +67,7 @@
}
}
+// ambigous grammar
lazy val E: Parser[Int] =
new CHECK("E", (E ~ "+" ~ E) ==> { case ((x, y), z) => x + z} ||
(E ~ "*" ~ E) ==> { case ((x, y), z) => x * z} ||
@@ -79,33 +77,6 @@
"2" ==> { (s) => 2 } ||
"3" ==> { (s) => 3 })
-println("foo " + E.parse_all("1+2*3"))
+println(E.parse_all("1+2*3"))
-// ambiguous grammar
-
-lazy val S: Parser[String] =
- new CHECK("S", ("1" ~ S ~ S) ==> { case ((x, y), z) => "1" + y + z} || "")
-
-lazy val S2: Parser[String] =
- new CHECK("S2", (S2 ~ S2 ~ S2) ==> { case ((x, y), z) => x + y + z} || "1" || "")
-
-def test2(i: Int) = {
- val result = E.parse_all("1" * i)
- print(result.size + " " + result + " ")
-}
-
-def time_needed[T](i: Int, code: => T) = {
- val start = System.nanoTime()
- for (j <- 1 to i) code
- val end = System.nanoTime()
- (end - start)/(i * 1.0e9)
-}
-
-
-for (i <- 1 to 10) {
- print(i + " ")
- print("%.5f".format(time_needed(1, test2(i))))
- print("\n")
-}
-
Binary file slides08.pdf has changed
--- a/slides08.tex Wed Nov 21 07:28:28 2012 +0000
+++ b/slides08.tex Wed Nov 21 09:04:11 2012 +0000
@@ -151,7 +151,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
-\frametitle{\begin{tabular}{c}Building a Web Browser\end{tabular}}
+\frametitle{\begin{tabular}{c}Building a ``Web Browser''\end{tabular}}
Using a lexer: assume the following regular expressions
@@ -171,11 +171,11 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
-\frametitle{\begin{tabular}{c}Interpreting a List of Token\end{tabular}}
+\frametitle{\begin{tabular}{c}Interpreting a List of Tokens\end{tabular}}
\begin{itemize}
-\item text should be formatted consistently up to a specified width, say 60 characters
-\item potential linebreaks are inserted by the formatter
+\item the text should be formatted consistently up to a specified width, say 60 characters
+\item potential linebreaks are inserted by the formatter (not the input)
\item repeated whitespaces are ``condensed'' to a single whitepace
\item \bl{$<\!p\!>$} \bl{$<\!\slash{}p\!>$} start/end paragraph
\item \bl{$<\!b\!>$} \bl{$<\!\slash{}b\!>$} start/end bold
@@ -190,7 +190,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
-\frametitle{\begin{tabular}{c}Interpreting a List of Token\end{tabular}}
+\frametitle{\begin{tabular}{c}Interpreting a List of Tokens\end{tabular}}
The lexer cannot prevent errors like
@@ -256,9 +256,9 @@
Sequence parser (code \bl{$p\sim q$})\bigskip
\begin{itemize}
-\item apply \bl{$p$} first producing a set of pairs
+\item apply first \bl{$p$} producing a set of pairs
\item then apply \bl{$q$} to the unparsed parts
-\item the combine the results:\\ \mbox{}\;\;((input$_1$, input$_2$), unparsed part)
+\item then combine the results:\\ \mbox{}\;\;((output$_1$, output$_2$), unparsed part)
\end{itemize}
\begin{center}
@@ -282,15 +282,16 @@
\begin{itemize}
\item apply \bl{$p$} producing a set of pairs
-\item then apply the function \bl{$f$} to each fist component
+\item then apply the function \bl{$f$} to each first component
\end{itemize}
\begin{center}
\begin{tabular}{l}
\large \bl{$\{(f(o_1), u_1) \;|\; (o_1, u_1) \in p(\text{input})\}$}
\end{tabular}
-\end{center}
+\end{center}\bigskip\bigskip\pause
+\bl{$f$} is the semantic action (``what to do with the parsed input'')
\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -396,7 +397,7 @@
\bl{$(ntp \sim ntp) \Longrightarrow f$}
\end{center}
-where \bl{$f$} is the semantic action (what to do with the pair)
+where \bl{$f$} is the semantic action (``what to do with the pair'')
\end{itemize}
@@ -478,6 +479,39 @@
\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode<presentation>{
+\begin{frame}[c]
+\frametitle{\begin{tabular}{c}Scannerless Parsers\end{tabular}}
+
+\begin{itemize}
+\item input: \alert{string}
+\item output: set of (output\_type, \alert{string})
+\end{itemize}\bigskip
+
+but lexers are better when whitespaces or comments need to be filtered out
+
+\end{frame}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode<presentation>{
+\begin{frame}[c]
+\frametitle{\begin{tabular}{c}Successful Parses\end{tabular}}
+
+\begin{itemize}
+\item input: string
+\item output: \alert{set of} (output\_type, string)
+\end{itemize}\bigskip
+
+a parse is successful whenever the input has been
+fully ``consumed'' (that is the second component is empty)
+
+
+\end{frame}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
@@ -578,7 +612,7 @@
\begin{itemize}
\item we record when we recursively called a parser\bigskip
\item whenever the is a recursion, the parser must have consumed something --- so
-I can decrease the input string/list of token by one (at the end)
+we can decrease the input string/list of token by one (at the end)
\end{itemize}
\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -590,15 +624,17 @@
\begin{center}
-\bl{\begin{tabular}{lcl}
+\bl{\begin{tabular}{@{}lcl@{}}
$Stmt$ & $\rightarrow$ & $\text{skip}$\\
& $|$ & $Id := AExp$\\
& $|$ & $\text{if}\; B\!Exp \;\text{then}\; Block \;\text{else}\; Block$\\
- & $|$ & $\text{while}\; B\!Exp \;\text{do}\; Block$\bigskip\\
+ & $|$ & $\text{while}\; B\!Exp \;\text{do}\; Block$\medskip\\
$Stmt$ & $\rightarrow$ & $Stmt \;\text{;}\; Stmts$\\
- & $|$ & $Stmt$\bigskip\\
+ & $|$ & $Stmt$\medskip\\
$Block$ & $\rightarrow$ & $\{ Stmts \}$\\
- & $|$ & $Stmt$\\
+ & $|$ & $Stmt$\medskip\\
+$AExp$ & $\rightarrow$ & \ldots\\
+$BExp$ & $\rightarrow$ & \ldots\\
\end{tabular}}
\end{center}
--- a/while.scala Wed Nov 21 07:28:28 2012 +0000
+++ b/while.scala Wed Nov 21 09:04:11 2012 +0000
@@ -1,12 +1,13 @@
//:load matcher.scala
+//:load parser3.scala
// some regular expressions
-val SYM = RANGE("""ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_""")
+val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_")
val DIGIT = RANGE("0123456789")
val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT)))
val NUM = PLUS(DIGIT)
-val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "begin", "end", "true", "false")
+val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false")
val SEMI: Rexp = ";"
val OP: Rexp = ALTS(":=", "=", "+", "-", "*")
val WHITESPACE = PLUS(RANGE(" \n"))
@@ -15,7 +16,7 @@
val BEGIN: Rexp = "{"
val END: Rexp = "}"
-// for classifying the strings that have been recognised
+// tokens for classifying the strings that have been recognised
abstract class Token
case object T_WHITESPACE extends Token
case object T_SEMI extends Token
@@ -28,7 +29,6 @@
case class T_NUM(s: String) extends Token
case class T_KWD(s: String) extends Token
-
val lexing_rules: List[Rule[Token]] =
List((KEYWORD, (s) => T_KWD(s.mkString)),
(ID, (s) => T_ID(s.mkString)),
@@ -53,14 +53,16 @@
case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt
case class While(b: BExp, bl: Block) extends Stmt
case class Assign(s: String, a: AExp) extends Stmt
+
case class Var(s: String) extends AExp
case class Num(i: Int) extends AExp
case class Aop(o: String, a1: AExp, a2: AExp) extends AExp
+
case object True extends BExp
case object False extends BExp
case class Bop(o: String, a1: AExp, a2: AExp) extends BExp
-
+// atomic parsers
case class TokParser(tok: Token) extends Parser[List[Token], Token] {
def parse(ts: List[Token]) = ts match {
case t::ts if (t == tok) => Set((t, ts))
@@ -91,12 +93,12 @@
(F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F
lazy val F: Parser[List[Token], AExp] =
(T_LPAREN ~> AExp <~ T_RPAREN) ||
- IdParser ==> ((s) => Var(s)) ||
- NumParser ==> ((i) => Num(i))
+ IdParser ==> Var ||
+ NumParser ==> Num
lazy val BExp: Parser[List[Token], BExp] =
(AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Bop("=", x, z): BExp } ||
- (T_KWD("true") ==> ((_) => True: BExp)) ||
+ (T_KWD("true") ==> ((_) => True)) ||
(T_KWD("false") ==> ((_) => False: BExp))
lazy val Stmt: Parser[List[Token], Stmt] =
@@ -113,43 +115,58 @@
(T_BEGIN ~> Stmts <~ T_END) ||
(Stmt ==> ((s) => List(s)))
+
+// examples
val p1 = "x := 5"
val p1_toks = Tok.fromString(p1)
val p1_ast = Block.parse_all(p1_toks)
println(p1_toks)
println(p1_ast)
+val p1a = "{ x := 5; y := 8}"
+val p1a_toks = Tok.fromString(p1a)
+val p1a_ast = Block.parse_all(p1a_toks)
+println(p1a_ast)
+
val p2 = "5 = 6"
val p2_toks = Tok.fromString(p2)
val p2_ast = BExp.parse_all(p2_toks)
-println(p2_toks)
println(p2_ast)
val p2a = "true"
val p2a_toks = Tok.fromString(p2a)
val p2a_ast = BExp.parse_all(p2a_toks)
-println(p2a_toks)
println(p2a_ast)
val p3 = "if true then skip else skip"
val p3_toks = Tok.fromString(p3)
val p3_ast = Stmt.parse_all(p3_toks)
-println(p3_toks)
println(p3_ast)
val p3a = "if true then x := 5 else x := 10"
val p3a_toks = Tok.fromString(p3a)
val p3a_ast = Stmt.parse_all(p3a_toks)
-println(p3a_toks)
println(p3a_ast)
val p3b = "if false then x := 5 else x := 10"
val p3b_toks = Tok.fromString(p3b)
val p3b_ast = Stmt.parse_all(p3b_toks)
-println(p3b_toks)
println(p3b_ast)
+val p4 = """{ x := 5;
+ y := 3;
+ r := 0;
+ while y = 0 do {
+ r := r + x;
+ y := y - 1
+ }
+ }"""
+val p4_toks = Tok.fromString(p4)
+val p4_ast = Block.parse_all(p4_toks)
+println(p4_ast)
+
+// interpreter
type Env = Map[String, Int]
def eval_bexp(b: BExp, env: Env) : Boolean = b match {