afl-material: changeset 70:e6868bd2942b

--- a/app7.scala	Wed Nov 21 07:28:28 2012 +0000
+++ b/app7.scala	Wed Nov 21 09:04:11 2012 +0000
@@ -9,6 +9,8 @@
       new AltParser(this, right)
   def ==>[S] (f: => T => S) : Parser [I, S] = 
       new FunParser(this, f)
+   def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = 
+      new SeqParser(this, right)
 }

--- a/parser4.scala	Wed Nov 21 07:28:28 2012 +0000
+++ b/parser4.scala	Wed Nov 21 09:04:11 2012 +0000
@@ -1,6 +1,5 @@
 
 // parser combinators with input type I and return type T
-// and memoisation
 
 case class SubString(s: String, l: Int, h: Int) {
   def low = l
@@ -23,8 +22,6 @@
   def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right)
   def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f)
   def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right)
-  def ~>[S] (right : => Parser[S]) : Parser[S] = this ~ right ==> (_._2)
-  def <~[S] (right : => Parser[S]) : Parser[T] = this ~ right ==> (_._1)
 }
 
 class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] {
@@ -70,6 +67,7 @@
   }
 }
 
+// ambigous grammar
 lazy val E: Parser[Int] = 
   new CHECK("E", (E ~ "+" ~ E) ==> { case ((x, y), z) => x + z} || 
                  (E ~ "*" ~ E) ==> { case ((x, y), z) => x * z} ||
@@ -79,33 +77,6 @@
                  "2" ==> { (s) => 2 } ||
                  "3" ==> { (s) => 3 })
 
-println("foo " + E.parse_all("1+2*3"))
+println(E.parse_all("1+2*3"))
 
 
-// ambiguous grammar
-
-lazy val S: Parser[String] = 
-  new CHECK("S", ("1" ~ S ~ S) ==> { case ((x, y), z) => "1" + y + z} || "")
-
-lazy val S2: Parser[String] = 
-  new CHECK("S2", (S2 ~ S2 ~ S2) ==> { case ((x, y), z) => x + y + z} || "1" || "")
-
-def test2(i: Int) = {
-  val result = E.parse_all("1" * i)
-  print(result.size + " " + result + " ")
-}
-
-def time_needed[T](i: Int, code: => T) = {
-  val start = System.nanoTime()
-  for (j <- 1 to i) code
-  val end = System.nanoTime()
-  (end - start)/(i * 1.0e9)
-}
-
-
-for (i <- 1 to 10) {
-  print(i + " ")
-  print("%.5f".format(time_needed(1, test2(i))))
-  print("\n")
-}
-

Binary file slides08.pdf has changed

--- a/slides08.tex	Wed Nov 21 07:28:28 2012 +0000
+++ b/slides08.tex	Wed Nov 21 09:04:11 2012 +0000
@@ -151,7 +151,7 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \mode<presentation>{
 \begin{frame}[c]
-\frametitle{\begin{tabular}{c}Building a Web Browser\end{tabular}}
+\frametitle{\begin{tabular}{c}Building a ``Web Browser''\end{tabular}}
 
 Using a lexer: assume the following regular expressions
 
@@ -171,11 +171,11 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \mode<presentation>{
 \begin{frame}[c]
-\frametitle{\begin{tabular}{c}Interpreting a List of Token\end{tabular}}
+\frametitle{\begin{tabular}{c}Interpreting a List of Tokens\end{tabular}}
 
 \begin{itemize}
-\item text should be formatted consistently up to a specified width, say 60 characters 
-\item potential linebreaks are inserted by the formatter
+\item the text should be formatted consistently up to a specified width, say 60 characters 
+\item potential linebreaks are inserted by the formatter (not the input)
 \item repeated whitespaces are ``condensed'' to a single whitepace
 \item \bl{$<\!p\!>$} \bl{$<\!\slash{}p\!>$}  start/end paragraph
 \item \bl{$<\!b\!>$} \bl{$<\!\slash{}b\!>$}  start/end bold
@@ -190,7 +190,7 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \mode<presentation>{
 \begin{frame}[c]
-\frametitle{\begin{tabular}{c}Interpreting a List of Token\end{tabular}}
+\frametitle{\begin{tabular}{c}Interpreting a List of Tokens\end{tabular}}
 
 The lexer cannot prevent errors like
 
@@ -256,9 +256,9 @@
 Sequence parser (code \bl{$p\sim q$})\bigskip
 
 \begin{itemize}
-\item apply \bl{$p$} first producing a set of pairs
+\item apply first \bl{$p$} producing a set of pairs
 \item then apply \bl{$q$} to the unparsed parts
-\item the combine the results:\\ \mbox{}\;\;((input$_1$, input$_2$), unparsed part)
+\item then combine the results:\\ \mbox{}\;\;((output$_1$, output$_2$), unparsed part)
 \end{itemize}
 
 \begin{center}
@@ -282,15 +282,16 @@
 
 \begin{itemize}
 \item apply \bl{$p$} producing a set of pairs
-\item then apply the function \bl{$f$} to each fist component
+\item then apply the function \bl{$f$} to each first component
 \end{itemize}
 
 \begin{center}
 \begin{tabular}{l}
 \large \bl{$\{(f(o_1), u_1) \;|\; (o_1, u_1) \in p(\text{input})\}$}
 \end{tabular}
-\end{center}
+\end{center}\bigskip\bigskip\pause
 
+\bl{$f$} is the semantic action (``what to do with the parsed input'')
 
 \end{frame}}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
@@ -396,7 +397,7 @@
 \bl{$(ntp \sim ntp) \Longrightarrow f$}
 \end{center}
 
-where \bl{$f$} is the semantic action (what to do with the pair)
+where \bl{$f$} is the semantic action (``what to do with the pair'')
 
 \end{itemize}
 
@@ -478,6 +479,39 @@
 \end{frame}}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
 
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode<presentation>{
+\begin{frame}[c]
+\frametitle{\begin{tabular}{c}Scannerless Parsers\end{tabular}}
+
+\begin{itemize}
+\item input: \alert{string}
+\item output: set of (output\_type, \alert{string})
+\end{itemize}\bigskip
+
+but lexers are better when whitespaces or comments need to be filtered out
+
+\end{frame}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode<presentation>{
+\begin{frame}[c]
+\frametitle{\begin{tabular}{c}Successful Parses\end{tabular}}
+
+\begin{itemize}
+\item input: string
+\item output: \alert{set of} (output\_type, string)
+\end{itemize}\bigskip
+
+a parse is successful whenever the input has been
+fully ``consumed'' (that is the second component is empty)
+
+
+\end{frame}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \mode<presentation>{
 \begin{frame}[c]
@@ -578,7 +612,7 @@
 \begin{itemize}
 \item we record when we recursively called a parser\bigskip
 \item whenever the is a recursion, the parser must have consumed something --- so
-I can decrease the input string/list of token by one (at the end)
+we can decrease the input string/list of token by one (at the end)
 \end{itemize}
 \end{frame}}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
@@ -590,15 +624,17 @@
 
 
 \begin{center}
-\bl{\begin{tabular}{lcl}
+\bl{\begin{tabular}{@{}lcl@{}}
 $Stmt$ & $\rightarrow$ &  $\text{skip}$\\
               & $|$ & $Id := AExp$\\
               & $|$ & $\text{if}\; B\!Exp \;\text{then}\; Block \;\text{else}\; Block$\\
-              & $|$ & $\text{while}\; B\!Exp \;\text{do}\; Block$\bigskip\\
+              & $|$ & $\text{while}\; B\!Exp \;\text{do}\; Block$\medskip\\
 $Stmt$ & $\rightarrow$ &  $Stmt \;\text{;}\; Stmts$\\
-              & $|$ & $Stmt$\bigskip\\
+              & $|$ & $Stmt$\medskip\\
 $Block$ & $\rightarrow$ &  $\{ Stmts \}$\\
-                & $|$ & $Stmt$\\
+                & $|$ & $Stmt$\medskip\\
+$AExp$ & $\rightarrow$ & \ldots\\
+$BExp$ & $\rightarrow$ & \ldots\\
 \end{tabular}}
 \end{center}

--- a/while.scala	Wed Nov 21 07:28:28 2012 +0000
+++ b/while.scala	Wed Nov 21 09:04:11 2012 +0000
@@ -1,12 +1,13 @@
 
 //:load matcher.scala
+//:load parser3.scala
 
 // some regular expressions
-val SYM = RANGE("""ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_""")
+val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_")
 val DIGIT = RANGE("0123456789")
 val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT))) 
 val NUM = PLUS(DIGIT)
-val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "begin", "end", "true", "false") 
+val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false") 
 val SEMI: Rexp = ";"
 val OP: Rexp = ALTS(":=", "=", "+", "-", "*")
 val WHITESPACE = PLUS(RANGE(" \n"))
@@ -15,7 +16,7 @@
 val BEGIN: Rexp = "{"
 val END: Rexp = "}"
 
-// for classifying the strings that have been recognised
+// tokens for classifying the strings that have been recognised
 abstract class Token
 case object T_WHITESPACE extends Token
 case object T_SEMI extends Token
@@ -28,7 +29,6 @@
 case class T_NUM(s: String) extends Token
 case class T_KWD(s: String) extends Token
 
-
 val lexing_rules: List[Rule[Token]] = 
   List((KEYWORD, (s) => T_KWD(s.mkString)),
        (ID, (s) => T_ID(s.mkString)),
@@ -53,14 +53,16 @@
 case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt
 case class While(b: BExp, bl: Block) extends Stmt
 case class Assign(s: String, a: AExp) extends Stmt
+
 case class Var(s: String) extends AExp
 case class Num(i: Int) extends AExp
 case class Aop(o: String, a1: AExp, a2: AExp) extends AExp
+
 case object True extends BExp
 case object False extends BExp
 case class Bop(o: String, a1: AExp, a2: AExp) extends BExp
 
-
+// atomic parsers
 case class TokParser(tok: Token) extends Parser[List[Token], Token] {
   def parse(ts: List[Token]) = ts match {
     case t::ts if (t == tok) => Set((t, ts)) 
@@ -91,12 +93,12 @@
   (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F
 lazy val F: Parser[List[Token], AExp] = 
   (T_LPAREN ~> AExp <~ T_RPAREN) || 
-  IdParser ==> ((s) => Var(s)) || 
-  NumParser ==> ((i) => Num(i))
+  IdParser ==> Var || 
+  NumParser ==> Num
 
 lazy val BExp: Parser[List[Token], BExp] = 
   (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Bop("=", x, z): BExp } || 
-  (T_KWD("true") ==> ((_) => True: BExp)) || 
+  (T_KWD("true") ==> ((_) => True)) || 
   (T_KWD("false") ==> ((_) => False: BExp))
 
 lazy val Stmt: Parser[List[Token], Stmt] =
@@ -113,43 +115,58 @@
   (T_BEGIN ~> Stmts <~ T_END) || 
   (Stmt ==> ((s) => List(s)))
 
+
+// examples
 val p1 = "x := 5"
 val p1_toks = Tok.fromString(p1) 
 val p1_ast = Block.parse_all(p1_toks)
 println(p1_toks)
 println(p1_ast)
 
+val p1a = "{ x := 5; y := 8}"
+val p1a_toks = Tok.fromString(p1a) 
+val p1a_ast = Block.parse_all(p1a_toks)
+println(p1a_ast)
+
 val p2 = "5 = 6"
 val p2_toks = Tok.fromString(p2) 
 val p2_ast = BExp.parse_all(p2_toks)
-println(p2_toks)
 println(p2_ast)
 
 val p2a = "true"
 val p2a_toks = Tok.fromString(p2a) 
 val p2a_ast = BExp.parse_all(p2a_toks)
-println(p2a_toks)
 println(p2a_ast)
 
 val p3 = "if true then skip else skip"
 val p3_toks = Tok.fromString(p3) 
 val p3_ast = Stmt.parse_all(p3_toks)
-println(p3_toks)
 println(p3_ast)
 
 val p3a = "if true then x := 5 else x := 10"
 val p3a_toks = Tok.fromString(p3a) 
 val p3a_ast = Stmt.parse_all(p3a_toks)
-println(p3a_toks)
 println(p3a_ast)
 
 val p3b = "if false then x := 5 else x := 10"
 val p3b_toks = Tok.fromString(p3b) 
 val p3b_ast = Stmt.parse_all(p3b_toks)
-println(p3b_toks)
 println(p3b_ast)
 
+val p4 = """{ x := 5;
+              y := 3;
+              r := 0;
+              while y = 0 do {
+                 r := r + x;
+                 y := y - 1
+              } 
+            }"""
+val p4_toks = Tok.fromString(p4) 
+val p4_ast = Block.parse_all(p4_toks)
+println(p4_ast)
 
+
+// interpreter
 type Env = Map[String, Int]
 
 def eval_bexp(b: BExp, env: Env) : Boolean = b match {

author	Christian Urban <christian dot urban at kcl dot ac dot uk>
	Wed, 21 Nov 2012 09:04:11 +0000
changeset 70	e6868bd2942b
parent 69	cc3f7908b942
child 71	7717f20f0504

app7.scala		file \| annotate \| diff \| comparison \| revisions
parser4.scala		file \| annotate \| diff \| comparison \| revisions
slides08.pdf		file \| annotate \| diff \| comparison \| revisions
slides08.tex		file \| annotate \| diff \| comparison \| revisions
while.scala		file \| annotate \| diff \| comparison \| revisions