updated
authorChristian Urban <christian dot urban at kcl dot ac dot uk>
Fri, 06 Nov 2015 08:52:16 +0000
changeset 366 5a83336a9690
parent 365 9b71dead1219
child 367 04127a5aad23
updated
progs/comb1.scala
slides/slides06.pdf
slides/slides06.tex
--- a/progs/comb1.scala	Fri Nov 06 04:54:41 2015 +0000
+++ b/progs/comb1.scala	Fri Nov 06 08:52:16 2015 +0000
@@ -89,7 +89,7 @@
 
 // arithmetic expressions
 lazy val E: Parser[String, String] = 
-  (F ~ "*" ~ F) ==> { case ((x, y), z) => x + y + z } || F  
+  (F ~ "*" ~ T) ==> { case ((x, y), z) => x + y + z } || F  
 lazy val F: Parser[String, String] = 
   ((T ~ "+" ~ T) ==> { case ((x, y), z) => x + y + z } ||
    (T ~ "-" ~ T) ==> { case ((x, y), z) => x + y + z } || T)
@@ -103,41 +103,39 @@
                                // how the grammar is set up
 
 // non-ambiguous vs ambiguous grammars
-lazy val U : Parser[String, String] =
-  ("1" ~ U) ==> { case (x, y) => x + y } || ""
-
-U.parse_all("1" * 100 + "0")
-
 lazy val S : Parser[String, String] =
   ("1" ~ S ~ S) ==> { case ((x, y), z) => x + y + z } || ""
 
 S.parse_all("1" * 15)
 
-
+lazy val U : Parser[String, String] =
+  ("1" ~ U) ==> { case (x, y) => x + y  } || ""
 
-
-
+U.parse("11")
+U.parse("11111")
+U.parse("11011")
 
-
-
+U.parse_all("1" * 100 + "0")
 
+lazy val UCount : Parser[String, Int] =
+  ("1" ~ UCount) ==> { case (x, y) => y + 1 } || 
+  "" ==> { (x) => 0 }
 
-
+UCount.parse("11111")
+UCount.parse_all("11111")
 
 
 
 
-
-
-
-
-
-
+// Single Character parser
+lazy val One : Parser[String, String] = "1"
+lazy val Two : Parser[String, String] = "2"
 
-
-
-
+One.parse("1")
+One.parse("111")
 
-
+(One ~ One).parse("111")
+(One ~ One ~ One).parse("111")
+(One ~ One ~ One ~ One).parse("1111")
 
-
+(One || Two).parse("111")
Binary file slides/slides06.pdf has changed
--- a/slides/slides06.tex	Fri Nov 06 04:54:41 2015 +0000
+++ b/slides/slides06.tex	Fri Nov 06 08:52:16 2015 +0000
@@ -9,6 +9,7 @@
 \renewcommand{\slidecaption}{AFL 06, King's College London}
 \newcommand{\bl}[1]{\textcolor{blue}{#1}}       
 %\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions
+\newcommand{\qq}{\mbox{\texttt{"}}}
 
 \begin{document}
 
@@ -91,35 +92,8 @@
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
 
-
-
-
-\newcommand{\qq}{\mbox{\texttt{"}}}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}[c]
-\frametitle{CFGs}
-
-A \alert{context-free} grammar (CFG) \bl{$G$} consists of:
-
-\begin{itemize}
-\item a finite set of nonterminal symbols (upper case)
-\item a finite terminal symbols or tokens (lower case)
-\item a start symbol (which must be a nonterminal)
-\item a set of rules
-\begin{center}
-\bl{$A \rightarrow \text{rhs}_1 | \text{rhs}_2 | \ldots$}
-\end{center}
-
-where \bl{rhs} are sequences involving terminals and nonterminals (can also be empty).\medskip\pause
-
-\end{itemize}
-
-\end{frame}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\mode<presentation>{
-\begin{frame}[c]
 \frametitle{\begin{tabular}{c}Hierarchy of Languages\end{tabular}}
 
 Recall that languages are sets of strings.
@@ -137,7 +111,142 @@
 
 \end{center}
 
-\end{frame}}
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Two Grammars}
+
+Which languages are recognised by the following two grammars?
+
+\begin{center}
+\bl{\begin{tabular}{lcl}
+$S$ & $\rightarrow$ &  $1 \cdot S \cdot S$\\
+        & $|$ & $\epsilon$
+\end{tabular}}
+\end{center}\bigskip
+
+\begin{center}
+\bl{\begin{tabular}{lcl}
+$U$ & $\rightarrow$ &  $1 \cdot U$\\
+        & $|$ & $\epsilon$
+\end{tabular}}
+\end{center}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+
+Atomic parsers, for example
+
+\begin{center}
+\bl{$1::rest \;\Rightarrow\; \{(1, rest)\}$} 
+\end{center}\bigskip
+
+\begin{itemize}
+\item you consume one or more token from the\\ 
+  input (stream)
+\item also works for characters and strings
+\end{itemize}
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+
+Alternative parser (code \bl{$p\;||\;q$})\bigskip
+
+\begin{itemize}
+\item apply \bl{$p$} and also \bl{$q$}; then combine 
+  the outputs
+\end{itemize}
+
+\begin{center}
+\large \bl{$p(\text{input}) \cup q(\text{input})$}
+\end{center}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+
+Sequence parser (code \bl{$p\sim q$})\bigskip
+
+\begin{itemize}
+\item apply first \bl{$p$} producing a set of pairs
+\item then apply \bl{$q$} to the unparsed parts
+\item then combine the results:\medskip 
+\begin{center}
+((output$_1$, output$_2$), unparsed part)
+\end{center}
+\end{itemize}
+
+\begin{center}
+\begin{tabular}{l}
+\large \bl{$\{((o_1, o_2), u_2) \;|\;$}\\[2mm] 
+\large\mbox{}\hspace{15mm} \bl{$(o_1, u_1) \in p(\text{input}) \wedge$}\\[2mm]
+\large\mbox{}\hspace{15mm} \bl{$(o_2, u_2) \in q(u_1)\}$}
+\end{tabular}
+\end{center}
+
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+
+Function parser (code \bl{$p \Rightarrow f\;$})\bigskip
+
+\begin{itemize}
+\item apply \bl{$p$} producing a set of pairs
+\item then apply the function \bl{$f$} to each first component
+\end{itemize}
+
+\begin{center}
+\begin{tabular}{l}
+\large \bl{$\{(f(o_1), u_1) \;|\; (o_1, u_1) \in p(\text{input})\}$}
+\end{tabular}
+\end{center}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[t]
+\frametitle{Ambiguous Grammars}
+
+\begin{center}
+\begin{tikzpicture}
+\begin{axis}[xlabel={\pcode{1}s},ylabel={time in secs},
+    enlargelimits=false,
+    xtick={0,100,...,1000},
+    xmax=1050,
+    ymax=33,
+    ytick={0,5,...,30},
+    scaled ticks=false,
+    axis lines=left,
+    width=11cm,
+    height=7cm, 
+    legend entries={unambiguous,ambiguous},  
+    legend pos=north east,
+    legend cell align=left,
+    x tick label style={font=\small,/pgf/number format/1000 sep={}}]
+\addplot[blue,mark=*, mark options={fill=white}] 
+  table {s-grammar1.data};
+\only<2>{
+  \addplot[red,mark=triangle*, mark options={fill=white}] 
+  table {s-grammar2.data};}  
+\end{axis}
+\end{tikzpicture}
+\end{center}
+
+\end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -190,35 +299,6 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \mode<presentation>{
 \begin{frame}[c]
-\frametitle{\begin{tabular}{c}Operator Precedences\end{tabular}}
-
-
-To disambiguate
-
-\begin{center}
-\bl{\begin{tabular}{lcl}
-$E$ & $\rightarrow$ &  $E \cdot + \cdot E \;|\;E \cdot * \cdot E \;|\;( \cdot E \cdot ) \;|\;N$ \\
-\end{tabular}}
-\end{center}
-
-Decide on how many precedence levels, say\medskip\\
-\hspace{5mm}highest for \bl{$()$}, medium for \bl{*}, lowest for \bl{+}
-
-\begin{center}
-\bl{\begin{tabular}{lcl}
-$E_{low}$ & $\rightarrow$ & $E_{med} \cdot + \cdot E_{low} \;|\; E_{med}$ \\
-$E_{med}$ & $\rightarrow$ & $E_{hi} \cdot * \cdot E_{med} \;|\; E_{hi}$\\
-$E_{hi}$ & $\rightarrow$ &  $( \cdot E_{low} \cdot ) \;|\;N$ \\
-\end{tabular}}
-\end{center}\pause
-
-\small What happens with \bl{$1 + 3  + 4$}?
-\end{frame}}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\mode<presentation>{
-\begin{frame}[c]
 \frametitle{\begin{tabular}{c}Removing Left-Recursion\end{tabular}}
 
 The rule for numbers is directly left-recursive:
@@ -263,6 +343,35 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \mode<presentation>{
 \begin{frame}[c]
+\frametitle{\begin{tabular}{c}Operator Precedences\end{tabular}}
+
+
+To disambiguate
+
+\begin{center}
+\bl{\begin{tabular}{lcl}
+$E$ & $\rightarrow$ &  $E \cdot + \cdot E \;|\;E \cdot * \cdot E \;|\;( \cdot E \cdot ) \;|\;N$ \\
+\end{tabular}}
+\end{center}
+
+Decide on how many precedence levels, say\medskip\\
+\hspace{5mm}highest for \bl{$()$}, medium for \bl{*}, lowest for \bl{+}
+
+\begin{center}
+\bl{\begin{tabular}{lcl}
+$E_{low}$ & $\rightarrow$ & $E_{med} \cdot + \cdot E_{low} \;|\; E_{med}$ \\
+$E_{med}$ & $\rightarrow$ & $E_{hi} \cdot * \cdot E_{med} \;|\; E_{hi}$\\
+$E_{hi}$ & $\rightarrow$ &  $( \cdot E_{low} \cdot ) \;|\;N$ \\
+\end{tabular}}
+\end{center}\pause
+
+\small What happens with \bl{$1 + 3  + 4$}?
+\end{frame}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode<presentation>{
+\begin{frame}[c]
 \frametitle{\begin{tabular}{c}Chomsky Normal Form\end{tabular}}
 
 All rules must be of the form
@@ -365,7 +474,47 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\mode<presentation>{
+\begin{frame}[c]
+\frametitle{The Goal of this Course}
+\mbox{}\\[-26mm]\mbox{}
+
+\begin{center}
+  \begin{tikzpicture}[scale=1,
+                      node/.style={
+                      rectangle,rounded corners=3mm,
+                      very thick,draw=black!50,
+                      minimum height=18mm, minimum width=20mm,
+                      top color=white,bottom color=black!20}]
+
+  \node at (3.05, 1.8) {\Large\bf Write a Compiler};
+
+  \node (0) at (-2.3,0) {}; 
+  
+  \node (A) at (0,0)  [node] {};
+  \node [below right] at (A.north west) {lexer};
+
+  \node (B) at (3,0)  [node] {};
+  \node [below right=1mm] at (B.north west) 
+    {\mbox{}\hspace{-1mm}parser};
+
+  \node (C) at (6,0)  [node] {};
+  \node [below right] at (C.north west) 
+    {\mbox{}\hspace{-1mm}code gen};
+
+  \node (1) at (8.4,0) {}; 
+
+  \draw [->,line width=4mm] (0) -- (A); 
+  \draw [->,line width=4mm] (A) -- (B); 
+  \draw [->,line width=4mm] (B) -- (C); 
+  \draw [->,line width=4mm] (C) -- (1); 
+  \end{tikzpicture}
+  \end{center}
+  
+We have lexer and parser.
+  
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}[c]
 
 \begin{center}
@@ -385,7 +534,7 @@
 \textit{BExp} & $\rightarrow$ & \ldots\\
 \end{tabular}}
 \end{center}
-\end{frame}}
+\end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%