# HG changeset patch # User Christian Urban # Date 1446799936 0 # Node ID 5a83336a969073547a5ea08a285d5cb9d6715194 # Parent 9b71dead1219e61db2a18a47b60ab932f78db2d7 updated diff -r 9b71dead1219 -r 5a83336a9690 progs/comb1.scala --- a/progs/comb1.scala Fri Nov 06 04:54:41 2015 +0000 +++ b/progs/comb1.scala Fri Nov 06 08:52:16 2015 +0000 @@ -89,7 +89,7 @@ // arithmetic expressions lazy val E: Parser[String, String] = - (F ~ "*" ~ F) ==> { case ((x, y), z) => x + y + z } || F + (F ~ "*" ~ T) ==> { case ((x, y), z) => x + y + z } || F lazy val F: Parser[String, String] = ((T ~ "+" ~ T) ==> { case ((x, y), z) => x + y + z } || (T ~ "-" ~ T) ==> { case ((x, y), z) => x + y + z } || T) @@ -103,41 +103,39 @@ // how the grammar is set up // non-ambiguous vs ambiguous grammars -lazy val U : Parser[String, String] = - ("1" ~ U) ==> { case (x, y) => x + y } || "" - -U.parse_all("1" * 100 + "0") - lazy val S : Parser[String, String] = ("1" ~ S ~ S) ==> { case ((x, y), z) => x + y + z } || "" S.parse_all("1" * 15) - +lazy val U : Parser[String, String] = + ("1" ~ U) ==> { case (x, y) => x + y } || "" - - +U.parse("11") +U.parse("11111") +U.parse("11011") - - +U.parse_all("1" * 100 + "0") +lazy val UCount : Parser[String, Int] = + ("1" ~ UCount) ==> { case (x, y) => y + 1 } || + "" ==> { (x) => 0 } - +UCount.parse("11111") +UCount.parse_all("11111") - - - - - - +// Single Character parser +lazy val One : Parser[String, String] = "1" +lazy val Two : Parser[String, String] = "2" - - - +One.parse("1") +One.parse("111") - +(One ~ One).parse("111") +(One ~ One ~ One).parse("111") +(One ~ One ~ One ~ One).parse("1111") - +(One || Two).parse("111") diff -r 9b71dead1219 -r 5a83336a9690 slides/slides06.pdf Binary file slides/slides06.pdf has changed diff -r 9b71dead1219 -r 5a83336a9690 slides/slides06.tex --- a/slides/slides06.tex Fri Nov 06 04:54:41 2015 +0000 +++ b/slides/slides06.tex Fri Nov 06 08:52:16 2015 +0000 @@ -9,6 +9,7 @@ \renewcommand{\slidecaption}{AFL 06, King's College London} \newcommand{\bl}[1]{\textcolor{blue}{#1}} %\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions +\newcommand{\qq}{\mbox{\texttt{"}}} \begin{document} @@ -91,35 +92,8 @@ \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - -\newcommand{\qq}{\mbox{\texttt{"}}} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] -\frametitle{CFGs} - -A \alert{context-free} grammar (CFG) \bl{$G$} consists of: - -\begin{itemize} -\item a finite set of nonterminal symbols (upper case) -\item a finite terminal symbols or tokens (lower case) -\item a start symbol (which must be a nonterminal) -\item a set of rules -\begin{center} -\bl{$A \rightarrow \text{rhs}_1 | \text{rhs}_2 | \ldots$} -\end{center} - -where \bl{rhs} are sequences involving terminals and nonterminals (can also be empty).\medskip\pause - -\end{itemize} - -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] \frametitle{\begin{tabular}{c}Hierarchy of Languages\end{tabular}} Recall that languages are sets of strings. @@ -137,7 +111,142 @@ \end{center} -\end{frame}} +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Two Grammars} + +Which languages are recognised by the following two grammars? + +\begin{center} +\bl{\begin{tabular}{lcl} +$S$ & $\rightarrow$ & $1 \cdot S \cdot S$\\ + & $|$ & $\epsilon$ +\end{tabular}} +\end{center}\bigskip + +\begin{center} +\bl{\begin{tabular}{lcl} +$U$ & $\rightarrow$ & $1 \cdot U$\\ + & $|$ & $\epsilon$ +\end{tabular}} +\end{center} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] + +Atomic parsers, for example + +\begin{center} +\bl{$1::rest \;\Rightarrow\; \{(1, rest)\}$} +\end{center}\bigskip + +\begin{itemize} +\item you consume one or more token from the\\ + input (stream) +\item also works for characters and strings +\end{itemize} +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] + +Alternative parser (code \bl{$p\;||\;q$})\bigskip + +\begin{itemize} +\item apply \bl{$p$} and also \bl{$q$}; then combine + the outputs +\end{itemize} + +\begin{center} +\large \bl{$p(\text{input}) \cup q(\text{input})$} +\end{center} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] + +Sequence parser (code \bl{$p\sim q$})\bigskip + +\begin{itemize} +\item apply first \bl{$p$} producing a set of pairs +\item then apply \bl{$q$} to the unparsed parts +\item then combine the results:\medskip +\begin{center} +((output$_1$, output$_2$), unparsed part) +\end{center} +\end{itemize} + +\begin{center} +\begin{tabular}{l} +\large \bl{$\{((o_1, o_2), u_2) \;|\;$}\\[2mm] +\large\mbox{}\hspace{15mm} \bl{$(o_1, u_1) \in p(\text{input}) \wedge$}\\[2mm] +\large\mbox{}\hspace{15mm} \bl{$(o_2, u_2) \in q(u_1)\}$} +\end{tabular} +\end{center} + + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] + +Function parser (code \bl{$p \Rightarrow f\;$})\bigskip + +\begin{itemize} +\item apply \bl{$p$} producing a set of pairs +\item then apply the function \bl{$f$} to each first component +\end{itemize} + +\begin{center} +\begin{tabular}{l} +\large \bl{$\{(f(o_1), u_1) \;|\; (o_1, u_1) \in p(\text{input})\}$} +\end{tabular} +\end{center} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[t] +\frametitle{Ambiguous Grammars} + +\begin{center} +\begin{tikzpicture} +\begin{axis}[xlabel={\pcode{1}s},ylabel={time in secs}, + enlargelimits=false, + xtick={0,100,...,1000}, + xmax=1050, + ymax=33, + ytick={0,5,...,30}, + scaled ticks=false, + axis lines=left, + width=11cm, + height=7cm, + legend entries={unambiguous,ambiguous}, + legend pos=north east, + legend cell align=left, + x tick label style={font=\small,/pgf/number format/1000 sep={}}] +\addplot[blue,mark=*, mark options={fill=white}] + table {s-grammar1.data}; +\only<2>{ + \addplot[red,mark=triangle*, mark options={fill=white}] + table {s-grammar2.data};} +\end{axis} +\end{tikzpicture} +\end{center} + +\end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -190,35 +299,6 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \mode{ \begin{frame}[c] -\frametitle{\begin{tabular}{c}Operator Precedences\end{tabular}} - - -To disambiguate - -\begin{center} -\bl{\begin{tabular}{lcl} -$E$ & $\rightarrow$ & $E \cdot + \cdot E \;|\;E \cdot * \cdot E \;|\;( \cdot E \cdot ) \;|\;N$ \\ -\end{tabular}} -\end{center} - -Decide on how many precedence levels, say\medskip\\ -\hspace{5mm}highest for \bl{$()$}, medium for \bl{*}, lowest for \bl{+} - -\begin{center} -\bl{\begin{tabular}{lcl} -$E_{low}$ & $\rightarrow$ & $E_{med} \cdot + \cdot E_{low} \;|\; E_{med}$ \\ -$E_{med}$ & $\rightarrow$ & $E_{hi} \cdot * \cdot E_{med} \;|\; E_{hi}$\\ -$E_{hi}$ & $\rightarrow$ & $( \cdot E_{low} \cdot ) \;|\;N$ \\ -\end{tabular}} -\end{center}\pause - -\small What happens with \bl{$1 + 3 + 4$}? -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] \frametitle{\begin{tabular}{c}Removing Left-Recursion\end{tabular}} The rule for numbers is directly left-recursive: @@ -263,6 +343,35 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \mode{ \begin{frame}[c] +\frametitle{\begin{tabular}{c}Operator Precedences\end{tabular}} + + +To disambiguate + +\begin{center} +\bl{\begin{tabular}{lcl} +$E$ & $\rightarrow$ & $E \cdot + \cdot E \;|\;E \cdot * \cdot E \;|\;( \cdot E \cdot ) \;|\;N$ \\ +\end{tabular}} +\end{center} + +Decide on how many precedence levels, say\medskip\\ +\hspace{5mm}highest for \bl{$()$}, medium for \bl{*}, lowest for \bl{+} + +\begin{center} +\bl{\begin{tabular}{lcl} +$E_{low}$ & $\rightarrow$ & $E_{med} \cdot + \cdot E_{low} \;|\; E_{med}$ \\ +$E_{med}$ & $\rightarrow$ & $E_{hi} \cdot * \cdot E_{med} \;|\; E_{hi}$\\ +$E_{hi}$ & $\rightarrow$ & $( \cdot E_{low} \cdot ) \;|\;N$ \\ +\end{tabular}} +\end{center}\pause + +\small What happens with \bl{$1 + 3 + 4$}? +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] \frametitle{\begin{tabular}{c}Chomsky Normal Form\end{tabular}} All rules must be of the form @@ -365,7 +474,47 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ +\begin{frame}[c] +\frametitle{The Goal of this Course} +\mbox{}\\[-26mm]\mbox{} + +\begin{center} + \begin{tikzpicture}[scale=1, + node/.style={ + rectangle,rounded corners=3mm, + very thick,draw=black!50, + minimum height=18mm, minimum width=20mm, + top color=white,bottom color=black!20}] + + \node at (3.05, 1.8) {\Large\bf Write a Compiler}; + + \node (0) at (-2.3,0) {}; + + \node (A) at (0,0) [node] {}; + \node [below right] at (A.north west) {lexer}; + + \node (B) at (3,0) [node] {}; + \node [below right=1mm] at (B.north west) + {\mbox{}\hspace{-1mm}parser}; + + \node (C) at (6,0) [node] {}; + \node [below right] at (C.north west) + {\mbox{}\hspace{-1mm}code gen}; + + \node (1) at (8.4,0) {}; + + \draw [->,line width=4mm] (0) -- (A); + \draw [->,line width=4mm] (A) -- (B); + \draw [->,line width=4mm] (B) -- (C); + \draw [->,line width=4mm] (C) -- (1); + \end{tikzpicture} + \end{center} + +We have lexer and parser. + +\end{frame} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] \begin{center} @@ -385,7 +534,7 @@ \textit{BExp} & $\rightarrow$ & \ldots\\ \end{tabular}} \end{center} -\end{frame}} +\end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%