# HG changeset patch # User Christian Urban # Date 1605279828 0 # Node ID d4fb8c7fc3bf39ad8809e2fd08467e50bf0b034f # Parent f4db602f642f4e1d8defff83597148cb9a063828 updated diff -r f4db602f642f -r d4fb8c7fc3bf progs/parser-combinators/comb1.sc --- a/progs/parser-combinators/comb1.sc Thu Nov 12 12:22:26 2020 +0000 +++ b/progs/parser-combinators/comb1.sc Fri Nov 13 15:03:48 2020 +0000 @@ -79,13 +79,9 @@ // p"<_some_string_>" implicit def parser_interpolation(sc: StringContext) = new { - def p(args: Any*) = TokParser(sc.s(args:_*)) + def p(args: Any*) = StrParser(sc.s(args:_*)) } - -p"while" ==> StrParser[String,....] - TokParser[List[Token],....] - -for x := 3 to 10 + // more convenient syntax for parser combinators implicit def ParserOps[I : IsSeq, T](p: Parser[I, T]) = new { @@ -106,7 +102,7 @@ // A parser for palindromes (just returns them as string) -lazy val Pal : Parser[List[Token], String] = { +lazy val Pal : Parser[String, String] = { (p"a" ~ Pal ~ p"a").map{ case ((x, y), z) => s"$x$y$z" } || (p"b" ~ Pal ~ p"b").map{ case ((x, y), z) => s"$x$y$z" } || p"a" || p"b" || p"" diff -r f4db602f642f -r d4fb8c7fc3bf progs/parser-combinators/comb2.sc --- a/progs/parser-combinators/comb2.sc Thu Nov 12 12:22:26 2020 +0000 +++ b/progs/parser-combinators/comb2.sc Fri Nov 13 15:03:48 2020 +0000 @@ -10,14 +10,11 @@ // amm comb2.sc - - // more convenience for the map parsers later on; // it allows writing nested patterns as // case x ~ y ~ z => ... -case class ~[+A, +B](_1: A, _2: B) - +case class ~[+A, +B](x: A, y: B) // constraint for the input type IsSeq[A] = A => Seq[_] @@ -172,7 +169,7 @@ // Examples -Stmts.parse_all("x2:=5+3;") +Stmt.parse_all("x2:=5+3") Block.parse_all("{x:=5;y:=8}") Block.parse_all("if(false)then{x:=5}else{x:=10}") diff -r f4db602f642f -r d4fb8c7fc3bf slides/slides06.pdf Binary file slides/slides06.pdf has changed diff -r f4db602f642f -r d4fb8c7fc3bf slides/slides06.tex --- a/slides/slides06.tex Thu Nov 12 12:22:26 2020 +0000 +++ b/slides/slides06.tex Fri Nov 13 15:03:48 2020 +0000 @@ -53,306 +53,19 @@ \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] -\frametitle{Parser Combinators} - -One of the simplest ways to implement a parser, see -{\small\url{https://vimeo.com/142341803}} (by Haoyi Li)\bigskip - -\begin{itemize} -\item[$\bullet$] build-in library in Scala -\item[$\bullet$] fastparse (2) library by Haoyi Li; is part of Ammonite -\item[$\bullet$] possible exponential runtime behaviour -\end{itemize} - -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] -\frametitle{Parser Combinators} - -Parser combinators: \bigskip - -\begin{minipage}{1.1\textwidth} -\begin{center} -\mbox{}\hspace{-12mm}\mbox{}$\underbrace{\text{list of tokens}}_{\text{input}}$ \bl{$\Rightarrow$} -$\underbrace{\text{set of (parsed input, unparsed input)}}_{\text{output}}$ -\end{center} -\end{minipage}\bigskip - -\begin{itemize} -\item atomic parsers -\item sequencing -\item alternative -\item semantic action (map-parser) -\end{itemize} - -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] - -Atomic parsers, for example, number tokens - -\begin{center} -\bl{$\texttt{Num(123)}::rest \;\Rightarrow\; \{(\texttt{Num(123)}, rest)\}$} -\end{center}\bigskip - -\begin{itemize} -\item you consume one or more token from the\\ - input (stream) -\item also works for characters and strings -\end{itemize} -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] - -Alternative parser (code \bl{$p\;||\;q$})\bigskip - -\begin{itemize} -\item apply \bl{$p$} and also \bl{$q$}; then combine - the outputs -\end{itemize} +%%\frametitle{Test Program} -\begin{center} -\large \bl{$p(\text{input}) \cup q(\text{input})$} -\end{center} - -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] - -Sequence parser (code \bl{$p\sim q$})\bigskip +\mbox{}\\[-18mm]\mbox{} -\begin{itemize} -\item apply first \bl{$p$} producing a set of pairs -\item then apply \bl{$q$} to the unparsed part -\item then combine the results:\medskip -\begin{center} -((output$_1$, output$_2$), unparsed part) -\end{center} -\end{itemize} - -\begin{center} -\begin{tabular}{l} -\large \bl{$\{((o_1, o_2), u_2) \;|\;$}\\[2mm] -\large\mbox{}\hspace{15mm} \bl{$(o_1, u_1) \in p(\text{input}) \wedge$}\\[2mm] -\large\mbox{}\hspace{15mm} \bl{$(o_2, u_2) \in q(u_1)\}$} -\end{tabular} -\end{center} - +{\lstset{language=While}%%\fontsize{10}{12}\selectfont +\texttt{\lstinputlisting{../progs/while-tests/loops.while}}} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] -Map-parser (code \bl{$p.map(f)\;$})\bigskip - -\begin{itemize} -\item apply \bl{$p$} producing a set of pairs -\item then apply the function \bl{$f$} to each first component -\end{itemize} - -\begin{center} -\begin{tabular}{l} -\large \bl{$\{(f(o_1), u_1) \;|\; (o_1, u_1) \in p(\text{input})\}$} -\end{tabular} -\end{center}\bigskip\bigskip\pause - -\bl{$f$} is the semantic action (``what to do with the parsed input'') - -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Semantic Actions\end{tabular}} - -Addition - -\begin{center} -\bl{$\meta{T} \sim + \sim \meta{E} \Rightarrow \underbrace{f\,((x,y), z) \Rightarrow x + z}_{\text{semantic action}}$} -\end{center}\pause - -Multiplication - -\begin{center} -\bl{$\meta{F} \sim * \sim \meta{T} \Rightarrow f\,((x,y), z) \Rightarrow x * z$} -\end{center}\pause - -Parenthesis - -\begin{center} -\bl{$\text{(} \sim \meta{E} \sim \text{)} \Rightarrow f\,((x,y), z) \Rightarrow y$} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] -\frametitle{Types of Parsers} - -\begin{itemize} -\item {\bf Sequencing}: if \bl{$p$} returns results of type \bl{$T$}, and \bl{$q$} results of type \bl{$S$}, -then \bl{$p \sim q$} returns results of type - -\begin{center} -\bl{$T \times S$} -\end{center}\pause - -\item {\bf Alternative}: if \bl{$p$} returns results of type \bl{$T$} then \bl{$q$} \alert{must} also have results of type \bl{$T$}, -and \bl{$p \;||\; q$} returns results of type - -\begin{center} -\bl{$T$} -\end{center}\pause - -\item {\bf Semantic Action}: if \bl{$p$} returns results of type \bl{$T$} and \bl{$f$} is a function from -\bl{$T$} to \bl{$S$}, then -\bl{$p \Rightarrow f$} returns results of type - -\begin{center} -\bl{$S$} -\end{center} - -\end{itemize} - -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] -\frametitle{Input Types of Parsers} - -\begin{itemize} -\item input: \alert{token list} -\item output: set of (output\_type, \alert{token list}) -\end{itemize}\bigskip\pause - -actually it can be any input type as long as it is a kind of -sequence (for example a string) - -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] -\frametitle{Scannerless Parsers} - -\begin{itemize} -\item input: \alert{string} -\item output: set of (output\_type, \alert{string}) -\end{itemize}\bigskip\bigskip - -but using lexers is better because whitespaces or comments can be -filtered out; then input is a sequence of tokens - -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] -\frametitle{Successful Parses} - -\begin{itemize} -\item input: string -\item output: \alert{set of} (output\_type, string) -\end{itemize}\bigskip - -a parse is successful whenever the input has been fully -``consumed'' (that is the second component is empty) - -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] -\frametitle{Abstract Parser Class} - -\small -\lstinputlisting[language=Scala,xleftmargin=1mm] - {../progs/app7.scala} -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] - -\small -\fontsize{10}{12}\selectfont -\lstinputlisting[language=Scala,xleftmargin=1mm] - {../progs/app8.scala} -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] -\frametitle{Two Grammars} - -Which languages are recognised by the following two grammars? - -\begin{center} -\bl{\begin{tabular}{lcl} -$\meta{S}$ & $\rightarrow$ & $1 \cdot \meta{S} \cdot \meta{S}$\\ - & $|$ & $\epsilon$ -\end{tabular}} -\end{center}\bigskip - -\begin{center} -\bl{\begin{tabular}{lcl} -$\meta{U}$ & $\rightarrow$ & $1 \cdot \meta{U}$\\ - & $|$ & $\epsilon$ -\end{tabular}} -\end{center} - -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[t] -\frametitle{Ambiguous Grammars} - -\begin{center} -\begin{tikzpicture} -\begin{axis}[xlabel={\pcode{1}s},ylabel={time in secs}, - enlargelimits=false, - xtick={0,100,...,1000}, - xmax=1050, - ymax=33, - ytick={0,5,...,30}, - scaled ticks=false, - axis lines=left, - width=11cm, - height=7cm, - legend entries={unambiguous,ambiguous}, - legend pos=north east, - legend cell align=left, - x tick label style={font=\small,/pgf/number format/1000 sep={}}] -\addplot[blue,mark=*, mark options={fill=white}] - table {s-grammar1.data}; -\only<2>{ - \addplot[red,mark=triangle*, mark options={fill=white}] - table {s-grammar2.data};} -\end{axis} -\end{tikzpicture} -\end{center} - -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} @@ -374,27 +87,23 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ \begin{frame}[c] -\frametitle{An Interpreter} +\frametitle{\begin{tabular}{c}Aexps\end{tabular}} \begin{center} -\bl{\begin{tabular}{l} -$\{$\\ -\;\;$x := 5 \text{;}$\\ -\;\;$y := x * 3\text{;}$\\ -\;\;$y := x * 4\text{;}$\\ -\;\;$x := u * 3$\\ -$\}$ +\bl{\begin{tabular}{@{}lcl@{}} +$\text{eval}(n)$ & $\dn$ & $n$\\ +$\text{eval}(a_1 + a_2)$ & $\dn$ & $\text{eval}(a_1) + \text{eval}(a_2)$\\ +$\text{eval}(a_1 - a_2)$ & $\dn$ & $\text{eval}(a_1) - \text{eval}(a_2)$\\ +$\text{eval}(a_1 * a_2)$ & $\dn$ & $\text{eval}(a_1) * \text{eval}(a_2)$\bigskip\\ +$\text{eval}(x)$ & $\dn$ & $???$\\ \end{tabular}} \end{center} -\begin{itemize} -\item the interpreter has to record the value of \bl{$x$} before assigning a value to \bl{$y$}\pause -\item \bl{\texttt{eval(stmt, env)}} -\end{itemize} +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -416,7 +125,32 @@ \end{center} \end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{An Interpreter (1)} + +\begin{center} +\bl{\begin{tabular}{l} +$\{$\\ +\;\;$x := 5 \text{;}$\\ +\;\;$y := x * 3\text{;}$\\ +\;\;$y := x * 4\text{;}$\\ +\;\;$x := u * 3$\\ +$\}$ +\end{tabular}} +\end{center} + +\begin{itemize} +\item the interpreter has to record the value of \bl{$x$} before assigning a value to \bl{$y$}\pause +\item \bl{\texttt{eval(stmt, env)}} +\end{itemize} + +\end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \mode{ \begin{frame}[c] @@ -442,17 +176,6 @@ \end{frame}} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] -\frametitle{Test Program} - -\mbox{}\\[-18mm]\mbox{} - -??%{\lstset{language=While}%%\fontsize{10}{12}\selectfont -%\texttt{\lstinputlisting{../progs/loops.while}}} - -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \mode{ @@ -470,6 +193,9 @@ \end{frame}} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\end{document} + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \mode{ \begin{frame}[c] @@ -489,42 +215,42 @@ -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[t] -\frametitle{% - \begin{tabular}{@ {}c@ {}} - \\[-3mm] - \LARGE Compilers and \\[-2mm] - \LARGE Formal Languages\\[3mm] - \end{tabular}} +% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% \begin{frame}[t] +% \frametitle{% +% \begin{tabular}{@ {}c@ {}} +% \\[-3mm] +% \LARGE Compilers and \\[-2mm] +% \LARGE Formal Languages\\[3mm] +% \end{tabular}} - \normalsize - \begin{center} - \begin{tabular}{ll} - Email: & christian.urban at kcl.ac.uk\\ - %Office Hours: & Thursdays 12 -- 14\\ - %Location: & N7.07 (North Wing, Bush House)\\ - Slides \& Progs: & KEATS (also homework is there)\\ - \end{tabular} - \end{center} +% \normalsize +% \begin{center} +% \begin{tabular}{ll} +% Email: & christian.urban at kcl.ac.uk\\ +% %Office Hours: & Thursdays 12 -- 14\\ +% %Location: & N7.07 (North Wing, Bush House)\\ +% Slides \& Progs: & KEATS (also homework is there)\\ +% \end{tabular} +% \end{center} - \begin{center} - \begin{tikzpicture} - \node[drop shadow,fill=white,inner sep=0pt] - {\footnotesize\rowcolors{1}{capri!10}{white} - \begin{tabular}{|p{4.8cm}|p{4.8cm}|}\hline - 1 Introduction, Languages & \cellcolor{blue!50} 6 While-Language \\ - 2 Regular Expressions, Derivatives & 7 Compilation, JVM \\ - 3 Automata, Regular Languages & 8 Compiling Functional Languages \\ - 4 Lexing, Tokenising & 9 Optimisations \\ - 5 Grammars, Parsing & 10 LLVM \\ \hline - \end{tabular}% - }; - \end{tikzpicture} - \end{center} +% \begin{center} +% \begin{tikzpicture} +% \node[drop shadow,fill=white,inner sep=0pt] +% {\footnotesize\rowcolors{1}{capri!10}{white} +% \begin{tabular}{|p{4.8cm}|p{4.8cm}|}\hline +% 1 Introduction, Languages & \cellcolor{blue!50} 6 While-Language \\ +% 2 Regular Expressions, Derivatives & 7 Compilation, JVM \\ +% 3 Automata, Regular Languages & 8 Compiling Functional Languages \\ +% 4 Lexing, Tokenising & 9 Optimisations \\ +% 5 Grammars, Parsing & 10 LLVM \\ \hline +% \end{tabular}% +% }; +% \end{tikzpicture} +% \end{center} -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% \end{frame} +% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -1244,6 +970,310 @@ \end{document} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Parser Combinators} + +One of the simplest ways to implement a parser, see +{\small\url{https://vimeo.com/142341803}} (by Haoyi Li)\bigskip + +\begin{itemize} +\item[$\bullet$] build-in library in Scala +\item[$\bullet$] fastparse (2) library by Haoyi Li; is part of Ammonite +\item[$\bullet$] possible exponential runtime behaviour +\end{itemize} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Parser Combinators} + +Parser combinators: \bigskip + +\begin{minipage}{1.1\textwidth} +\begin{center} +\mbox{}\hspace{-12mm}\mbox{}$\underbrace{\text{list of tokens}}_{\text{input}}$ \bl{$\Rightarrow$} +$\underbrace{\text{set of (parsed input, unparsed input)}}_{\text{output}}$ +\end{center} +\end{minipage}\bigskip + +\begin{itemize} +\item atomic parsers +\item sequencing +\item alternative +\item semantic action (map-parser) +\end{itemize} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] + +Atomic parsers, for example, number tokens + +\begin{center} +\bl{$\texttt{Num(123)}::rest \;\Rightarrow\; \{(\texttt{Num(123)}, rest)\}$} +\end{center}\bigskip + +\begin{itemize} +\item you consume one or more token from the\\ + input (stream) +\item also works for characters and strings +\end{itemize} +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] + +Alternative parser (code \bl{$p\;||\;q$})\bigskip + +\begin{itemize} +\item apply \bl{$p$} and also \bl{$q$}; then combine + the outputs +\end{itemize} + +\begin{center} +\large \bl{$p(\text{input}) \cup q(\text{input})$} +\end{center} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] + +Sequence parser (code \bl{$p\sim q$})\bigskip + +\begin{itemize} +\item apply first \bl{$p$} producing a set of pairs +\item then apply \bl{$q$} to the unparsed part +\item then combine the results:\medskip +\begin{center} +((output$_1$, output$_2$), unparsed part) +\end{center} +\end{itemize} + +\begin{center} +\begin{tabular}{l} +\large \bl{$\{((o_1, o_2), u_2) \;|\;$}\\[2mm] +\large\mbox{}\hspace{15mm} \bl{$(o_1, u_1) \in p(\text{input}) \wedge$}\\[2mm] +\large\mbox{}\hspace{15mm} \bl{$(o_2, u_2) \in q(u_1)\}$} +\end{tabular} +\end{center} + + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] + +Map-parser (code \bl{$p.map(f)\;$})\bigskip + +\begin{itemize} +\item apply \bl{$p$} producing a set of pairs +\item then apply the function \bl{$f$} to each first component +\end{itemize} + +\begin{center} +\begin{tabular}{l} +\large \bl{$\{(f(o_1), u_1) \;|\; (o_1, u_1) \in p(\text{input})\}$} +\end{tabular} +\end{center}\bigskip\bigskip\pause + +\bl{$f$} is the semantic action (``what to do with the parsed input'') + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Semantic Actions\end{tabular}} + +Addition + +\begin{center} +\bl{$\meta{T} \sim + \sim \meta{E} \Rightarrow \underbrace{f\,((x,y), z) \Rightarrow x + z}_{\text{semantic action}}$} +\end{center}\pause + +Multiplication + +\begin{center} +\bl{$\meta{F} \sim * \sim \meta{T} \Rightarrow f\,((x,y), z) \Rightarrow x * z$} +\end{center}\pause + +Parenthesis + +\begin{center} +\bl{$\text{(} \sim \meta{E} \sim \text{)} \Rightarrow f\,((x,y), z) \Rightarrow y$} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Types of Parsers} + +\begin{itemize} +\item {\bf Sequencing}: if \bl{$p$} returns results of type \bl{$T$}, and \bl{$q$} results of type \bl{$S$}, +then \bl{$p \sim q$} returns results of type + +\begin{center} +\bl{$T \times S$} +\end{center}\pause + +\item {\bf Alternative}: if \bl{$p$} returns results of type \bl{$T$} then \bl{$q$} \alert{must} also have results of type \bl{$T$}, +and \bl{$p \;||\; q$} returns results of type + +\begin{center} +\bl{$T$} +\end{center}\pause + +\item {\bf Semantic Action}: if \bl{$p$} returns results of type \bl{$T$} and \bl{$f$} is a function from +\bl{$T$} to \bl{$S$}, then +\bl{$p \Rightarrow f$} returns results of type + +\begin{center} +\bl{$S$} +\end{center} + +\end{itemize} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Input Types of Parsers} + +\begin{itemize} +\item input: \alert{token list} +\item output: set of (output\_type, \alert{token list}) +\end{itemize}\bigskip\pause + +actually it can be any input type as long as it is a kind of +sequence (for example a string) + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Scannerless Parsers} + +\begin{itemize} +\item input: \alert{string} +\item output: set of (output\_type, \alert{string}) +\end{itemize}\bigskip\bigskip + +but using lexers is better because whitespaces or comments can be +filtered out; then input is a sequence of tokens + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Successful Parses} + +\begin{itemize} +\item input: string +\item output: \alert{set of} (output\_type, string) +\end{itemize}\bigskip + +a parse is successful whenever the input has been fully +``consumed'' (that is the second component is empty) + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Abstract Parser Class} + +\small +\lstinputlisting[language=Scala,xleftmargin=1mm] + {../progs/app7.scala} +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] + +\small +\fontsize{10}{12}\selectfont +\lstinputlisting[language=Scala,xleftmargin=1mm] + {../progs/app8.scala} +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Two Grammars} + +Which languages are recognised by the following two grammars? + +\begin{center} +\bl{\begin{tabular}{lcl} +$\meta{S}$ & $\rightarrow$ & $1 \cdot \meta{S} \cdot \meta{S}$\\ + & $|$ & $\epsilon$ +\end{tabular}} +\end{center}\bigskip + +\begin{center} +\bl{\begin{tabular}{lcl} +$\meta{U}$ & $\rightarrow$ & $1 \cdot \meta{U}$\\ + & $|$ & $\epsilon$ +\end{tabular}} +\end{center} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[t] +\frametitle{Ambiguous Grammars} + +\begin{center} +\begin{tikzpicture} +\begin{axis}[xlabel={\pcode{1}s},ylabel={time in secs}, + enlargelimits=false, + xtick={0,100,...,1000}, + xmax=1050, + ymax=33, + ytick={0,5,...,30}, + scaled ticks=false, + axis lines=left, + width=11cm, + height=7cm, + legend entries={unambiguous,ambiguous}, + legend pos=north east, + legend cell align=left, + x tick label style={font=\small,/pgf/number format/1000 sep={}}] +\addplot[blue,mark=*, mark options={fill=white}] + table {s-grammar1.data}; +\only<2>{ + \addplot[red,mark=triangle*, mark options={fill=white}] + table {s-grammar2.data};} +\end{axis} +\end{tikzpicture} +\end{center} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + + %%% Local Variables: %%% mode: latex %%% TeX-master: t