58 \end{center} |
58 \end{center} |
59 |
59 |
60 \end{frame} |
60 \end{frame} |
61 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
61 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
62 |
62 |
|
63 |
|
64 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
65 \begin{frame}[c] |
|
66 \frametitle{Parser Combinators} |
|
67 |
|
68 One of the simplest ways to implement a parser, see |
|
69 {\small\url{https://vimeo.com/142341803}} (by Haoyi Li)\bigskip |
|
70 |
|
71 \begin{itemize} |
|
72 \item[$\bullet$] build-in library in Scala |
|
73 \item[$\bullet$] fastparse (2) library by Haoyi Li; is part of Ammonite |
|
74 \item[$\bullet$] possible exponential runtime behaviour |
|
75 \end{itemize} |
|
76 |
|
77 \end{frame} |
|
78 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
79 |
|
80 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
81 \begin{frame}[c] |
|
82 \frametitle{Parser Combinators} |
|
83 |
|
84 Parser combinators: \bigskip |
|
85 |
|
86 \begin{minipage}{1.1\textwidth} |
|
87 \begin{center} |
|
88 \mbox{}\hspace{-12mm}\mbox{}$\underbrace{\text{list of tokens}}_{\text{input}}$ \bl{$\Rightarrow$} |
|
89 $\underbrace{\text{set of (parsed input, unparsed input)}}_{\text{output}}$ |
|
90 \end{center} |
|
91 \end{minipage}\bigskip |
|
92 |
|
93 \begin{itemize} |
|
94 \item atomic parsers |
|
95 \item sequencing |
|
96 \item alternative |
|
97 \item semantic action (map-parser) |
|
98 \end{itemize} |
|
99 |
|
100 \end{frame} |
|
101 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
102 |
|
103 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
104 \begin{frame}[c] |
|
105 |
|
106 Atomic parsers, for example, number tokens |
|
107 |
|
108 \begin{center} |
|
109 \bl{$\texttt{Num(123)}::rest \;\Rightarrow\; \{(\texttt{Num(123)}, rest)\}$} |
|
110 \end{center}\bigskip |
|
111 |
|
112 \begin{itemize} |
|
113 \item you consume one or more token from the\\ |
|
114 input (stream) |
|
115 \item also works for characters and strings |
|
116 \end{itemize} |
|
117 \end{frame} |
|
118 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
119 |
|
120 |
|
121 |
|
122 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
123 \begin{frame}[c] |
|
124 |
|
125 Alternative parser (code \bl{$p\;||\;q$})\bigskip |
|
126 |
|
127 \begin{itemize} |
|
128 \item apply \bl{$p$} and also \bl{$q$}; then combine |
|
129 the outputs |
|
130 \end{itemize} |
|
131 |
|
132 \begin{center} |
|
133 \large \bl{$p(\text{input}) \cup q(\text{input})$} |
|
134 \end{center} |
|
135 |
|
136 \end{frame} |
|
137 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
138 |
|
139 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
140 \begin{frame}[c] |
|
141 |
|
142 Sequence parser (code \bl{$p\sim q$})\bigskip |
|
143 |
|
144 \begin{itemize} |
|
145 \item apply first \bl{$p$} producing a set of pairs |
|
146 \item then apply \bl{$q$} to the unparsed parts |
|
147 \item then combine the results:\medskip |
|
148 \begin{center} |
|
149 ((output$_1$, output$_2$), unparsed part) |
|
150 \end{center} |
|
151 \end{itemize} |
|
152 |
|
153 \begin{center} |
|
154 \begin{tabular}{l} |
|
155 \large \bl{$\{((o_1, o_2), u_2) \;|\;$}\\[2mm] |
|
156 \large\mbox{}\hspace{15mm} \bl{$(o_1, u_1) \in p(\text{input}) \wedge$}\\[2mm] |
|
157 \large\mbox{}\hspace{15mm} \bl{$(o_2, u_2) \in q(u_1)\}$} |
|
158 \end{tabular} |
|
159 \end{center} |
|
160 |
|
161 |
|
162 \end{frame} |
|
163 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
164 |
|
165 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
166 \begin{frame}[c] |
|
167 |
|
168 Map-parser (code \bl{$p.map(f)\;$})\bigskip |
|
169 |
|
170 \begin{itemize} |
|
171 \item apply \bl{$p$} producing a set of pairs |
|
172 \item then apply the function \bl{$f$} to each first component |
|
173 \end{itemize} |
|
174 |
|
175 \begin{center} |
|
176 \begin{tabular}{l} |
|
177 \large \bl{$\{(f(o_1), u_1) \;|\; (o_1, u_1) \in p(\text{input})\}$} |
|
178 \end{tabular} |
|
179 \end{center}\bigskip\bigskip\pause |
|
180 |
|
181 \bl{$f$} is the semantic action (``what to do with the parsed input'') |
|
182 |
|
183 \end{frame} |
|
184 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
185 |
|
186 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
187 \begin{frame}[c] |
|
188 \frametitle{\begin{tabular}{c}Semantic Actions\end{tabular}} |
|
189 |
|
190 Addition |
|
191 |
|
192 \begin{center} |
|
193 \bl{$\meta{T} \sim + \sim \meta{E} \Rightarrow \underbrace{f\,((x,y), z) \Rightarrow x + z}_{\text{semantic action}}$} |
|
194 \end{center}\pause |
|
195 |
|
196 Multiplication |
|
197 |
|
198 \begin{center} |
|
199 \bl{$\meta{F} \sim * \sim \meta{T} \Rightarrow f\,((x,y), z) \Rightarrow x * z$} |
|
200 \end{center}\pause |
|
201 |
|
202 Parenthesis |
|
203 |
|
204 \begin{center} |
|
205 \bl{$\text{(} \sim \meta{E} \sim \text{)} \Rightarrow f\,((x,y), z) \Rightarrow y$} |
|
206 \end{center} |
|
207 |
|
208 \end{frame} |
|
209 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
210 |
|
211 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
212 \begin{frame}[c] |
|
213 \frametitle{Input Types of Parsers} |
|
214 |
|
215 \begin{itemize} |
|
216 \item input: \alert{token list} |
|
217 \item output: set of (output\_type, \alert{token list}) |
|
218 \end{itemize}\bigskip\pause |
|
219 |
|
220 actually it can be any input type as long as it is a kind of |
|
221 sequence (for example a string) |
|
222 |
|
223 \end{frame} |
|
224 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
225 |
|
226 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
227 \begin{frame}[c] |
|
228 \frametitle{Scannerless Parsers} |
|
229 |
|
230 \begin{itemize} |
|
231 \item input: \alert{string} |
|
232 \item output: set of (output\_type, \alert{string}) |
|
233 \end{itemize}\bigskip\bigskip |
|
234 |
|
235 but using lexers is better because whitespaces or comments can be |
|
236 filtered out; then input is a sequence of tokens |
|
237 |
|
238 \end{frame} |
|
239 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
240 |
|
241 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
242 \begin{frame}[c] |
|
243 \frametitle{Abstract Parser Class} |
|
244 |
|
245 \small |
|
246 \lstinputlisting[language=Scala,xleftmargin=1mm] |
|
247 {../progs/app7.scala} |
|
248 \end{frame} |
|
249 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
250 |
|
251 |
|
252 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
253 \begin{frame}[c] |
|
254 \frametitle{Successful Parses} |
|
255 |
|
256 \begin{itemize} |
|
257 \item input: string |
|
258 \item output: \alert{set of} (output\_type, string) |
|
259 \end{itemize}\bigskip |
|
260 |
|
261 a parse is successful whenever the input has been fully |
|
262 ``consumed'' (that is the second component is empty) |
|
263 |
|
264 \end{frame} |
|
265 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
266 |
|
267 |
|
268 |
63 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
269 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
64 \begin{frame}[c] |
270 \begin{frame}[c] |
65 %%\frametitle{Test Program} |
271 %%\frametitle{Test Program} |
66 |
272 |
67 \mbox{}\\[-18mm]\mbox{} |
273 \mbox{}\\[-18mm]\mbox{} |