101 \draw[->,line width=0.5mm] (r1) -- (v1); |
101 \draw[->,line width=0.5mm] (r1) -- (v1); |
102 \draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\bl{$mkeps$}}}; |
102 \draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\bl{$mkeps$}}}; |
103 \end{tikzpicture} |
103 \end{tikzpicture} |
104 \end{textblock} |
104 \end{textblock} |
105 |
105 |
106 \only<2->{ |
|
107 \begin{textblock}{6}(1,0.8) |
106 \begin{textblock}{6}(1,0.8) |
108 \begin{bubble}[6cm] |
107 \begin{bubble}[6cm] |
109 \small |
108 \small |
110 \begin{tabular}{ll} |
109 \begin{tabular}{ll} |
111 \bl{$r_1$}: & \bl{$a \cdot (b \cdot c)$}\\ |
110 \bl{$r_1$}: & \bl{$a \cdot (b \cdot c)$}\\ |
112 \bl{$r_2$}: & \bl{$\epsilon \cdot (b \cdot c)$}\\ |
111 \bl{$r_2$}: & \bl{$\epsilon \cdot (b \cdot c)$}\\ |
113 \bl{$r_3$}: & \bl{$(\varnothing \cdot (b \cdot c)) + (\epsilon \cdot c)$}\\ |
112 \bl{$r_3$}: & \bl{$(\varnothing \cdot (b \cdot c)) + (\epsilon \cdot c)$}\\ |
114 \bl{$r_4$}: & \bl{$(\varnothing \cdot (b \cdot c)) + ((\varnothing \cdot c) + \epsilon)$}\\ |
113 \bl{$r_4$}: & \bl{$(\varnothing \cdot (b \cdot c)) + ((\varnothing \cdot c) + \epsilon)$}\\ |
115 \end{tabular} |
114 \end{tabular} |
116 \end{bubble} |
115 \end{bubble} |
117 \end{textblock}} |
116 \end{textblock} |
118 |
117 |
119 \only<2->{ |
118 \begin{textblock}{6}(1,11.4) |
120 \begin{textblock}{6}(5,11.4) |
|
121 \begin{bubble}[7.6cm] |
119 \begin{bubble}[7.6cm] |
122 \small |
120 \small |
123 \begin{tabular}{ll} |
121 \begin{tabular}{ll} |
124 \bl{$v_1$}: & \bl{$Seq(Char(a), Seq(Char(b), Char(c)))$}\\ |
122 \bl{$v_1$}: & \bl{$Seq(Char(a), Seq(Char(b), Char(c)))$}\\ |
125 \bl{$v_2$}: & \bl{$Seq(Empty, Seq(Char(b), Char(c)))$}\\ |
123 \bl{$v_2$}: & \bl{$Seq(Empty, Seq(Char(b), Char(c)))$}\\ |
126 \bl{$v_3$}: & \bl{$Right(Seq(Empty, Char(c)))$}\\ |
124 \bl{$v_3$}: & \bl{$Right(Seq(Empty, Char(c)))$}\\ |
127 \bl{$v_4$}: & \bl{$Right(Right(Empty))$}\\ |
125 \bl{$v_4$}: & \bl{$Right(Right(Empty))$}\\ |
128 \end{tabular} |
126 \end{tabular} |
129 \end{bubble} |
127 \end{bubble} |
130 \end{textblock}} |
128 \end{textblock} |
131 \end{frame} |
129 |
132 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
130 \begin{textblock}{6}(12,11.4) |
133 |
131 \begin{bubble}[2cm] |
134 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
132 \small |
135 \begin{frame}[c] |
133 \begin{tabular}{ll} |
136 \frametitle{Mkeps} |
134 \bl{$|v_1|$}: & \bl{$abc$}\\ |
137 |
135 \bl{$|v_2|$}: & \bl{$bc$}\\ |
138 Finding a (posix) value for recognising the empty string: |
136 \bl{$|v_3|$}: & \bl{$c$}\\ |
139 |
137 \bl{$|v_4|$}: & \bl{$[]$} |
140 \begin{center} |
138 \end{tabular} |
141 \begin{tabular}{lcl} |
139 \end{bubble} |
142 \bl{$mkeps\,\epsilon$} & \bl{$\dn$} & \bl{$Empty$}\\ |
140 \end{textblock} |
143 \bl{$mkeps\,r_1 + r_2$} & \bl{$\dn$} & \bl{if $nullable(r_1)$} \\ |
141 |
144 & & \bl{then $Left(mkeps(r_1))$}\\ |
142 |
145 & & \bl{else $Right(mkeps(r_2))$}\\ |
143 \end{frame} |
146 \bl{$mkeps\,r_1 \cdot r_2$} & \bl{$\dn$} & \bl{$Seq(mkeps(r_1),mkeps(r_2))$}\\ |
144 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
147 \bl{$mkeps\,r^*$} & \bl{$\dn$} & \bl{$[]$} \\ |
145 |
148 \end{tabular} |
146 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
149 \end{center} |
147 \begin{frame}[c] |
150 |
148 \frametitle{Simplification} |
151 \end{frame} |
149 |
152 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
150 \begin{itemize} |
153 |
151 \item If we simplify after the derivative, then we are builing the |
154 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
152 value for the simplified regular expression, but \emph{not} for the original |
155 \begin{frame}[c] |
153 regular expression. |
156 \frametitle{Inject} |
154 \end{itemize} |
157 |
155 |
158 Injecting (``Adding'') a character to a value\\ |
156 \begin{center} |
159 |
|
160 \begin{center} |
|
161 \begin{tabular}{l@{\hspace{1mm}}c@{\hspace{1mm}}l} |
|
162 \bl{$inj\,(c)\,c\,Empty$} & \bl{$\dn$} & \bl{$Char\,c$}\\ |
|
163 \bl{$inj\,(r_1 + r_2)\,c\,Left(v)$} & \bl{$\dn$} & \bl{$Left(inj\,r_1\,c\,v)$}\\ |
|
164 \bl{$inj\,(r_1 + r_2)\,c\,Right(v)$} & \bl{$\dn$} & \bl{$Right(inj\,r_2\,c\,v)$}\\ |
|
165 \bl{$inj\,(r_1 \cdot r_2)\,c\,Seq(v_1,v_2)$} & \bl{$\dn$} & \bl{$Seq(inj\,r_1\,c\,v_1,v_2)$}\\ |
|
166 \bl{$inj\,(r_1 \cdot r_2)\,c\,Left(Seq(v_1,v_2))$} & \bl{$\dn$} & \bl{$Seq(inj\,r_1\,c\,v_1,v_2)$}\\ |
|
167 \bl{$inj\,(r_1 \cdot r_2)\,c\,Right(v)$} & \bl{$\dn$} & \bl{$Seq(mkeps(r_1),inj\,r_2\,c\,v)$}\\ |
|
168 \bl{$inj\,(r^*)\,c\,Seq(v,vs)$} & \bl{$\dn$} & \bl{$inj\,r\,c\,v\,::\,vs$}\\ |
|
169 \end{tabular} |
|
170 \end{center}\bigskip |
|
171 |
|
172 \footnotesize |
|
173 \bl{$inj$}: 1st arg $\mapsto$ a rexp; 2nd arg $\mapsto$ a character; 3rd arg $\mapsto$ a value |
|
174 \end{frame} |
|
175 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
176 |
|
177 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
178 \begin{frame}[c] |
|
179 \frametitle{Flatten} |
|
180 |
|
181 Obtaining the string underlying a value: |
|
182 |
|
183 \begin{center} |
|
184 \begin{tabular}{lcl} |
|
185 \bl{$|Empty|$} & \bl{$\dn$} & \bl{$[]$}\\ |
|
186 \bl{$|Char(c)|$} & \bl{$\dn$} & \bl{$[c]$}\\ |
|
187 \bl{$|Left(v)|$} & \bl{$\dn$} & \bl{$|v|$}\\ |
|
188 \bl{$|Right(v)|$} & \bl{$\dn$} & \bl{$|v|$}\\ |
|
189 \bl{$|Seq(v_1,v_2)|$}& \bl{$\dn$} & \bl{$|v_1| \,@\, |v_2|$}\\ |
|
190 \bl{$|[v_1,\ldots ,v_n]|$} & \bl{$\dn$} & \bl{$|v_1| \,@\ldots @\, |v_n|$}\\ |
|
191 \end{tabular} |
|
192 \end{center} |
|
193 |
|
194 \end{frame} |
|
195 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
196 |
|
197 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
198 \begin{frame}[c] |
|
199 |
|
200 \begin{textblock}{10}(3,5) |
|
201 \begin{tikzpicture}[scale=2,node distance=1.3cm,every node/.style={minimum size=8mm}] |
157 \begin{tikzpicture}[scale=2,node distance=1.3cm,every node/.style={minimum size=8mm}] |
202 \node (r1) {\bl{$r_1$}}; |
158 \node (r1) {\bl{$r_1$}}; |
203 \node (r2) [right=of r1] {\bl{$r_2$}}; |
159 \node (r2) [right=of r1] {\bl{$r_2$}}; |
204 \draw[->,line width=1mm] (r1) -- (r2) node[above,midway] {\bl{$der\,a$}}; |
160 \draw[->,line width=1mm] (r1) -- (r2) node[above,midway] {\bl{$der\,a$}}; |
205 \node (r3) [right=of r2] {\bl{$r_3$}}; |
161 \node (r3) [right=of r2] {\bl{$r_3$}}; |
218 \draw[->,line width=0.5mm] (r3) -- (v3); |
174 \draw[->,line width=0.5mm] (r3) -- (v3); |
219 \draw[->,line width=0.5mm] (r2) -- (v2); |
175 \draw[->,line width=0.5mm] (r2) -- (v2); |
220 \draw[->,line width=0.5mm] (r1) -- (v1); |
176 \draw[->,line width=0.5mm] (r1) -- (v1); |
221 \draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\bl{$mkeps$}}}; |
177 \draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\bl{$mkeps$}}}; |
222 \end{tikzpicture} |
178 \end{tikzpicture} |
223 \end{textblock} |
|
224 |
|
225 \begin{textblock}{6}(1,0.8) |
|
226 \begin{bubble}[6cm] |
|
227 \small |
|
228 \begin{tabular}{ll} |
|
229 \bl{$r_1$}: & \bl{$a \cdot (b \cdot c)$}\\ |
|
230 \bl{$r_2$}: & \bl{$\epsilon \cdot (b \cdot c)$}\\ |
|
231 \bl{$r_3$}: & \bl{$(\varnothing \cdot (b \cdot c)) + (\epsilon \cdot c)$}\\ |
|
232 \bl{$r_4$}: & \bl{$(\varnothing \cdot (b \cdot c)) + ((\varnothing \cdot c) + \epsilon)$}\\ |
|
233 \end{tabular} |
|
234 \end{bubble} |
|
235 \end{textblock} |
|
236 |
|
237 \begin{textblock}{6}(1,11.4) |
|
238 \begin{bubble}[7.6cm] |
|
239 \small |
|
240 \begin{tabular}{ll} |
|
241 \bl{$v_1$}: & \bl{$Seq(Char(a), Seq(Char(b), Char(c)))$}\\ |
|
242 \bl{$v_2$}: & \bl{$Seq(Empty, Seq(Char(b), Char(c)))$}\\ |
|
243 \bl{$v_3$}: & \bl{$Right(Seq(Empty, Char(c)))$}\\ |
|
244 \bl{$v_4$}: & \bl{$Right(Right(Empty))$}\\ |
|
245 \end{tabular} |
|
246 \end{bubble} |
|
247 \end{textblock} |
|
248 |
|
249 \begin{textblock}{6}(12,11.4) |
|
250 \begin{bubble}[2cm] |
|
251 \small |
|
252 \begin{tabular}{ll} |
|
253 \bl{$|v_1|$}: & \bl{$abc$}\\ |
|
254 \bl{$|v_2|$}: & \bl{$bc$}\\ |
|
255 \bl{$|v_3|$}: & \bl{$c$}\\ |
|
256 \bl{$|v_4|$}: & \bl{$[]$} |
|
257 \end{tabular} |
|
258 \end{bubble} |
|
259 \end{textblock} |
|
260 |
|
261 |
|
262 \end{frame} |
|
263 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
264 |
|
265 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
266 \begin{frame}[c] |
|
267 \frametitle{Simplification} |
|
268 |
|
269 \begin{itemize} |
|
270 \item If we simplify after the derivative, then we are builing the |
|
271 value for the simplified regular expression, but \emph{not} for the original |
|
272 regular expression. |
|
273 \end{itemize} |
|
274 |
|
275 \begin{center} |
|
276 \begin{tikzpicture}[scale=2,node distance=1.3cm,every node/.style={minimum size=8mm}] |
|
277 \node (r1) {\bl{$r_1$}}; |
|
278 \node (r2) [right=of r1] {\bl{$r_2$}}; |
|
279 \draw[->,line width=1mm] (r1) -- (r2) node[above,midway] {\bl{$der\,a$}}; |
|
280 \node (r3) [right=of r2] {\bl{$r_3$}}; |
|
281 \draw[->,line width=1mm] (r2) -- (r3) node[above,midway] {\bl{$der\,b$}}; |
|
282 \node (r4) [right=of r3] {\bl{$r_4$}}; |
|
283 \draw[->,line width=1mm] (r3) -- (r4) node[above,midway] {\bl{$der\,c$}}; |
|
284 \draw (r4) node[anchor=west] {\;\raisebox{3mm}{\bl{$nullable$}}}; |
|
285 \node (v4) [below=of r4] {\bl{$v_4$}}; |
|
286 \draw[->,line width=1mm] (r4) -- (v4); |
|
287 \node (v3) [left=of v4] {\bl{$v_3$}}; |
|
288 \draw[->,line width=1mm] (v4) -- (v3) node[below,midway] {\bl{$inj\,c$}}; |
|
289 \node (v2) [left=of v3] {\bl{$v_2$}}; |
|
290 \draw[->,line width=1mm] (v3) -- (v2) node[below,midway] {\bl{$inj\,b$}}; |
|
291 \node (v1) [left=of v2] {\bl{$v_1$}}; |
|
292 \draw[->,line width=1mm] (v2) -- (v1) node[below,midway] {\bl{$inj\,a$}}; |
|
293 \draw[->,line width=0.5mm] (r3) -- (v3); |
|
294 \draw[->,line width=0.5mm] (r2) -- (v2); |
|
295 \draw[->,line width=0.5mm] (r1) -- (v1); |
|
296 \draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\bl{$mkeps$}}}; |
|
297 \end{tikzpicture} |
|
298 \end{center} |
179 \end{center} |
299 |
180 |
300 \small |
181 \small |
301 \hspace{4.5cm}\bl{$(\varnothing \cdot (b \cdot c)) + ((\varnothing \cdot c) + \epsilon)$} |
182 \hspace{4.5cm}\bl{$(\varnothing \cdot (b \cdot c)) + ((\varnothing \cdot c) + \epsilon)$} |
302 $\mapsto$ |
183 $\mapsto$ |
305 \end{frame} |
186 \end{frame} |
306 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
187 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
307 |
188 |
308 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
189 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
309 \begin{frame}[c] |
190 \begin{frame}[c] |
310 \frametitle{Rectification} |
|
311 |
|
312 \begin{center} |
|
313 \begin{tabular}{l} |
|
314 \bl{$simp(r)$}:\\ |
|
315 \quad case \bl{$r = r_1 + r_2$}\\ |
|
316 \qquad let \bl{$(r_{1s}, f_{1s}) = simp(r_1)$}\\ |
|
317 \qquad \phantom{let} \bl{$(r_{2s}, f_{2s}) = simp(r_2)$}\smallskip\\ |
|
318 \qquad case \bl{$r_{1s} = \varnothing$}: |
|
319 return \bl{$(r_{2s}, \lambda v. \,Right(f_{2s}(v)))$}\\ |
|
320 \qquad case \bl{$r_{2s} = \varnothing$}: |
|
321 return \bl{$(r_{1s}, \lambda v. \,Left(f_{1s}(v)))$}\\ |
|
322 \qquad case \bl{$r_{1s} = r_{2s}$}: |
|
323 return \bl{$(r_{1s}, \lambda v. \,Left(f_{1s}(v)))$}\\ |
|
324 \qquad otherwise: |
|
325 return \bl{$(r_{1s} + r_{2s}, f_{alt}(f_{1s}, f_{2s}))$}\\ |
|
326 \end{tabular} |
|
327 \end{center} |
|
328 |
|
329 \small |
|
330 \begin{center} |
|
331 \begin{tabular}{l@{\hspace{1mm}}l} |
|
332 \bl{$f_{alt}(f_1, f_2) \dn$}\\ |
|
333 \qquad \bl{$\lambda v.\,$} case \bl{$v = Left(v')$}: |
|
334 & return \bl{$Left(f_1(v'))$}\\ |
|
335 \qquad \phantom{$\lambda v.\,$} case \bl{$v = Right(v')$}: |
|
336 & return \bl{$Right(f_2(v'))$}\\ |
|
337 \end{tabular} |
|
338 \end{center} |
|
339 |
|
340 |
|
341 \end{frame} |
|
342 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
343 |
|
344 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
345 \begin{frame}[c] |
|
346 \frametitle{Rectification} |
|
347 |
|
348 \begin{center} |
|
349 \begin{tabular}{@{\hspace{-3mm}}l} |
|
350 \bl{$simp(r)$}:\ldots\\ |
|
351 \quad case \bl{$r = r_1 \cdot r_2$}\\ |
|
352 \qquad let \bl{$(r_{1s}, f_{1s}) = simp(r_1)$}\\ |
|
353 \qquad \phantom{let} \bl{$(r_{2s}, f_{2s}) = simp(r_2)$}\smallskip\\ |
|
354 \qquad case \bl{$r_{1s} = \varnothing$}: |
|
355 return \bl{$(\varnothing, f_{error})$}\\ |
|
356 \qquad case \bl{$r_{2s} = \varnothing$}: |
|
357 return \bl{$(\varnothing, f_{error})$}\\ |
|
358 \qquad case \bl{$r_{1s} = \epsilon$}: |
|
359 return \bl{$(r_{2s}, \lambda v. \,Seq(f_{1s}(Empty), f_{2s}(v)))$}\\ |
|
360 \qquad case \bl{$r_{2s} = \epsilon$}: |
|
361 return \bl{$(r_{1s}, \lambda v. \,Seq(f_{1s}(v), f_{2s}(Empty)))$}\\ |
|
362 \qquad otherwise: |
|
363 return \bl{$(r_{1s} \cdot r_{2s}, f_{seq}(f_{1s}, f_{2s}))$}\\ |
|
364 \end{tabular} |
|
365 \end{center} |
|
366 |
|
367 \small |
|
368 \begin{center} |
|
369 \begin{tabular}{l@{\hspace{1mm}}l} |
|
370 \bl{$f_{seq}(f_1, f_2) \dn$}\\ |
|
371 \qquad \bl{$\lambda v.\,$ case $v = Seq(v_1, v_2)$}: |
|
372 & return \bl{$Seq(f_1(v_1), f_2(v_2))$}\\ |
|
373 \end{tabular} |
|
374 \end{center} |
|
375 \end{frame} |
|
376 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
377 |
|
378 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
379 \begin{frame}[c] |
|
380 \frametitle{Lexing with Simplification} |
|
381 |
|
382 \begin{center} |
|
383 \begin{tabular}{l@{\hspace{1mm}}c@{\hspace{1mm}}l} |
|
384 \bl{$lex\,r\,[]$} & \bl{$\dn$} & \bl{if $nullable(r)$ then $mkeps(r)$ else $error$}\\ |
|
385 \bl{$lex\,r\,c::s$} & \bl{$\dn$} & \bl{let $(r', frect) = simp(der(c, r))$}\smallskip\\ |
|
386 & & \bl{$inj\,r\,c\,(frect(lex(r', s)))$}\\ |
|
387 \end{tabular} |
|
388 \end{center}\bigskip |
|
389 |
|
390 \begin{center}\small |
|
391 \begin{tikzpicture}[node distance=1.1cm,every node/.style={minimum size=7mm}] |
|
392 \node (r1) {\bl{$r_1$}}; |
|
393 \node (r2) [right=of r1] {\bl{$r_2$}}; |
|
394 \draw[->,line width=1mm] (r1) -- (r2) node[above,midway] {\bl{$der\,a$}}; |
|
395 \node (r3) [right=of r2] {\bl{$r_3$}}; |
|
396 \draw[->,line width=1mm] (r2) -- (r3) node[above,midway] {\bl{$der\,b$}}; |
|
397 \node (r4) [right=of r3] {\bl{$r_4$}}; |
|
398 \draw[->,line width=1mm] (r3) -- (r4) node[above,midway] {\bl{$der\,c$}}; |
|
399 \draw (r4) node[anchor=west] {\;\raisebox{3mm}{\bl{$nullable$}}}; |
|
400 \node (v4) [below=of r4] {\bl{$v_4$}}; |
|
401 \draw[->,line width=1mm] (r4) -- (v4); |
|
402 \node (v3) [left=of v4] {\bl{$v_3$}}; |
|
403 \draw[->,line width=1mm] (v4) -- (v3) node[below,midway] {\bl{$inj\,c$}}; |
|
404 \node (v2) [left=of v3] {\bl{$v_2$}}; |
|
405 \draw[->,line width=1mm] (v3) -- (v2) node[below,midway] {\bl{$inj\,b$}}; |
|
406 \node (v1) [left=of v2] {\bl{$v_1$}}; |
|
407 \draw[->,line width=1mm] (v2) -- (v1) node[below,midway] {\bl{$inj\,a$}}; |
|
408 \draw[->,line width=0.5mm] (r3) -- (v3); |
|
409 \draw[->,line width=0.5mm] (r2) -- (v2); |
|
410 \draw[->,line width=0.5mm] (r1) -- (v1); |
|
411 \draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{\bl{$mkeps$}}}; |
|
412 \end{tikzpicture} |
|
413 \end{center} |
|
414 |
|
415 |
|
416 \end{frame} |
|
417 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
418 |
|
419 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
420 \begin{frame}[c] |
|
421 \frametitle{Records} |
191 \frametitle{Records} |
422 |
192 |
423 \begin{itemize} |
193 \begin{itemize} |
424 \item new regex: \bl{$(x:r)$}\hspace{7mm}new value: \bl{$Rec(x,v)$}\medskip\pause |
194 \item new regex: \bl{$(x:r)$}\hspace{7mm}new value: |
|
195 \bl{$Rec(x,v)$}\medskip |
425 |
196 |
426 \item \bl{$nullable(x:r) \dn nullable(r)$} |
197 \item \bl{$nullable(x:r) \dn nullable(r)$} |
427 \item \bl{$der\,c\,(x:r) \dn (x:der\,c\,r)$} |
198 \item \bl{$der\,c\,(x:r) \dn (x:der\,c\,r)$} |
428 \item \bl{$mkeps(x:r) \dn Rec(x, mkeps(r))$} |
199 \item \bl{$mkeps(x:r) \dn Rec(x, mkeps(r))$} |
429 \item \bl{$inj\,(x:r)\,c\,v \dn Rec(x, inj\,r\,c\,v)$} |
200 \item \bl{$inj\,(x:r)\,c\,v \dn Rec(x, inj\,r\,c\,v)$} |
430 \end{itemize}\bigskip\bigskip\pause |
201 \end{itemize}\bigskip\bigskip |
431 |
202 |
432 \small |
203 \small |
433 for extracting subpatterns \bl{$(z: ((x:ab) + (y:ba))$} |
204 for extracting subpatterns \bl{$(z: ((x:ab) + (y:ba))$} |
434 |
205 |
435 \end{frame} |
206 \end{frame} |
436 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
207 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
208 |
|
209 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
210 \begin{frame}[c] |
|
211 \frametitle{Environments} |
|
212 |
|
213 Obtaining the ``recorded'' parts of a value: |
|
214 |
|
215 \begin{center} |
|
216 \begin{tabular}{lcl} |
|
217 \bl{$env(Empty)$} & \bl{$\dn$} & \bl{$[]$}\\ |
|
218 \bl{$env(Char(c))$} & \bl{$\dn$} & \bl{$[]$}\\ |
|
219 \bl{$env(Left(v))$} & \bl{$\dn$} & \bl{$env(v)$}\\ |
|
220 \bl{$env(Right(v))$} & \bl{$\dn$} & \bl{$env(v)$}\\ |
|
221 \bl{$env(Seq(v_1,v_2))$}& \bl{$\dn$} & \bl{$env(v_1) \,@\, env(v_2)$}\\ |
|
222 \bl{$env([v_1,\ldots ,v_n])$} & \bl{$\dn$} & |
|
223 \bl{$env(v_1) \,@\ldots @\, env(v_n)$}\\ |
|
224 \bl{$env(Rec(x:v))$} & \bl{$\dn$} & \bl{$(x:|v|) :: env(v)$}\\ |
|
225 \end{tabular} |
|
226 \end{center} |
|
227 |
|
228 \end{frame} |
|
229 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
230 |
437 |
231 |
438 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
232 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
439 \begin{frame}[c] |
233 \begin{frame}[c] |
440 \frametitle{While Tokens} |
234 \frametitle{While Tokens} |
441 |
235 |
621 \end{center} |
368 \end{center} |
622 |
369 |
623 \end{frame} |
370 \end{frame} |
624 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
371 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
625 |
372 |
|
373 |
|
374 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
375 \begin{frame}[c] |
|
376 \frametitle{Regular Languages} |
|
377 |
|
378 While regular expressions are very useful for lexing, there is |
|
379 no regular expression that can recognise the language |
|
380 \bl{$a^nb^n$}.\bigskip |
|
381 |
|
382 \begin{center} |
|
383 \bl{$(((()()))())$} \;\;vs.\;\; \bl{$(((()()))()))$} |
|
384 \end{center}\bigskip\bigskip |
|
385 |
|
386 \small |
|
387 \noindent So we cannot find out with regular expressions |
|
388 whether parentheses are matched or unmatched. Also regular |
|
389 expressions are not recursive, e.g.~\bl{$(1 + 2) + 3$}. |
|
390 |
|
391 \end{frame} |
|
392 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
393 |
|
394 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
395 \begin{frame}[c] |
|
396 \frametitle{Hierarchy of Languages} |
|
397 |
|
398 \begin{center} |
|
399 \begin{tikzpicture} |
|
400 [rect/.style={draw=black!50, |
|
401 top color=white, |
|
402 bottom color=black!20, |
|
403 rectangle, |
|
404 very thick, |
|
405 rounded corners}, scale=1.2] |
|
406 |
|
407 \draw (0,0) node [rect, text depth=39mm, text width=68mm] {all languages}; |
|
408 \draw (0,-0.4) node [rect, text depth=28.5mm, text width=64mm] {decidable languages}; |
|
409 \draw (0,-0.85) node [rect, text depth=17mm] {context sensitive languages}; |
|
410 \draw (0,-1.14) node [rect, text depth=9mm, text width=50mm] {context-free languages}; |
|
411 \draw (0,-1.4) node [rect] {regular languages}; |
|
412 \end{tikzpicture} |
|
413 |
|
414 \end{center} |
|
415 |
|
416 \end{frame} |
|
417 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
418 |
|
419 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
420 \begin{frame}[c] |
|
421 \frametitle{Grammars} |
|
422 |
|
423 A (context-free) grammar \bl{$G$} consists of |
|
424 |
|
425 \begin{itemize} |
|
426 \item a finite set of nonterminal symbols (upper case) |
|
427 \item a finite terminal symbols or tokens (lower case) |
|
428 \item a start symbol (which must be a nonterminal) |
|
429 \item a set of rules |
|
430 \begin{center} |
|
431 \bl{$A \rightarrow \textit{rhs}$} |
|
432 \end{center} |
|
433 |
|
434 where \bl{\textit{rhs}} are sequences involving terminals and nonterminals, |
|
435 including the empty sequence \bl{$\epsilon$}.\medskip\pause |
|
436 |
|
437 We also allow rules |
|
438 \begin{center} |
|
439 \bl{$A \rightarrow \textit{rhs}_1 | \textit{rhs}_2 | \ldots$} |
|
440 \end{center} |
|
441 \end{itemize} |
|
442 |
|
443 \end{frame} |
|
444 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
445 |
|
446 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
447 \begin{frame}[c] |
|
448 \frametitle{Palindromes} |
|
449 |
|
450 A grammar for palindromes over the alphabet~\bl{$\{a,b\}$}: |
|
451 |
|
452 \begin{center} |
|
453 \bl{\begin{tabular}{lcl} |
|
454 $S$ & $\rightarrow$ & $\epsilon$ \\ |
|
455 $S$ & $\rightarrow$ & $a\cdot S\cdot a$ \\ |
|
456 $S$ & $\rightarrow$ & $b\cdot S\cdot b$ \\ |
|
457 \end{tabular}} |
|
458 \end{center}\pause |
|
459 |
|
460 or |
|
461 |
|
462 \begin{center} |
|
463 \bl{\begin{tabular}{lcl} |
|
464 $S$ & $\rightarrow$ & $\epsilon \;|\; a\cdot S\cdot a \;|\;b\cdot S\cdot b$ \\ |
|
465 \end{tabular}} |
|
466 \end{center}\pause\bigskip |
|
467 |
|
468 \small |
|
469 Can you find the grammar rules for matched parentheses? |
|
470 |
|
471 \end{frame} |
|
472 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
473 |
|
474 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
475 \begin{frame}[c] |
|
476 \frametitle{Arithmetic Expressions} |
|
477 |
|
478 \begin{center} |
|
479 \bl{\begin{tabular}{lcl} |
|
480 $E$ & $\rightarrow$ & $num\_token$ \\ |
|
481 $E$ & $\rightarrow$ & $E \cdot + \cdot E$ \\ |
|
482 $E$ & $\rightarrow$ & $E \cdot - \cdot E$ \\ |
|
483 $E$ & $\rightarrow$ & $E \cdot * \cdot E$ \\ |
|
484 $E$ & $\rightarrow$ & $( \cdot E \cdot )$ |
|
485 \end{tabular}} |
|
486 \end{center}\pause |
|
487 |
|
488 \bl{\texttt{1 + 2 * 3 + 4}} |
|
489 |
|
490 \end{frame} |
|
491 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
492 |
|
493 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
494 \begin{frame}[c] |
|
495 \frametitle{A CFG Derivation} |
|
496 |
|
497 \begin{enumerate} |
|
498 \item Begin with a string containing only the start symbol, say \bl{$S$}\bigskip |
|
499 \item Replace any nonterminal \bl{$X$} in the string by the |
|
500 right-hand side of some production \bl{$X \rightarrow \textit{rhs}$}\bigskip |
|
501 \item Repeat 2 until there are no nonterminals |
|
502 \end{enumerate} |
|
503 |
|
504 \begin{center} |
|
505 \bl{$S \rightarrow \ldots \rightarrow \ldots \rightarrow \ldots \rightarrow \ldots $} |
|
506 \end{center} |
|
507 |
|
508 \end{frame} |
|
509 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
510 |
|
511 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
512 \begin{frame}[c] |
|
513 \frametitle{Example Derivation} |
|
514 |
|
515 \begin{center} |
|
516 \bl{\begin{tabular}{lcl} |
|
517 $S$ & $\rightarrow$ & $\epsilon \;|\; a\cdot S\cdot a \;|\;b\cdot S\cdot b$ \\ |
|
518 \end{tabular}} |
|
519 \end{center}\bigskip |
|
520 |
|
521 \begin{center} |
|
522 \begin{tabular}{lcl} |
|
523 \bl{$S$} & \bl{$\rightarrow$} & \bl{$aSa$}\\ |
|
524 & \bl{$\rightarrow$} & \bl{$abSba$}\\ |
|
525 & \bl{$\rightarrow$} & \bl{$abaSaba$}\\ |
|
526 & \bl{$\rightarrow$} & \bl{$abaaba$}\\ |
|
527 |
|
528 |
|
529 \end{tabular} |
|
530 \end{center} |
|
531 |
|
532 \end{frame} |
|
533 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
534 |
|
535 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
536 \begin{frame}[c] |
|
537 \frametitle{Example Derivation} |
|
538 |
|
539 \begin{center} |
|
540 \bl{\begin{tabular}{lcl} |
|
541 $E$ & $\rightarrow$ & $num\_token$ \\ |
|
542 $E$ & $\rightarrow$ & $E \cdot + \cdot E$ \\ |
|
543 $E$ & $\rightarrow$ & $E \cdot - \cdot E$ \\ |
|
544 $E$ & $\rightarrow$ & $E \cdot * \cdot E$ \\ |
|
545 $E$ & $\rightarrow$ & $( \cdot E \cdot )$ |
|
546 \end{tabular}} |
|
547 \end{center}\bigskip |
|
548 |
|
549 |
|
550 \begin{center} |
|
551 \begin{tabular}{@{}c@{}c@{}} |
|
552 \begin{tabular}{l@{\hspace{1mm}}l@{\hspace{1mm}}l} |
|
553 \bl{$E$} & \bl{$\rightarrow$} & \bl{$E*E$}\\ |
|
554 & \bl{$\rightarrow$} & \bl{$E+E*E$}\\ |
|
555 & \bl{$\rightarrow$} & \bl{$E+E*E+E$}\\ |
|
556 & \bl{$\rightarrow^+$} & \bl{$1+2*3+4$}\\ |
|
557 \end{tabular} &\pause |
|
558 \begin{tabular}{l@{\hspace{1mm}}l@{\hspace{1mm}}l} |
|
559 \bl{$E$} & \bl{$\rightarrow$} & \bl{$E+E$}\\ |
|
560 & \bl{$\rightarrow$} & \bl{$E+E+E$}\\ |
|
561 & \bl{$\rightarrow$} & \bl{$E+E*E+E$}\\ |
|
562 & \bl{$\rightarrow^+$} & \bl{$1+2*3+4$}\\ |
|
563 \end{tabular} |
|
564 \end{tabular} |
|
565 \end{center} |
|
566 |
|
567 \end{frame} |
|
568 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
569 |
|
570 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
571 \begin{frame}[c] |
|
572 \frametitle{Context Sensitive Grms} |
|
573 |
|
574 |
|
575 \begin{center} |
|
576 \bl{\begin{tabular}{lcl} |
|
577 $S$ & $\Rightarrow$ & $bSAA\;|\; \epsilon$\\ |
|
578 $A$ & $\Rightarrow$ & $a$\\ |
|
579 $bA$ & $\Rightarrow$ & $Ab$\\ |
|
580 \end{tabular}} |
|
581 \end{center}\pause |
|
582 |
|
583 \begin{center} |
|
584 \bl{$S \Rightarrow\ldots\Rightarrow^? "ababaa"$} |
|
585 \end{center} |
|
586 |
|
587 \end{frame} |
|
588 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
589 |
|
590 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
591 \begin{frame}[c] |
|
592 \frametitle{Language of a CFG} |
|
593 |
|
594 Let \bl{$G$} be a context-free grammar with start symbol \bl{$S$}. |
|
595 Then the language \bl{$L(G)$} is: |
|
596 |
|
597 \begin{center} |
|
598 \bl{$\{c_1\ldots c_n \;|\; \forall i.\; c_i \in T \wedge S \rightarrow^* c_1\ldots c_n \}$} |
|
599 \end{center}\pause |
|
600 |
|
601 \begin{itemize} |
|
602 \item Terminals, because there are no rules for replacing them. |
|
603 \item Once generated, terminals are ``permanent''. |
|
604 \item Terminals ought to be tokens of the language\\ |
|
605 (but can also be strings). |
|
606 \end{itemize} |
|
607 |
|
608 \end{frame} |
|
609 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
610 |
|
611 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
612 \begin{frame}[c] |
|
613 \frametitle{Parse Trees} |
|
614 |
|
615 \begin{center} |
|
616 \bl{\begin{tabular}{lcl} |
|
617 $E$ & $\rightarrow$ & $F \;|\; F \cdot * \cdot F$\\ |
|
618 $F$ & $\rightarrow$ & $T \;|\; T \cdot + \cdot T \;|\; T \cdot - \cdot T$\\ |
|
619 $T$ & $\rightarrow$ & $num\_token \;|\; ( \cdot E \cdot )$\\ |
|
620 \end{tabular}} |
|
621 \end{center} |
|
622 |
|
623 \begin{center} |
|
624 \begin{tikzpicture}[level distance=8mm, blue] |
|
625 \node {$E$} |
|
626 child {node {$F$} |
|
627 child {node {$T$} |
|
628 child {node {(\,$E$\,)} |
|
629 child {node{$F$ *{} $F$} |
|
630 child {node {$T$} child {node {2}}} |
|
631 child {node {$T$} child {node {3}}} |
|
632 } |
|
633 } |
|
634 } |
|
635 child {node {+}} |
|
636 child {node {$T$} |
|
637 child {node {(\,$E$\,)} |
|
638 child {node {$F$} |
|
639 child {node {$T$ +{} $T$} |
|
640 child {node {3}} |
|
641 child {node {4}} |
|
642 } |
|
643 }} |
|
644 }}; |
|
645 \end{tikzpicture} |
|
646 \end{center} |
|
647 |
|
648 \begin{textblock}{5}(1, 6.5) |
|
649 \bl{\texttt{(2*3)+(3+4)}} |
|
650 \end{textblock} |
|
651 |
|
652 \end{frame} |
|
653 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
654 |
|
655 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
656 \begin{frame}[c] |
|
657 \frametitle{Arithmetic Expressions} |
|
658 |
|
659 \begin{center} |
|
660 \bl{\begin{tabular}{lcl} |
|
661 $E$ & $\rightarrow$ & $num\_token$ \\ |
|
662 $E$ & $\rightarrow$ & $E \cdot + \cdot E$ \\ |
|
663 $E$ & $\rightarrow$ & $E \cdot - \cdot E$ \\ |
|
664 $E$ & $\rightarrow$ & $E \cdot * \cdot E$ \\ |
|
665 $E$ & $\rightarrow$ & $( \cdot E \cdot )$ |
|
666 \end{tabular}} |
|
667 \end{center}\pause\bigskip |
|
668 |
|
669 A CFG is \alert{\bf left-recursive} if it has a nonterminal \bl{$E$} such |
|
670 that \bl{$E \rightarrow^+ E\cdot \ldots$} |
|
671 |
|
672 \end{frame} |
|
673 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
674 |
|
675 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
676 \begin{frame}[c] |
|
677 \frametitle{Ambiguous Grammars} |
|
678 |
|
679 A grammar is \alert{\bf ambiguous} if there is a string that |
|
680 has at least two different parse trees. |
|
681 |
|
682 |
|
683 \begin{center} |
|
684 \bl{\begin{tabular}{lcl} |
|
685 $E$ & $\rightarrow$ & $num\_token$ \\ |
|
686 $E$ & $\rightarrow$ & $E \cdot + \cdot E$ \\ |
|
687 $E$ & $\rightarrow$ & $E \cdot - \cdot E$ \\ |
|
688 $E$ & $\rightarrow$ & $E \cdot * \cdot E$ \\ |
|
689 $E$ & $\rightarrow$ & $( \cdot E \cdot )$ |
|
690 \end{tabular}} |
|
691 \end{center} |
|
692 |
|
693 \bl{\texttt{1 + 2 * 3 + 4}} |
|
694 |
|
695 \end{frame} |
|
696 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
697 |
|
698 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
699 \begin{frame}[c] |
|
700 \frametitle{Dangling Else} |
|
701 |
|
702 Another ambiguous grammar:\bigskip |
|
703 |
|
704 \begin{center} |
|
705 \bl{\begin{tabular}{lcl} |
|
706 $E$ & $\rightarrow$ & if $E$ then $E$\\ |
|
707 & $|$ & if $E$ then $E$ else $E$ \\ |
|
708 & $|$ & \ldots |
|
709 \end{tabular}} |
|
710 \end{center}\bigskip |
|
711 |
|
712 \bl{\texttt{if a then if x then y else c}} |
|
713 |
|
714 \end{frame} |
|
715 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
716 |
|
717 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
718 \begin{frame}[c] |
|
719 \frametitle{Parser Combinators} |
|
720 |
|
721 One of the simplest ways to implement a parser, see |
|
722 {\small\url{https://vimeo.com/142341803}}\bigskip |
|
723 |
|
724 Parser combinators: \bigskip |
|
725 |
|
726 \begin{minipage}{1.1\textwidth} |
|
727 \begin{center} |
|
728 \mbox{}\hspace{-12mm}\mbox{}$\underbrace{\text{list of tokens}}_{\text{input}}$ \bl{$\Rightarrow$} |
|
729 $\underbrace{\text{set of (parsed input, unparsed input)}}_{\text{output}}$ |
|
730 \end{center} |
|
731 \end{minipage}\bigskip |
|
732 |
|
733 \begin{itemize} |
|
734 \item atomic parsers |
|
735 \item sequencing |
|
736 \item alternative |
|
737 \item semantic action |
|
738 \end{itemize} |
|
739 |
|
740 \end{frame} |
|
741 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
742 |
|
743 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
744 \begin{frame}[c] |
|
745 |
|
746 Atomic parsers, for example, number tokens |
|
747 |
|
748 \begin{center} |
|
749 \bl{$\texttt{Num(123)}::rest \;\Rightarrow\; \{(\texttt{Num(123)}, rest)\}$} |
|
750 \end{center}\bigskip |
|
751 |
|
752 \begin{itemize} |
|
753 \item you consume one or more token from the\\ |
|
754 input (stream) |
|
755 \item also works for characters and strings |
|
756 \end{itemize} |
|
757 \end{frame} |
|
758 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
759 |
|
760 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
761 \begin{frame}[c] |
|
762 |
|
763 Alternative parser (code \bl{$p\;||\;q$})\bigskip |
|
764 |
|
765 \begin{itemize} |
|
766 \item apply \bl{$p$} and also \bl{$q$}; then combine |
|
767 the outputs |
|
768 \end{itemize} |
|
769 |
|
770 \begin{center} |
|
771 \large \bl{$p(\text{input}) \cup q(\text{input})$} |
|
772 \end{center} |
|
773 |
|
774 \end{frame} |
|
775 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
776 |
|
777 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
778 \begin{frame}[c] |
|
779 |
|
780 Sequence parser (code \bl{$p\sim q$})\bigskip |
|
781 |
|
782 \begin{itemize} |
|
783 \item apply first \bl{$p$} producing a set of pairs |
|
784 \item then apply \bl{$q$} to the unparsed parts |
|
785 \item then combine the results:\medskip |
|
786 \begin{center} |
|
787 ((output$_1$, output$_2$), unparsed part) |
|
788 \end{center} |
|
789 \end{itemize} |
|
790 |
|
791 \begin{center} |
|
792 \begin{tabular}{l} |
|
793 \large \bl{$\{((o_1, o_2), u_2) \;|\;$}\\[2mm] |
|
794 \large\mbox{}\hspace{15mm} \bl{$(o_1, u_1) \in p(\text{input}) \wedge$}\\[2mm] |
|
795 \large\mbox{}\hspace{15mm} \bl{$(o_2, u_2) \in q(u_1)\}$} |
|
796 \end{tabular} |
|
797 \end{center} |
|
798 |
|
799 |
|
800 \end{frame} |
|
801 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
802 |
|
803 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
804 \begin{frame}[c] |
|
805 |
|
806 Function parser (code \bl{$p \Rightarrow f\;$})\bigskip |
|
807 |
|
808 \begin{itemize} |
|
809 \item apply \bl{$p$} producing a set of pairs |
|
810 \item then apply the function \bl{$f$} to each first component |
|
811 \end{itemize} |
|
812 |
|
813 \begin{center} |
|
814 \begin{tabular}{l} |
|
815 \large \bl{$\{(f(o_1), u_1) \;|\; (o_1, u_1) \in p(\text{input})\}$} |
|
816 \end{tabular} |
|
817 \end{center}\bigskip\bigskip\pause |
|
818 |
|
819 \bl{$f$} is the semantic action (``what to do with the parsed input'') |
|
820 |
|
821 \end{frame} |
|
822 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
823 |
|
824 |
|
825 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
826 \mode<presentation>{ |
|
827 \begin{frame}[c] |
|
828 \frametitle{\begin{tabular}{c}Semantic Actions\end{tabular}} |
|
829 |
|
830 Addition |
|
831 |
|
832 \begin{center} |
|
833 \bl{$T \sim + \sim E \Rightarrow \underbrace{f((x,y), z) \Rightarrow x + z}_{\text{semantic action}}$} |
|
834 \end{center}\pause |
|
835 |
|
836 Multiplication |
|
837 |
|
838 \begin{center} |
|
839 \bl{$F \sim * \sim T \Rightarrow f((x,y), z) \Rightarrow x * z$} |
|
840 \end{center}\pause |
|
841 |
|
842 Parenthesis |
|
843 |
|
844 \begin{center} |
|
845 \bl{$\text{(} \sim E \sim \text{)} \Rightarrow f((x,y), z) \Rightarrow y$} |
|
846 \end{center} |
|
847 |
|
848 \end{frame}} |
|
849 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
850 |
|
851 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
852 \begin{frame}[c] |
|
853 \frametitle{Types of Parsers} |
|
854 |
|
855 \begin{itemize} |
|
856 \item {\bf Sequencing}: if \bl{$p$} returns results of type \bl{$T$}, and \bl{$q$} results of type \bl{$S$}, |
|
857 then \bl{$p \sim q$} returns results of type |
|
858 |
|
859 \begin{center} |
|
860 \bl{$T \times S$} |
|
861 \end{center}\pause |
|
862 |
|
863 \item {\bf Alternative}: if \bl{$p$} returns results of type \bl{$T$} then \bl{$q$} \alert{must} also have results of type \bl{$T$}, |
|
864 and \bl{$p \;||\; q$} returns results of type |
|
865 |
|
866 \begin{center} |
|
867 \bl{$T$} |
|
868 \end{center}\pause |
|
869 |
|
870 \item {\bf Semantic Action}: if \bl{$p$} returns results of type \bl{$T$} and \bl{$f$} is a function from |
|
871 \bl{$T$} to \bl{$S$}, then |
|
872 \bl{$p \Rightarrow f$} returns results of type |
|
873 |
|
874 \begin{center} |
|
875 \bl{$S$} |
|
876 \end{center} |
|
877 |
|
878 \end{itemize} |
|
879 |
|
880 \end{frame} |
|
881 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
882 |
|
883 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
884 \begin{frame}[c] |
|
885 \frametitle{Input Types of Parsers} |
|
886 |
|
887 \begin{itemize} |
|
888 \item input: \alert{token list} |
|
889 \item output: set of (output\_type, \alert{token list}) |
|
890 \end{itemize}\bigskip\pause |
|
891 |
|
892 actually it can be any input type as long as it is a kind of |
|
893 sequence (for example a string) |
|
894 |
|
895 \end{frame} |
|
896 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
897 |
|
898 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
899 \begin{frame}[c] |
|
900 \frametitle{Scannerless Parsers} |
|
901 |
|
902 \begin{itemize} |
|
903 \item input: \alert{string} |
|
904 \item output: set of (output\_type, \alert{string}) |
|
905 \end{itemize}\bigskip |
|
906 |
|
907 but lexers are better when whitespaces or comments need to be |
|
908 filtered out; then input is a sequence of tokens |
|
909 |
|
910 \end{frame} |
|
911 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
912 |
|
913 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
914 \begin{frame}[c] |
|
915 \frametitle{Successful Parses} |
|
916 |
|
917 \begin{itemize} |
|
918 \item input: string |
|
919 \item output: \alert{set of} (output\_type, string) |
|
920 \end{itemize}\bigskip |
|
921 |
|
922 a parse is successful whenever the input has been fully |
|
923 ``consumed'' (that is the second component is empty) |
|
924 |
|
925 \end{frame} |
|
926 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
927 |
|
928 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
929 \begin{frame}[c] |
|
930 \frametitle{Abstract Parser Class} |
|
931 |
|
932 \small |
|
933 \lstinputlisting[language=Scala,xleftmargin=1mm] |
|
934 {../progs/app7.scala} |
|
935 \end{frame} |
|
936 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
937 |
|
938 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
939 \begin{frame}[c] |
|
940 |
|
941 \small |
|
942 \fontsize{10}{12}\selectfont |
|
943 \lstinputlisting[language=Scala,xleftmargin=1mm] |
|
944 {../progs/app8.scala} |
|
945 \end{frame} |
|
946 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
947 |
|
948 |
|
949 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
950 \begin{frame}[c] |
|
951 \frametitle{Two Grammars} |
|
952 |
|
953 Which languages are recognised by the following two grammars? |
|
954 |
|
955 \begin{center} |
|
956 \bl{\begin{tabular}{lcl} |
|
957 $S$ & $\rightarrow$ & $1 \cdot S \cdot S$\\ |
|
958 & $|$ & $\epsilon$ |
|
959 \end{tabular}} |
|
960 \end{center}\bigskip |
|
961 |
|
962 \begin{center} |
|
963 \bl{\begin{tabular}{lcl} |
|
964 $U$ & $\rightarrow$ & $1 \cdot U$\\ |
|
965 & $|$ & $\epsilon$ |
|
966 \end{tabular}} |
|
967 \end{center} |
|
968 |
|
969 \end{frame} |
|
970 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
971 |
|
972 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
973 \begin{frame}[t] |
|
974 \frametitle{Ambiguous Grammars} |
|
975 |
|
976 \begin{center} |
|
977 \begin{tikzpicture} |
|
978 \begin{axis}[xlabel={\pcode{1}s},ylabel={time in secs}, |
|
979 enlargelimits=false, |
|
980 xtick={0,100,...,1000}, |
|
981 xmax=1050, |
|
982 ymax=33, |
|
983 ytick={0,5,...,30}, |
|
984 scaled ticks=false, |
|
985 axis lines=left, |
|
986 width=11cm, |
|
987 height=7cm, |
|
988 legend entries={unambiguous,ambiguous}, |
|
989 legend pos=north east, |
|
990 legend cell align=left, |
|
991 x tick label style={font=\small,/pgf/number format/1000 sep={}}] |
|
992 \addplot[blue,mark=*, mark options={fill=white}] |
|
993 table {s-grammar1.data}; |
|
994 \only<2>{ |
|
995 \addplot[red,mark=triangle*, mark options={fill=white}] |
|
996 table {s-grammar2.data};} |
|
997 \end{axis} |
|
998 \end{tikzpicture} |
|
999 \end{center} |
|
1000 |
|
1001 \end{frame} |
|
1002 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1003 |
|
1004 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1005 \begin{frame} |
|
1006 \frametitle{While-Language} |
|
1007 \mbox{}\\[-23mm]\mbox{} |
|
1008 |
|
1009 \bl{\begin{plstx}[rhs style=,one per line] |
|
1010 : \meta{Stmt} ::= skip |
|
1011 | \meta{Id} := \meta{AExp} |
|
1012 | if \meta{BExp} then \meta{Block} else \meta{Block} |
|
1013 | while \meta{BExp} do \meta{Block}\\ |
|
1014 : \meta{Stmts} ::= \meta{Stmt} ; \meta{Stmts} |
|
1015 | \meta{Stmt}\\ |
|
1016 : \meta{Block} ::= \{ \meta{Stmts} \} |
|
1017 | \meta{Stmt}\\ |
|
1018 : \meta{AExp} ::= \ldots\\ |
|
1019 : \meta{BExp} ::= \ldots\\ |
|
1020 \end{plstx}} |
|
1021 |
|
1022 \end{frame} |
|
1023 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1024 |
|
1025 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1026 \begin{frame}[c] |
|
1027 \frametitle{An Interpreter} |
|
1028 |
|
1029 \begin{center} |
|
1030 \bl{\begin{tabular}{l} |
|
1031 $\{$\\ |
|
1032 \;\;$x := 5 \text{;}$\\ |
|
1033 \;\;$y := x * 3\text{;}$\\ |
|
1034 \;\;$y := x * 4\text{;}$\\ |
|
1035 \;\;$x := u * 3$\\ |
|
1036 $\}$ |
|
1037 \end{tabular}} |
|
1038 \end{center} |
|
1039 |
|
1040 \begin{itemize} |
|
1041 \item the interpreter has to record the value of \bl{$x$} before assigning a value to \bl{$y$}\pause |
|
1042 \item \bl{\texttt{eval(stmt, env)}} |
|
1043 \end{itemize} |
|
1044 |
|
1045 \end{frame} |
|
1046 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1047 |
|
1048 |
|
1049 |
626 \end{document} |
1050 \end{document} |
627 |
1051 |
628 %%% Local Variables: |
1052 %%% Local Variables: |
629 %%% mode: latex |
1053 %%% mode: latex |
630 %%% TeX-master: t |
1054 %%% TeX-master: t |