51 \end{center} |
51 \end{center} |
52 |
52 |
53 \end{frame} |
53 \end{frame} |
54 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
54 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
55 |
55 |
56 |
56 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
57 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
57 \begin{frame}[c] |
58 \begin{frame}[c] |
58 %%\frametitle{Test Program} |
59 \frametitle{Parser Combinators} |
59 |
60 |
60 \mbox{}\\[-18mm]\mbox{} |
61 One of the simplest ways to implement a parser, see |
61 |
62 {\small\url{https://vimeo.com/142341803}} (by Haoyi Li)\bigskip |
62 {\lstset{language=While}%%\fontsize{10}{12}\selectfont |
63 |
63 \texttt{\lstinputlisting{../progs/while-tests/loops.while}}} |
64 \begin{itemize} |
64 |
65 \item[$\bullet$] build-in library in Scala |
65 \end{frame} |
66 \item[$\bullet$] fastparse (2) library by Haoyi Li; is part of Ammonite |
66 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
67 \item[$\bullet$] possible exponential runtime behaviour |
67 |
68 \end{itemize} |
68 |
69 |
|
70 \end{frame} |
|
71 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
72 |
|
73 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
74 \begin{frame}[c] |
|
75 \frametitle{Parser Combinators} |
|
76 |
|
77 Parser combinators: \bigskip |
|
78 |
|
79 \begin{minipage}{1.1\textwidth} |
|
80 \begin{center} |
|
81 \mbox{}\hspace{-12mm}\mbox{}$\underbrace{\text{list of tokens}}_{\text{input}}$ \bl{$\Rightarrow$} |
|
82 $\underbrace{\text{set of (parsed input, unparsed input)}}_{\text{output}}$ |
|
83 \end{center} |
|
84 \end{minipage}\bigskip |
|
85 |
|
86 \begin{itemize} |
|
87 \item atomic parsers |
|
88 \item sequencing |
|
89 \item alternative |
|
90 \item semantic action (map-parser) |
|
91 \end{itemize} |
|
92 |
|
93 \end{frame} |
|
94 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
95 |
|
96 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
97 \begin{frame}[c] |
|
98 |
|
99 Atomic parsers, for example, number tokens |
|
100 |
|
101 \begin{center} |
|
102 \bl{$\texttt{Num(123)}::rest \;\Rightarrow\; \{(\texttt{Num(123)}, rest)\}$} |
|
103 \end{center}\bigskip |
|
104 |
|
105 \begin{itemize} |
|
106 \item you consume one or more token from the\\ |
|
107 input (stream) |
|
108 \item also works for characters and strings |
|
109 \end{itemize} |
|
110 \end{frame} |
|
111 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
112 |
|
113 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
114 \begin{frame}[c] |
|
115 |
|
116 Alternative parser (code \bl{$p\;||\;q$})\bigskip |
|
117 |
|
118 \begin{itemize} |
|
119 \item apply \bl{$p$} and also \bl{$q$}; then combine |
|
120 the outputs |
|
121 \end{itemize} |
|
122 |
|
123 \begin{center} |
|
124 \large \bl{$p(\text{input}) \cup q(\text{input})$} |
|
125 \end{center} |
|
126 |
|
127 \end{frame} |
|
128 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
129 |
|
130 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
131 \begin{frame}[c] |
|
132 |
|
133 Sequence parser (code \bl{$p\sim q$})\bigskip |
|
134 |
|
135 \begin{itemize} |
|
136 \item apply first \bl{$p$} producing a set of pairs |
|
137 \item then apply \bl{$q$} to the unparsed part |
|
138 \item then combine the results:\medskip |
|
139 \begin{center} |
|
140 ((output$_1$, output$_2$), unparsed part) |
|
141 \end{center} |
|
142 \end{itemize} |
|
143 |
|
144 \begin{center} |
|
145 \begin{tabular}{l} |
|
146 \large \bl{$\{((o_1, o_2), u_2) \;|\;$}\\[2mm] |
|
147 \large\mbox{}\hspace{15mm} \bl{$(o_1, u_1) \in p(\text{input}) \wedge$}\\[2mm] |
|
148 \large\mbox{}\hspace{15mm} \bl{$(o_2, u_2) \in q(u_1)\}$} |
|
149 \end{tabular} |
|
150 \end{center} |
|
151 |
|
152 |
|
153 \end{frame} |
|
154 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
155 |
|
156 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
157 \begin{frame}[c] |
|
158 |
|
159 Map-parser (code \bl{$p.map(f)\;$})\bigskip |
|
160 |
|
161 \begin{itemize} |
|
162 \item apply \bl{$p$} producing a set of pairs |
|
163 \item then apply the function \bl{$f$} to each first component |
|
164 \end{itemize} |
|
165 |
|
166 \begin{center} |
|
167 \begin{tabular}{l} |
|
168 \large \bl{$\{(f(o_1), u_1) \;|\; (o_1, u_1) \in p(\text{input})\}$} |
|
169 \end{tabular} |
|
170 \end{center}\bigskip\bigskip\pause |
|
171 |
|
172 \bl{$f$} is the semantic action (``what to do with the parsed input'') |
|
173 |
|
174 \end{frame} |
|
175 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
176 |
|
177 |
|
178 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
179 \mode<presentation>{ |
|
180 \begin{frame}[c] |
|
181 \frametitle{\begin{tabular}{c}Semantic Actions\end{tabular}} |
|
182 |
|
183 Addition |
|
184 |
|
185 \begin{center} |
|
186 \bl{$\meta{T} \sim + \sim \meta{E} \Rightarrow \underbrace{f\,((x,y), z) \Rightarrow x + z}_{\text{semantic action}}$} |
|
187 \end{center}\pause |
|
188 |
|
189 Multiplication |
|
190 |
|
191 \begin{center} |
|
192 \bl{$\meta{F} \sim * \sim \meta{T} \Rightarrow f\,((x,y), z) \Rightarrow x * z$} |
|
193 \end{center}\pause |
|
194 |
|
195 Parenthesis |
|
196 |
|
197 \begin{center} |
|
198 \bl{$\text{(} \sim \meta{E} \sim \text{)} \Rightarrow f\,((x,y), z) \Rightarrow y$} |
|
199 \end{center} |
|
200 |
|
201 \end{frame}} |
|
202 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
203 |
|
204 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
205 \begin{frame}[c] |
|
206 \frametitle{Types of Parsers} |
|
207 |
|
208 \begin{itemize} |
|
209 \item {\bf Sequencing}: if \bl{$p$} returns results of type \bl{$T$}, and \bl{$q$} results of type \bl{$S$}, |
|
210 then \bl{$p \sim q$} returns results of type |
|
211 |
|
212 \begin{center} |
|
213 \bl{$T \times S$} |
|
214 \end{center}\pause |
|
215 |
|
216 \item {\bf Alternative}: if \bl{$p$} returns results of type \bl{$T$} then \bl{$q$} \alert{must} also have results of type \bl{$T$}, |
|
217 and \bl{$p \;||\; q$} returns results of type |
|
218 |
|
219 \begin{center} |
|
220 \bl{$T$} |
|
221 \end{center}\pause |
|
222 |
|
223 \item {\bf Semantic Action}: if \bl{$p$} returns results of type \bl{$T$} and \bl{$f$} is a function from |
|
224 \bl{$T$} to \bl{$S$}, then |
|
225 \bl{$p \Rightarrow f$} returns results of type |
|
226 |
|
227 \begin{center} |
|
228 \bl{$S$} |
|
229 \end{center} |
|
230 |
|
231 \end{itemize} |
|
232 |
|
233 \end{frame} |
|
234 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
235 |
|
236 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
237 \begin{frame}[c] |
|
238 \frametitle{Input Types of Parsers} |
|
239 |
|
240 \begin{itemize} |
|
241 \item input: \alert{token list} |
|
242 \item output: set of (output\_type, \alert{token list}) |
|
243 \end{itemize}\bigskip\pause |
|
244 |
|
245 actually it can be any input type as long as it is a kind of |
|
246 sequence (for example a string) |
|
247 |
|
248 \end{frame} |
|
249 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
250 |
|
251 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
252 \begin{frame}[c] |
|
253 \frametitle{Scannerless Parsers} |
|
254 |
|
255 \begin{itemize} |
|
256 \item input: \alert{string} |
|
257 \item output: set of (output\_type, \alert{string}) |
|
258 \end{itemize}\bigskip\bigskip |
|
259 |
|
260 but using lexers is better because whitespaces or comments can be |
|
261 filtered out; then input is a sequence of tokens |
|
262 |
|
263 \end{frame} |
|
264 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
265 |
|
266 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
267 \begin{frame}[c] |
|
268 \frametitle{Successful Parses} |
|
269 |
|
270 \begin{itemize} |
|
271 \item input: string |
|
272 \item output: \alert{set of} (output\_type, string) |
|
273 \end{itemize}\bigskip |
|
274 |
|
275 a parse is successful whenever the input has been fully |
|
276 ``consumed'' (that is the second component is empty) |
|
277 |
|
278 \end{frame} |
|
279 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
280 |
|
281 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
282 \begin{frame}[c] |
|
283 \frametitle{Abstract Parser Class} |
|
284 |
|
285 \small |
|
286 \lstinputlisting[language=Scala,xleftmargin=1mm] |
|
287 {../progs/app7.scala} |
|
288 \end{frame} |
|
289 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
290 |
|
291 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
292 \begin{frame}[c] |
|
293 |
|
294 \small |
|
295 \fontsize{10}{12}\selectfont |
|
296 \lstinputlisting[language=Scala,xleftmargin=1mm] |
|
297 {../progs/app8.scala} |
|
298 \end{frame} |
|
299 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
300 |
|
301 |
|
302 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
303 \begin{frame}[c] |
|
304 \frametitle{Two Grammars} |
|
305 |
|
306 Which languages are recognised by the following two grammars? |
|
307 |
|
308 \begin{center} |
|
309 \bl{\begin{tabular}{lcl} |
|
310 $\meta{S}$ & $\rightarrow$ & $1 \cdot \meta{S} \cdot \meta{S}$\\ |
|
311 & $|$ & $\epsilon$ |
|
312 \end{tabular}} |
|
313 \end{center}\bigskip |
|
314 |
|
315 \begin{center} |
|
316 \bl{\begin{tabular}{lcl} |
|
317 $\meta{U}$ & $\rightarrow$ & $1 \cdot \meta{U}$\\ |
|
318 & $|$ & $\epsilon$ |
|
319 \end{tabular}} |
|
320 \end{center} |
|
321 |
|
322 \end{frame} |
|
323 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
324 |
|
325 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
326 \begin{frame}[t] |
|
327 \frametitle{Ambiguous Grammars} |
|
328 |
|
329 \begin{center} |
|
330 \begin{tikzpicture} |
|
331 \begin{axis}[xlabel={\pcode{1}s},ylabel={time in secs}, |
|
332 enlargelimits=false, |
|
333 xtick={0,100,...,1000}, |
|
334 xmax=1050, |
|
335 ymax=33, |
|
336 ytick={0,5,...,30}, |
|
337 scaled ticks=false, |
|
338 axis lines=left, |
|
339 width=11cm, |
|
340 height=7cm, |
|
341 legend entries={unambiguous,ambiguous}, |
|
342 legend pos=north east, |
|
343 legend cell align=left, |
|
344 x tick label style={font=\small,/pgf/number format/1000 sep={}}] |
|
345 \addplot[blue,mark=*, mark options={fill=white}] |
|
346 table {s-grammar1.data}; |
|
347 \only<2>{ |
|
348 \addplot[red,mark=triangle*, mark options={fill=white}] |
|
349 table {s-grammar2.data};} |
|
350 \end{axis} |
|
351 \end{tikzpicture} |
|
352 \end{center} |
|
353 |
|
354 \end{frame} |
|
355 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
356 |
69 |
357 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
70 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
358 \begin{frame} |
71 \begin{frame} |
359 \frametitle{While-Language} |
72 \frametitle{While-Language} |
360 \mbox{}\\[-23mm]\mbox{} |
73 \mbox{}\\[-23mm]\mbox{} |
487 \end{frame}} |
213 \end{frame}} |
488 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
214 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
489 |
215 |
490 |
216 |
491 |
217 |
492 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
218 % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
493 \begin{frame}[t] |
219 % \begin{frame}[t] |
494 \frametitle{% |
220 % \frametitle{% |
495 \begin{tabular}{@ {}c@ {}} |
221 % \begin{tabular}{@ {}c@ {}} |
496 \\[-3mm] |
222 % \\[-3mm] |
497 \LARGE Compilers and \\[-2mm] |
223 % \LARGE Compilers and \\[-2mm] |
498 \LARGE Formal Languages\\[3mm] |
224 % \LARGE Formal Languages\\[3mm] |
499 \end{tabular}} |
225 % \end{tabular}} |
500 |
226 |
501 \normalsize |
227 % \normalsize |
502 \begin{center} |
228 % \begin{center} |
503 \begin{tabular}{ll} |
229 % \begin{tabular}{ll} |
504 Email: & christian.urban at kcl.ac.uk\\ |
230 % Email: & christian.urban at kcl.ac.uk\\ |
505 %Office Hours: & Thursdays 12 -- 14\\ |
231 % %Office Hours: & Thursdays 12 -- 14\\ |
506 %Location: & N7.07 (North Wing, Bush House)\\ |
232 % %Location: & N7.07 (North Wing, Bush House)\\ |
507 Slides \& Progs: & KEATS (also homework is there)\\ |
233 % Slides \& Progs: & KEATS (also homework is there)\\ |
508 \end{tabular} |
234 % \end{tabular} |
509 \end{center} |
235 % \end{center} |
510 |
236 |
511 \begin{center} |
237 % \begin{center} |
512 \begin{tikzpicture} |
238 % \begin{tikzpicture} |
513 \node[drop shadow,fill=white,inner sep=0pt] |
239 % \node[drop shadow,fill=white,inner sep=0pt] |
514 {\footnotesize\rowcolors{1}{capri!10}{white} |
240 % {\footnotesize\rowcolors{1}{capri!10}{white} |
515 \begin{tabular}{|p{4.8cm}|p{4.8cm}|}\hline |
241 % \begin{tabular}{|p{4.8cm}|p{4.8cm}|}\hline |
516 1 Introduction, Languages & \cellcolor{blue!50} 6 While-Language \\ |
242 % 1 Introduction, Languages & \cellcolor{blue!50} 6 While-Language \\ |
517 2 Regular Expressions, Derivatives & 7 Compilation, JVM \\ |
243 % 2 Regular Expressions, Derivatives & 7 Compilation, JVM \\ |
518 3 Automata, Regular Languages & 8 Compiling Functional Languages \\ |
244 % 3 Automata, Regular Languages & 8 Compiling Functional Languages \\ |
519 4 Lexing, Tokenising & 9 Optimisations \\ |
245 % 4 Lexing, Tokenising & 9 Optimisations \\ |
520 5 Grammars, Parsing & 10 LLVM \\ \hline |
246 % 5 Grammars, Parsing & 10 LLVM \\ \hline |
521 \end{tabular}% |
247 % \end{tabular}% |
522 }; |
248 % }; |
523 \end{tikzpicture} |
249 % \end{tikzpicture} |
524 \end{center} |
250 % \end{center} |
525 |
251 |
526 \end{frame} |
252 % \end{frame} |
527 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
253 % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
528 |
254 |
529 |
255 |
530 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
256 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
531 % \begin{frame}[c] |
257 % \begin{frame}[c] |
532 % \small |
258 % \small |
1242 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
968 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
1243 % |
969 % |
1244 |
970 |
1245 \end{document} |
971 \end{document} |
1246 |
972 |
|
973 |
|
974 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
975 \begin{frame}[c] |
|
976 \frametitle{Parser Combinators} |
|
977 |
|
978 One of the simplest ways to implement a parser, see |
|
979 {\small\url{https://vimeo.com/142341803}} (by Haoyi Li)\bigskip |
|
980 |
|
981 \begin{itemize} |
|
982 \item[$\bullet$] build-in library in Scala |
|
983 \item[$\bullet$] fastparse (2) library by Haoyi Li; is part of Ammonite |
|
984 \item[$\bullet$] possible exponential runtime behaviour |
|
985 \end{itemize} |
|
986 |
|
987 \end{frame} |
|
988 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
989 |
|
990 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
991 \begin{frame}[c] |
|
992 \frametitle{Parser Combinators} |
|
993 |
|
994 Parser combinators: \bigskip |
|
995 |
|
996 \begin{minipage}{1.1\textwidth} |
|
997 \begin{center} |
|
998 \mbox{}\hspace{-12mm}\mbox{}$\underbrace{\text{list of tokens}}_{\text{input}}$ \bl{$\Rightarrow$} |
|
999 $\underbrace{\text{set of (parsed input, unparsed input)}}_{\text{output}}$ |
|
1000 \end{center} |
|
1001 \end{minipage}\bigskip |
|
1002 |
|
1003 \begin{itemize} |
|
1004 \item atomic parsers |
|
1005 \item sequencing |
|
1006 \item alternative |
|
1007 \item semantic action (map-parser) |
|
1008 \end{itemize} |
|
1009 |
|
1010 \end{frame} |
|
1011 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1012 |
|
1013 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1014 \begin{frame}[c] |
|
1015 |
|
1016 Atomic parsers, for example, number tokens |
|
1017 |
|
1018 \begin{center} |
|
1019 \bl{$\texttt{Num(123)}::rest \;\Rightarrow\; \{(\texttt{Num(123)}, rest)\}$} |
|
1020 \end{center}\bigskip |
|
1021 |
|
1022 \begin{itemize} |
|
1023 \item you consume one or more token from the\\ |
|
1024 input (stream) |
|
1025 \item also works for characters and strings |
|
1026 \end{itemize} |
|
1027 \end{frame} |
|
1028 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1029 |
|
1030 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1031 \begin{frame}[c] |
|
1032 |
|
1033 Alternative parser (code \bl{$p\;||\;q$})\bigskip |
|
1034 |
|
1035 \begin{itemize} |
|
1036 \item apply \bl{$p$} and also \bl{$q$}; then combine |
|
1037 the outputs |
|
1038 \end{itemize} |
|
1039 |
|
1040 \begin{center} |
|
1041 \large \bl{$p(\text{input}) \cup q(\text{input})$} |
|
1042 \end{center} |
|
1043 |
|
1044 \end{frame} |
|
1045 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1046 |
|
1047 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1048 \begin{frame}[c] |
|
1049 |
|
1050 Sequence parser (code \bl{$p\sim q$})\bigskip |
|
1051 |
|
1052 \begin{itemize} |
|
1053 \item apply first \bl{$p$} producing a set of pairs |
|
1054 \item then apply \bl{$q$} to the unparsed part |
|
1055 \item then combine the results:\medskip |
|
1056 \begin{center} |
|
1057 ((output$_1$, output$_2$), unparsed part) |
|
1058 \end{center} |
|
1059 \end{itemize} |
|
1060 |
|
1061 \begin{center} |
|
1062 \begin{tabular}{l} |
|
1063 \large \bl{$\{((o_1, o_2), u_2) \;|\;$}\\[2mm] |
|
1064 \large\mbox{}\hspace{15mm} \bl{$(o_1, u_1) \in p(\text{input}) \wedge$}\\[2mm] |
|
1065 \large\mbox{}\hspace{15mm} \bl{$(o_2, u_2) \in q(u_1)\}$} |
|
1066 \end{tabular} |
|
1067 \end{center} |
|
1068 |
|
1069 |
|
1070 \end{frame} |
|
1071 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1072 |
|
1073 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1074 \begin{frame}[c] |
|
1075 |
|
1076 Map-parser (code \bl{$p.map(f)\;$})\bigskip |
|
1077 |
|
1078 \begin{itemize} |
|
1079 \item apply \bl{$p$} producing a set of pairs |
|
1080 \item then apply the function \bl{$f$} to each first component |
|
1081 \end{itemize} |
|
1082 |
|
1083 \begin{center} |
|
1084 \begin{tabular}{l} |
|
1085 \large \bl{$\{(f(o_1), u_1) \;|\; (o_1, u_1) \in p(\text{input})\}$} |
|
1086 \end{tabular} |
|
1087 \end{center}\bigskip\bigskip\pause |
|
1088 |
|
1089 \bl{$f$} is the semantic action (``what to do with the parsed input'') |
|
1090 |
|
1091 \end{frame} |
|
1092 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1093 |
|
1094 |
|
1095 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1096 \mode<presentation>{ |
|
1097 \begin{frame}[c] |
|
1098 \frametitle{\begin{tabular}{c}Semantic Actions\end{tabular}} |
|
1099 |
|
1100 Addition |
|
1101 |
|
1102 \begin{center} |
|
1103 \bl{$\meta{T} \sim + \sim \meta{E} \Rightarrow \underbrace{f\,((x,y), z) \Rightarrow x + z}_{\text{semantic action}}$} |
|
1104 \end{center}\pause |
|
1105 |
|
1106 Multiplication |
|
1107 |
|
1108 \begin{center} |
|
1109 \bl{$\meta{F} \sim * \sim \meta{T} \Rightarrow f\,((x,y), z) \Rightarrow x * z$} |
|
1110 \end{center}\pause |
|
1111 |
|
1112 Parenthesis |
|
1113 |
|
1114 \begin{center} |
|
1115 \bl{$\text{(} \sim \meta{E} \sim \text{)} \Rightarrow f\,((x,y), z) \Rightarrow y$} |
|
1116 \end{center} |
|
1117 |
|
1118 \end{frame}} |
|
1119 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1120 |
|
1121 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1122 \begin{frame}[c] |
|
1123 \frametitle{Types of Parsers} |
|
1124 |
|
1125 \begin{itemize} |
|
1126 \item {\bf Sequencing}: if \bl{$p$} returns results of type \bl{$T$}, and \bl{$q$} results of type \bl{$S$}, |
|
1127 then \bl{$p \sim q$} returns results of type |
|
1128 |
|
1129 \begin{center} |
|
1130 \bl{$T \times S$} |
|
1131 \end{center}\pause |
|
1132 |
|
1133 \item {\bf Alternative}: if \bl{$p$} returns results of type \bl{$T$} then \bl{$q$} \alert{must} also have results of type \bl{$T$}, |
|
1134 and \bl{$p \;||\; q$} returns results of type |
|
1135 |
|
1136 \begin{center} |
|
1137 \bl{$T$} |
|
1138 \end{center}\pause |
|
1139 |
|
1140 \item {\bf Semantic Action}: if \bl{$p$} returns results of type \bl{$T$} and \bl{$f$} is a function from |
|
1141 \bl{$T$} to \bl{$S$}, then |
|
1142 \bl{$p \Rightarrow f$} returns results of type |
|
1143 |
|
1144 \begin{center} |
|
1145 \bl{$S$} |
|
1146 \end{center} |
|
1147 |
|
1148 \end{itemize} |
|
1149 |
|
1150 \end{frame} |
|
1151 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1152 |
|
1153 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1154 \begin{frame}[c] |
|
1155 \frametitle{Input Types of Parsers} |
|
1156 |
|
1157 \begin{itemize} |
|
1158 \item input: \alert{token list} |
|
1159 \item output: set of (output\_type, \alert{token list}) |
|
1160 \end{itemize}\bigskip\pause |
|
1161 |
|
1162 actually it can be any input type as long as it is a kind of |
|
1163 sequence (for example a string) |
|
1164 |
|
1165 \end{frame} |
|
1166 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1167 |
|
1168 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1169 \begin{frame}[c] |
|
1170 \frametitle{Scannerless Parsers} |
|
1171 |
|
1172 \begin{itemize} |
|
1173 \item input: \alert{string} |
|
1174 \item output: set of (output\_type, \alert{string}) |
|
1175 \end{itemize}\bigskip\bigskip |
|
1176 |
|
1177 but using lexers is better because whitespaces or comments can be |
|
1178 filtered out; then input is a sequence of tokens |
|
1179 |
|
1180 \end{frame} |
|
1181 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1182 |
|
1183 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1184 \begin{frame}[c] |
|
1185 \frametitle{Successful Parses} |
|
1186 |
|
1187 \begin{itemize} |
|
1188 \item input: string |
|
1189 \item output: \alert{set of} (output\_type, string) |
|
1190 \end{itemize}\bigskip |
|
1191 |
|
1192 a parse is successful whenever the input has been fully |
|
1193 ``consumed'' (that is the second component is empty) |
|
1194 |
|
1195 \end{frame} |
|
1196 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1197 |
|
1198 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1199 \begin{frame}[c] |
|
1200 \frametitle{Abstract Parser Class} |
|
1201 |
|
1202 \small |
|
1203 \lstinputlisting[language=Scala,xleftmargin=1mm] |
|
1204 {../progs/app7.scala} |
|
1205 \end{frame} |
|
1206 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1207 |
|
1208 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1209 \begin{frame}[c] |
|
1210 |
|
1211 \small |
|
1212 \fontsize{10}{12}\selectfont |
|
1213 \lstinputlisting[language=Scala,xleftmargin=1mm] |
|
1214 {../progs/app8.scala} |
|
1215 \end{frame} |
|
1216 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1217 |
|
1218 |
|
1219 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1220 \begin{frame}[c] |
|
1221 \frametitle{Two Grammars} |
|
1222 |
|
1223 Which languages are recognised by the following two grammars? |
|
1224 |
|
1225 \begin{center} |
|
1226 \bl{\begin{tabular}{lcl} |
|
1227 $\meta{S}$ & $\rightarrow$ & $1 \cdot \meta{S} \cdot \meta{S}$\\ |
|
1228 & $|$ & $\epsilon$ |
|
1229 \end{tabular}} |
|
1230 \end{center}\bigskip |
|
1231 |
|
1232 \begin{center} |
|
1233 \bl{\begin{tabular}{lcl} |
|
1234 $\meta{U}$ & $\rightarrow$ & $1 \cdot \meta{U}$\\ |
|
1235 & $|$ & $\epsilon$ |
|
1236 \end{tabular}} |
|
1237 \end{center} |
|
1238 |
|
1239 \end{frame} |
|
1240 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1241 |
|
1242 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1243 \begin{frame}[t] |
|
1244 \frametitle{Ambiguous Grammars} |
|
1245 |
|
1246 \begin{center} |
|
1247 \begin{tikzpicture} |
|
1248 \begin{axis}[xlabel={\pcode{1}s},ylabel={time in secs}, |
|
1249 enlargelimits=false, |
|
1250 xtick={0,100,...,1000}, |
|
1251 xmax=1050, |
|
1252 ymax=33, |
|
1253 ytick={0,5,...,30}, |
|
1254 scaled ticks=false, |
|
1255 axis lines=left, |
|
1256 width=11cm, |
|
1257 height=7cm, |
|
1258 legend entries={unambiguous,ambiguous}, |
|
1259 legend pos=north east, |
|
1260 legend cell align=left, |
|
1261 x tick label style={font=\small,/pgf/number format/1000 sep={}}] |
|
1262 \addplot[blue,mark=*, mark options={fill=white}] |
|
1263 table {s-grammar1.data}; |
|
1264 \only<2>{ |
|
1265 \addplot[red,mark=triangle*, mark options={fill=white}] |
|
1266 table {s-grammar2.data};} |
|
1267 \end{axis} |
|
1268 \end{tikzpicture} |
|
1269 \end{center} |
|
1270 |
|
1271 \end{frame} |
|
1272 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
1273 |
|
1274 |
|
1275 |
|
1276 |
1247 %%% Local Variables: |
1277 %%% Local Variables: |
1248 %%% mode: latex |
1278 %%% mode: latex |
1249 %%% TeX-master: t |
1279 %%% TeX-master: t |
1250 %%% End: |
1280 %%% End: |
1251 |
1281 |