89 \end{center} |
90 \end{center} |
90 |
91 |
91 \end{frame} |
92 \end{frame} |
92 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
93 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
93 |
94 |
94 |
95 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
95 |
|
96 |
|
97 \newcommand{\qq}{\mbox{\texttt{"}}} |
|
98 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
99 \begin{frame}[c] |
|
100 \frametitle{CFGs} |
|
101 |
|
102 A \alert{context-free} grammar (CFG) \bl{$G$} consists of: |
|
103 |
|
104 \begin{itemize} |
|
105 \item a finite set of nonterminal symbols (upper case) |
|
106 \item a finite terminal symbols or tokens (lower case) |
|
107 \item a start symbol (which must be a nonterminal) |
|
108 \item a set of rules |
|
109 \begin{center} |
|
110 \bl{$A \rightarrow \text{rhs}_1 | \text{rhs}_2 | \ldots$} |
|
111 \end{center} |
|
112 |
|
113 where \bl{rhs} are sequences involving terminals and nonterminals (can also be empty).\medskip\pause |
|
114 |
|
115 \end{itemize} |
|
116 |
|
117 \end{frame} |
|
118 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
119 |
|
120 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
121 \mode<presentation>{ |
|
122 \begin{frame}[c] |
96 \begin{frame}[c] |
123 \frametitle{\begin{tabular}{c}Hierarchy of Languages\end{tabular}} |
97 \frametitle{\begin{tabular}{c}Hierarchy of Languages\end{tabular}} |
124 |
98 |
125 Recall that languages are sets of strings. |
99 Recall that languages are sets of strings. |
126 |
100 |
135 \draw (0,-1.4) node [rect] {regular languages}; |
109 \draw (0,-1.4) node [rect] {regular languages}; |
136 \end{tikzpicture} |
110 \end{tikzpicture} |
137 |
111 |
138 \end{center} |
112 \end{center} |
139 |
113 |
140 \end{frame}} |
114 \end{frame} |
|
115 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
116 |
|
117 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
118 \begin{frame}[c] |
|
119 \frametitle{Two Grammars} |
|
120 |
|
121 Which languages are recognised by the following two grammars? |
|
122 |
|
123 \begin{center} |
|
124 \bl{\begin{tabular}{lcl} |
|
125 $S$ & $\rightarrow$ & $1 \cdot S \cdot S$\\ |
|
126 & $|$ & $\epsilon$ |
|
127 \end{tabular}} |
|
128 \end{center}\bigskip |
|
129 |
|
130 \begin{center} |
|
131 \bl{\begin{tabular}{lcl} |
|
132 $U$ & $\rightarrow$ & $1 \cdot U$\\ |
|
133 & $|$ & $\epsilon$ |
|
134 \end{tabular}} |
|
135 \end{center} |
|
136 |
|
137 \end{frame} |
|
138 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
139 |
|
140 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
141 \begin{frame}[c] |
|
142 |
|
143 Atomic parsers, for example |
|
144 |
|
145 \begin{center} |
|
146 \bl{$1::rest \;\Rightarrow\; \{(1, rest)\}$} |
|
147 \end{center}\bigskip |
|
148 |
|
149 \begin{itemize} |
|
150 \item you consume one or more token from the\\ |
|
151 input (stream) |
|
152 \item also works for characters and strings |
|
153 \end{itemize} |
|
154 \end{frame} |
|
155 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
156 |
|
157 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
158 \begin{frame}[c] |
|
159 |
|
160 Alternative parser (code \bl{$p\;||\;q$})\bigskip |
|
161 |
|
162 \begin{itemize} |
|
163 \item apply \bl{$p$} and also \bl{$q$}; then combine |
|
164 the outputs |
|
165 \end{itemize} |
|
166 |
|
167 \begin{center} |
|
168 \large \bl{$p(\text{input}) \cup q(\text{input})$} |
|
169 \end{center} |
|
170 |
|
171 \end{frame} |
|
172 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
173 |
|
174 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
175 \begin{frame}[c] |
|
176 |
|
177 Sequence parser (code \bl{$p\sim q$})\bigskip |
|
178 |
|
179 \begin{itemize} |
|
180 \item apply first \bl{$p$} producing a set of pairs |
|
181 \item then apply \bl{$q$} to the unparsed parts |
|
182 \item then combine the results:\medskip |
|
183 \begin{center} |
|
184 ((output$_1$, output$_2$), unparsed part) |
|
185 \end{center} |
|
186 \end{itemize} |
|
187 |
|
188 \begin{center} |
|
189 \begin{tabular}{l} |
|
190 \large \bl{$\{((o_1, o_2), u_2) \;|\;$}\\[2mm] |
|
191 \large\mbox{}\hspace{15mm} \bl{$(o_1, u_1) \in p(\text{input}) \wedge$}\\[2mm] |
|
192 \large\mbox{}\hspace{15mm} \bl{$(o_2, u_2) \in q(u_1)\}$} |
|
193 \end{tabular} |
|
194 \end{center} |
|
195 |
|
196 |
|
197 \end{frame} |
|
198 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
199 |
|
200 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
201 \begin{frame}[c] |
|
202 |
|
203 Function parser (code \bl{$p \Rightarrow f\;$})\bigskip |
|
204 |
|
205 \begin{itemize} |
|
206 \item apply \bl{$p$} producing a set of pairs |
|
207 \item then apply the function \bl{$f$} to each first component |
|
208 \end{itemize} |
|
209 |
|
210 \begin{center} |
|
211 \begin{tabular}{l} |
|
212 \large \bl{$\{(f(o_1), u_1) \;|\; (o_1, u_1) \in p(\text{input})\}$} |
|
213 \end{tabular} |
|
214 \end{center} |
|
215 |
|
216 \end{frame} |
|
217 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
218 |
|
219 |
|
220 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
221 \begin{frame}[t] |
|
222 \frametitle{Ambiguous Grammars} |
|
223 |
|
224 \begin{center} |
|
225 \begin{tikzpicture} |
|
226 \begin{axis}[xlabel={\pcode{1}s},ylabel={time in secs}, |
|
227 enlargelimits=false, |
|
228 xtick={0,100,...,1000}, |
|
229 xmax=1050, |
|
230 ymax=33, |
|
231 ytick={0,5,...,30}, |
|
232 scaled ticks=false, |
|
233 axis lines=left, |
|
234 width=11cm, |
|
235 height=7cm, |
|
236 legend entries={unambiguous,ambiguous}, |
|
237 legend pos=north east, |
|
238 legend cell align=left, |
|
239 x tick label style={font=\small,/pgf/number format/1000 sep={}}] |
|
240 \addplot[blue,mark=*, mark options={fill=white}] |
|
241 table {s-grammar1.data}; |
|
242 \only<2>{ |
|
243 \addplot[red,mark=triangle*, mark options={fill=white}] |
|
244 table {s-grammar2.data};} |
|
245 \end{axis} |
|
246 \end{tikzpicture} |
|
247 \end{center} |
|
248 |
|
249 \end{frame} |
141 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
250 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
142 |
251 |
143 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
252 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
144 \mode<presentation>{ |
253 \mode<presentation>{ |
145 \begin{frame}[c] |
254 \begin{frame}[c] |
184 \end{center} |
293 \end{center} |
185 |
294 |
186 |
295 |
187 \end{frame}} |
296 \end{frame}} |
188 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
297 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
189 |
|
190 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
191 \mode<presentation>{ |
|
192 \begin{frame}[c] |
|
193 \frametitle{\begin{tabular}{c}Operator Precedences\end{tabular}} |
|
194 |
|
195 |
|
196 To disambiguate |
|
197 |
|
198 \begin{center} |
|
199 \bl{\begin{tabular}{lcl} |
|
200 $E$ & $\rightarrow$ & $E \cdot + \cdot E \;|\;E \cdot * \cdot E \;|\;( \cdot E \cdot ) \;|\;N$ \\ |
|
201 \end{tabular}} |
|
202 \end{center} |
|
203 |
|
204 Decide on how many precedence levels, say\medskip\\ |
|
205 \hspace{5mm}highest for \bl{$()$}, medium for \bl{*}, lowest for \bl{+} |
|
206 |
|
207 \begin{center} |
|
208 \bl{\begin{tabular}{lcl} |
|
209 $E_{low}$ & $\rightarrow$ & $E_{med} \cdot + \cdot E_{low} \;|\; E_{med}$ \\ |
|
210 $E_{med}$ & $\rightarrow$ & $E_{hi} \cdot * \cdot E_{med} \;|\; E_{hi}$\\ |
|
211 $E_{hi}$ & $\rightarrow$ & $( \cdot E_{low} \cdot ) \;|\;N$ \\ |
|
212 \end{tabular}} |
|
213 \end{center}\pause |
|
214 |
|
215 \small What happens with \bl{$1 + 3 + 4$}? |
|
216 \end{frame}} |
|
217 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
218 |
298 |
219 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
299 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
220 \mode<presentation>{ |
300 \mode<presentation>{ |
221 \begin{frame}[c] |
301 \begin{frame}[c] |
222 \frametitle{\begin{tabular}{c}Removing Left-Recursion\end{tabular}} |
302 \frametitle{\begin{tabular}{c}Removing Left-Recursion\end{tabular}} |
255 $N'$ & $\rightarrow$ & $N \cdot N' \;|\; \epsilon$\\ |
335 $N'$ & $\rightarrow$ & $N \cdot N' \;|\; \epsilon$\\ |
256 \end{tabular}} |
336 \end{tabular}} |
257 \end{center} |
337 \end{center} |
258 |
338 |
259 |
339 |
|
340 \end{frame}} |
|
341 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
342 |
|
343 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
344 \mode<presentation>{ |
|
345 \begin{frame}[c] |
|
346 \frametitle{\begin{tabular}{c}Operator Precedences\end{tabular}} |
|
347 |
|
348 |
|
349 To disambiguate |
|
350 |
|
351 \begin{center} |
|
352 \bl{\begin{tabular}{lcl} |
|
353 $E$ & $\rightarrow$ & $E \cdot + \cdot E \;|\;E \cdot * \cdot E \;|\;( \cdot E \cdot ) \;|\;N$ \\ |
|
354 \end{tabular}} |
|
355 \end{center} |
|
356 |
|
357 Decide on how many precedence levels, say\medskip\\ |
|
358 \hspace{5mm}highest for \bl{$()$}, medium for \bl{*}, lowest for \bl{+} |
|
359 |
|
360 \begin{center} |
|
361 \bl{\begin{tabular}{lcl} |
|
362 $E_{low}$ & $\rightarrow$ & $E_{med} \cdot + \cdot E_{low} \;|\; E_{med}$ \\ |
|
363 $E_{med}$ & $\rightarrow$ & $E_{hi} \cdot * \cdot E_{med} \;|\; E_{hi}$\\ |
|
364 $E_{hi}$ & $\rightarrow$ & $( \cdot E_{low} \cdot ) \;|\;N$ \\ |
|
365 \end{tabular}} |
|
366 \end{center}\pause |
|
367 |
|
368 \small What happens with \bl{$1 + 3 + 4$}? |
260 \end{frame}} |
369 \end{frame}} |
261 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
370 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
262 |
371 |
263 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
372 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
264 \mode<presentation>{ |
373 \mode<presentation>{ |
363 |
472 |
364 \end{frame}} |
473 \end{frame}} |
365 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
474 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
366 |
475 |
367 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
476 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
368 \mode<presentation>{ |
477 \begin{frame}[c] |
|
478 \frametitle{The Goal of this Course} |
|
479 \mbox{}\\[-26mm]\mbox{} |
|
480 |
|
481 \begin{center} |
|
482 \begin{tikzpicture}[scale=1, |
|
483 node/.style={ |
|
484 rectangle,rounded corners=3mm, |
|
485 very thick,draw=black!50, |
|
486 minimum height=18mm, minimum width=20mm, |
|
487 top color=white,bottom color=black!20}] |
|
488 |
|
489 \node at (3.05, 1.8) {\Large\bf Write a Compiler}; |
|
490 |
|
491 \node (0) at (-2.3,0) {}; |
|
492 |
|
493 \node (A) at (0,0) [node] {}; |
|
494 \node [below right] at (A.north west) {lexer}; |
|
495 |
|
496 \node (B) at (3,0) [node] {}; |
|
497 \node [below right=1mm] at (B.north west) |
|
498 {\mbox{}\hspace{-1mm}parser}; |
|
499 |
|
500 \node (C) at (6,0) [node] {}; |
|
501 \node [below right] at (C.north west) |
|
502 {\mbox{}\hspace{-1mm}code gen}; |
|
503 |
|
504 \node (1) at (8.4,0) {}; |
|
505 |
|
506 \draw [->,line width=4mm] (0) -- (A); |
|
507 \draw [->,line width=4mm] (A) -- (B); |
|
508 \draw [->,line width=4mm] (B) -- (C); |
|
509 \draw [->,line width=4mm] (C) -- (1); |
|
510 \end{tikzpicture} |
|
511 \end{center} |
|
512 |
|
513 We have lexer and parser. |
|
514 |
|
515 \end{frame} |
|
516 |
|
517 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
369 \begin{frame}[c] |
518 \begin{frame}[c] |
370 |
519 |
371 \begin{center} |
520 \begin{center} |
372 \bl{\begin{tabular}{@{}lcl@{}} |
521 \bl{\begin{tabular}{@{}lcl@{}} |
373 \textit{Stmt} & $\rightarrow$ & $\texttt{skip}$\\ |
522 \textit{Stmt} & $\rightarrow$ & $\texttt{skip}$\\ |