260 \item a finite set of nonterminal symbols (upper case) |
114 \item a finite set of nonterminal symbols (upper case) |
261 \item a finite terminal symbols or tokens (lower case) |
115 \item a finite terminal symbols or tokens (lower case) |
262 \item a start symbol (which must be a nonterminal) |
116 \item a start symbol (which must be a nonterminal) |
263 \item a set of rules |
117 \item a set of rules |
264 \begin{center} |
118 \begin{center} |
265 \bl{$A \rightarrow \text{rhs}$} |
119 \bl{$A \rightarrow \text{rhs}_1 | \text{rhs}_2 | \ldots$} |
266 \end{center} |
120 \end{center} |
267 |
121 |
268 where \bl{rhs} are sequences involving terminals and nonterminals (can also be empty).\medskip\pause |
122 where \bl{rhs} are sequences involving terminals and nonterminals (can also be empty).\medskip\pause |
269 |
123 |
270 We can also allow rules |
|
271 \begin{center} |
|
272 \bl{$A \rightarrow \text{rhs}_1 | \text{rhs}_2 | \ldots$} |
|
273 \end{center} |
|
274 \end{itemize} |
124 \end{itemize} |
275 |
125 |
276 |
126 \end{frame}} |
277 \end{frame}} |
127 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
278 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
128 |
279 |
129 |
280 |
130 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
281 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
131 \mode<presentation>{ |
282 \mode<presentation>{ |
132 \begin{frame}[c] |
283 \begin{frame}[c] |
133 \frametitle{\begin{tabular}{c}Hierarchie of Languages\end{tabular}} |
284 \frametitle{\begin{tabular}{c}A CFG Derivation\end{tabular}} |
134 |
285 |
135 Recall that languages are sets of strings. |
286 \begin{enumerate} |
136 |
287 \item Begin with a string with only the start symbol \bl{$S$}\bigskip |
137 \begin{center} |
288 \item Replace any non-terminal \bl{$X$} in the string by the |
138 \begin{tikzpicture} |
289 right-hand side of some production \bl{$X \rightarrow \text{rhs}$}\bigskip |
139 [rect/.style={draw=black!50, top color=white,bottom color=black!20, rectangle, very thick, rounded corners}] |
290 \item Repeat 2 until there are no non-terminals |
140 |
291 \end{enumerate} |
141 \draw (0,0) node [rect, text depth=39mm, text width=68mm] {all languages}; |
292 |
142 \draw (0,-0.4) node [rect, text depth=28.5mm, text width=64mm] {decidable languages}; |
293 \begin{center} |
143 \draw (0,-0.85) node [rect, text depth=17mm] {context sensitive languages}; |
294 \bl{$S \rightarrow \ldots \rightarrow \ldots \rightarrow \ldots \rightarrow \ldots $} |
144 \draw (0,-1.14) node [rect, text depth=9mm, text width=50mm] {context-free languages}; |
295 \end{center} |
145 \draw (0,-1.4) node [rect] {regular languages}; |
296 |
146 \end{tikzpicture} |
297 \end{frame}} |
147 |
298 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
148 \end{center} |
299 |
149 |
300 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
150 \end{frame}} |
301 \mode<presentation>{ |
151 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
302 \begin{frame}[c] |
152 |
303 \frametitle{\begin{tabular}{c}Language of a CFG\end{tabular}} |
153 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
304 |
154 \mode<presentation>{ |
305 Let \bl{$G$} be a context-free grammar with start symbol \bl{$S$}. |
155 \begin{frame}[t] |
306 Then the language \bl{$L(G)$} is: |
156 \frametitle{\begin{tabular}{c}Numbers\end{tabular}} |
307 |
157 |
308 \begin{center} |
158 A grammar for numbers: |
309 \bl{$\{c_1\ldots c_n \;|\; \forall i.\; c_i \in T \wedge S \rightarrow^* c_1\ldots c_n \}$} |
159 |
310 \end{center}\pause |
160 \begin{center} |
|
161 \bl{\begin{tabular}{lcl} |
|
162 $N$ & $\rightarrow$ & $N \cdot N \;|\; 0 \;|\; 1 \;|\; \ldots \;|\; 9$\\ |
|
163 \end{tabular}} |
|
164 \end{center} |
|
165 |
|
166 Unfortunately left-recursive (and ambiguous).\medskip\\ |
|
167 A problem for \alert{recursive descent parsers} (e.g.~parser combinators). |
|
168 \bigskip\pause |
|
169 |
|
170 A non-left-recursive grammar for numbers |
|
171 |
|
172 \begin{center} |
|
173 \bl{\begin{tabular}{lcl} |
|
174 $N$ & $\rightarrow$ & $0 \cdot N \;|\;1 \cdot N \;|\ldots\;|\; 0 \;|\; 1 \;|\; \ldots \;|\; 9$\\ |
|
175 \end{tabular}} |
|
176 \end{center} |
|
177 |
|
178 |
|
179 \end{frame}} |
|
180 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
181 |
|
182 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
183 \mode<presentation>{ |
|
184 \begin{frame}[c] |
|
185 \frametitle{\begin{tabular}{c}Chomsky Normal Form\end{tabular}} |
|
186 |
|
187 All rules must be of the form |
|
188 |
|
189 \begin{center} |
|
190 \bl{$A \rightarrow a$} |
|
191 \end{center} |
|
192 |
|
193 or |
|
194 |
|
195 \begin{center} |
|
196 \bl{$A \rightarrow B\cdot C$} |
|
197 \end{center} |
|
198 |
|
199 |
|
200 |
|
201 \end{frame}} |
|
202 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
203 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
204 \mode<presentation>{ |
|
205 \begin{frame}[c] |
|
206 \frametitle{\begin{tabular}{c}CYK Algorithm\end{tabular}} |
|
207 |
|
208 |
|
209 \begin{center} |
|
210 \bl{\begin{tabular}{@ {}lcl} |
|
211 $S$ & $\rightarrow$ & $N\cdot P$ \\ |
|
212 $P$ & $\rightarrow$ & $V\cdot N$ \\ |
|
213 $N$ & $\rightarrow$ & $N\cdot N$ \\ |
|
214 $N$ & $\rightarrow$ & $\texttt{students} \;|\; \texttt{Jeff} \;|\; \texttt{geometry} \;|\; \texttt{trains} $ \\ |
|
215 $V$ & $\rightarrow$ & $\texttt{trains}$ |
|
216 \end{tabular}} |
|
217 \end{center} |
|
218 |
|
219 \bl{\texttt{Jeff trains geometry students}} |
|
220 |
|
221 \end{frame}} |
|
222 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
223 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
224 \mode<presentation>{ |
|
225 \begin{frame}[c] |
|
226 \frametitle{\begin{tabular}{c}CYK Algorithm\end{tabular}} |
|
227 |
311 |
228 |
312 \begin{itemize} |
229 \begin{itemize} |
313 \item Terminals are so-called because there are no rules for replacing them |
230 \item runtime is \bl{$O(n^3)$}\bigskip |
314 \item Once generated, terminals are ``permanent'' |
231 \item grammars need to be transferred into CNF |
315 \item Terminals ought to be tokens of the language (at least in this course) |
|
316 \end{itemize} |
232 \end{itemize} |
317 |
233 |
318 \end{frame}} |
234 \end{frame}} |
319 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
235 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
320 |
|
321 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
322 \mode<presentation>{ |
|
323 \begin{frame}[c] |
|
324 \frametitle{\begin{tabular}{c}Arithmetic Expressions\end{tabular}} |
|
325 |
|
326 \begin{center} |
|
327 \bl{\begin{tabular}{lcl} |
|
328 $E$ & $\rightarrow$ & $num\_token$ \\ |
|
329 $E$ & $\rightarrow$ & $E \cdot + \cdot E$ \\ |
|
330 $E$ & $\rightarrow$ & $E \cdot - \cdot E$ \\ |
|
331 $E$ & $\rightarrow$ & $E \cdot * \cdot E$ \\ |
|
332 $E$ & $\rightarrow$ & $( \cdot E \cdot )$ |
|
333 \end{tabular}} |
|
334 \end{center}\pause\bigskip |
|
335 |
|
336 A CFG is \alert{left-recursive} if it has a nonterminal \bl{$E$} such |
|
337 that \bl{$E \rightarrow^+ E\cdot \ldots$} |
|
338 |
|
339 \end{frame}} |
|
340 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
341 |
236 |
342 |
237 |
343 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
238 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
344 \mode<presentation>{ |
239 \mode<presentation>{ |
345 \begin{frame}[c] |
240 \begin{frame}[c] |