diff -r eeff9953a1c1 -r 487b0c0aef75 slides04.tex --- a/slides04.tex Sun Oct 14 23:41:49 2012 +0100 +++ b/slides04.tex Wed Oct 17 00:58:06 2012 +0100 @@ -105,29 +105,40 @@ \begin{frame}[c] \frametitle{\begin{tabular}{c}Last Week\end{tabular}} -Last week I showed you +Last week I showed you\bigskip \begin{itemize} -\item tokenizer +\item a tokenizer taking a list of regular expressions\bigskip \item tokenization identifies lexeme in an input stream of characters (or string) -and categorizes them into tokens +and cathegorizes them into tokens + +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\item longest match rule (maximal munch rule): The +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Two Rules\end{tabular}} + +\begin{itemize} +\item Longest match rule (maximal munch rule): The longest initial substring matched by any regular expression is taken -as next token. +as next token.\bigskip \item Rule priority: For a particular longest initial substring, the first regular expression that can match determines the token. -\item problem with infix operations, for example i-12 \end{itemize} -\url{http://www.technologyreview.com/tr10/?year=2011} +%\url{http://www.technologyreview.com/tr10/?year=2011} -finite deterministic automata/ nondeterministic automaton +%finite deterministic automata/ nondeterministic automaton +%\item problem with infix operations, for example i-12 \end{frame}} @@ -135,23 +146,39 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}The Derivative of a Rexp\end{tabular}} +\begin{frame}[t] \begin{center} -\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}} - \bl{der c ($\varnothing$)} & \bl{$\dn$} & \bl{$\varnothing$} & \\ - \bl{der c ($\epsilon$)} & \bl{$\dn$} & \bl{$\varnothing$} & \\ - \bl{der c (d)} & \bl{$\dn$} & \bl{if c $=$ d then $\epsilon$ else $\varnothing$} & \\ - \bl{der c (r$_1$ + r$_2$)} & \bl{$\dn$} & \bl{(der c r$_1$) + (der c r$_2$)} & \\ - \bl{der c (r$_1$ $\cdot$ r$_2$)} & \bl{$\dn$} & \bl{if nullable r$_1$}\\ - & & \bl{then ((der c r$_1$) $\cdot$ r$_2$) + (der c r$_2$)}\\ - & & \bl{else (der c r$_1$) $\cdot$ r$_2$}\\ - \bl{der c (r$^*$)} & \bl{$\dn$} & \bl{(der c r) $\cdot$ (r$^*$)}\\ - \end{tabular} +\texttt{"if true then then 42 else +"} \end{center} -``the regular expression after \bl{c} has been recognised'' +\only<1>{ +\small\begin{tabular}{l} +KEYWORD(if),\\ +WHITESPACE,\\ +IDENT(true),\\ +WHITESPACE,\\ +KEYWORD(then),\\ +WHITESPACE,\\ +KEYWORD(then),\\ +WHITESPACE,\\ +NUM(42),\\ +WHITESPACE,\\ +KEYWORD(else),\\ +WHITESPACE,\\ +OP(+) +\end{tabular}} + +\only<2>{ +\small\begin{tabular}{l} +KEYWORD(if),\\ +IDENT(true),\\ +KEYWORD(then),\\ +KEYWORD(then),\\ +NUM(42),\\ +KEYWORD(else),\\ +OP(+) +\end{tabular}} \end{frame}} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -160,130 +187,14 @@ \mode{ \begin{frame}[c] -For this we defined the set \bl{Der c A} as -\begin{center} -\bl{Der c A $\dn$ $\{$ s $|$ c::s $\in$ A$\}$ } -\end{center} - -which is called the semantic derivative of a set -and proved - -\begin{center} -\bl{$L$(der c r) $=$ Der c ($L$(r))} -\end{center} - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}The Idea of the Algorithm\end{tabular}} - -If we want to recognise the string \bl{abc} with regular expression \bl{r} -then\medskip - -\begin{enumerate} -\item \bl{Der a ($L$(r))}\pause -\item \bl{Der b (Der a ($L$(r)))} -\item \bl{Der c (Der b (Der a ($L$(r))))}\pause -\item finally we test whether the empty string is in set\pause\medskip -\end{enumerate} - -The matching algorithm works similarly, just over regular expression than sets. -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -Input: string \bl{abc} and regular expression \bl{r} - -\begin{enumerate} -\item \bl{der a r} -\item \bl{der b (der a r)} -\item \bl{der c (der b (der a r))}\pause -\item finally check whether the latter regular expression can match the empty string -\end{enumerate} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] - -We need to prove +There is one small problem with the tokenizer. How should we +tokenize: \begin{center} -\bl{$L$(der c r) $=$ Der c ($L$(r))} +\texttt{"x - 3"} \end{center} -by induction on the regular expression. - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Proofs about Rexp\end{tabular}} - -\begin{itemize} -\item \bl{$P$} holds for \bl{$\varnothing$}, \bl{$\epsilon$} and \bl{c}\bigskip -\item \bl{$P$} holds for \bl{r$_1$ + r$_2$} under the assumption that \bl{$P$} already -holds for \bl{r$_1$} and \bl{r$_2$}.\bigskip -\item \bl{$P$} holds for \bl{r$_1$ $\cdot$ r$_2$} under the assumption that \bl{$P$} already -holds for \bl{r$_1$} and \bl{r$_2$}. -\item \bl{$P$} holds for \bl{r$^*$} under the assumption that \bl{$P$} already -holds for \bl{r}. -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Proofs about Natural Numbers\\ and Strings\end{tabular}} - -\begin{itemize} -\item \bl{$P$} holds for \bl{$0$} and -\item \bl{$P$} holds for \bl{$n + 1$} under the assumption that \bl{$P$} already -holds for \bl{$n$} -\end{itemize}\bigskip - -\begin{itemize} -\item \bl{$P$} holds for \bl{\texttt{""}} and -\item \bl{$P$} holds for \bl{$c\!::\!s$} under the assumption that \bl{$P$} already -holds for \bl{$s$} -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Regular Expressions\end{tabular}} - -\begin{center} - \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l} - \bl{r} & \bl{$::=$} & \bl{$\varnothing$} & null\\ - & \bl{$\mid$} & \bl{$\epsilon$} & empty string / "" / []\\ - & \bl{$\mid$} & \bl{c} & character\\ - & \bl{$\mid$} & \bl{r$_1$ $\cdot$ r$_2$} & sequence\\ - & \bl{$\mid$} & \bl{r$_1$ + r$_2$} & alternative / choice\\ - & \bl{$\mid$} & \bl{r$^*$} & star (zero or more)\\ - \end{tabular}\bigskip\pause - \end{center} - \end{frame}} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -293,11 +204,77 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \mode{ \begin{frame}[c] -\frametitle{\begin{tabular}{c}Languages\end{tabular}} +\frametitle{\begin{tabular}{c}Automata\end{tabular}} + +A deterministic finite automaton consists of: + +\begin{itemize} +\item a finite set of states +\item one of these states is the start state +\item some states are accepting states, and +\item there is transition function\medskip + +\small +which takes a state and a character as arguments and produces a new state\smallskip\\ +this function might not always be defined everywhere +\end{itemize} + +\begin{center} +\bl{$A(Q, q_0, F, \delta)$} +\end{center} +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\begin{center} +\includegraphics[scale=0.7]{pics/ch3.jpg} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -A \alert{language} is a set of strings.\bigskip +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}Accepting a String\end{tabular}} + +\begin{center} +\bl{$A(Q, q_0, F, \delta)$} +\end{center}\bigskip + +\begin{center} +\begin{tabular}{l} +\bl{$\hat{\delta}(\texttt{""}, q) = q$}\\ +\bl{$\hat{\delta}(c::s, q) = \hat{\delta}(s, \delta(c, q))$}\\ +\end{tabular} +\end{center}\bigskip\pause -A \alert{regular expression} specifies a set of strings or language.\bigskip +\begin{center} +Accepting? \hspace{5mm}\bl{$\hat{\delta}(s, q_0) \in F$} +\end{center} +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\begin{center} +\includegraphics[scale=0.7]{pics/ch4.jpg} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Languages\end{tabular}} A language is \alert{regular} iff there exists a regular expression that recognises all its strings.\bigskip\bigskip\pause @@ -306,57 +283,16 @@ \end{frame}} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Regular Expressions\end{tabular}} - -\begin{center} - \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l} - \bl{r} & \bl{$::=$} & \bl{$\varnothing$} & null\\ - & \bl{$\mid$} & \bl{$\epsilon$} & empty string / "" / []\\ - & \bl{$\mid$} & \bl{c} & character\\ - & \bl{$\mid$} & \bl{r$_1$ $\cdot$ r$_2$} & sequence\\ - & \bl{$\mid$} & \bl{r$_1$ + r$_2$} & alternative / choice\\ - & \bl{$\mid$} & \bl{r$^*$} & star (zero or more)\\ - \end{tabular}\bigskip - \end{center} - -How about ranges \bl{[a-z]}, \bl{r$^\text{+}$} and \bl{!r}? - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \mode{ \begin{frame}[c] -\frametitle{\begin{tabular}{c}Negation of Regular Expr's\end{tabular}} - -\begin{itemize} -\item \bl{!r} \hspace{6mm} (everything that \bl{r} cannot recognise)\medskip -\item \bl{$L$(!r) $\dn$ UNIV - $L$(r)}\medskip -\item \bl{nullable (!r) $\dn$ not (nullable(r))}\medskip -\item \bl{der\,c\,(!r) $\dn$ !(der\,c\,r)} -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Regular Exp's for Lexing\end{tabular}} - -Lexing separates strings into ``words'' / components. - \begin{itemize} -\item Identifiers (non-empty strings of letters or digits, starting with a letter) -\item Numbers (non-empty sequences of digits omitting leading zeros) -\item Keywords (else, if, while, \ldots) -\item White space (a non-empty sequence of blanks, newlines and tabs) -\item Comments +\item Assuming you have the alphabet \bl{\{a, b, c\}}\bigskip +\item Give a regular expression that can recognise all strings that have at least one \bl{b}. \end{itemize} \end{frame}} @@ -365,19 +301,22 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \mode{ \begin{frame}[c] -\frametitle{\begin{tabular}{c}Automata\end{tabular}} -A deterministic finite automaton consists of: + \begin{itemize} -\item a set of states -\item one of these states is the start state -\item some states are accepting states, and -\item there is transition function\medskip +\item The star-case in our proof needs the following lemma +\begin{center} +\bl{Der\,c\,A$^*$ $=$ (Der c A)\,@\, A$^*$} +\end{center} +\end{itemize}\bigskip\bigskip -\small -which takes a state as argument and a character and produces a new state\smallskip\\ -this function might not always be defined + + +\begin{itemize} +\item If \bl{\texttt{""} $\in$ A}, then\\ \bl{Der\,c\,(A @ B) $=$ (Der\,c\,A) @ B $\cup$ (Der\,c\,B)}\medskip +\item If \bl{\texttt{""} $\not\in$ A}, then\\ \bl{Der\,c\,(A @ B) $=$ (Der\,c\,A) @ B} + \end{itemize} \end{frame}}