--- a/automata.scala Sun Oct 14 23:41:49 2012 +0100
+++ b/automata.scala Wed Oct 17 00:58:06 2012 +0100
@@ -69,15 +69,15 @@
type States = Set[State]
type Transition = Map[(State, Char), State]
+// we use as an alphabet all lowercase letters
+val alphabet = "abcdefghijklmnopqrstuvwxyz".toSet
+
def goto(q: State, c: Char, qs: States, delta: Transition) : (States, Transition) = {
- val qc : State = der(q, c)
- if (qs.contains(qc)) (qs, delta + ((q, c) -> q))
- else explore(qs + qc, delta + ((q, c) -> qc), qc)
+ val q_der : State = der(q, c)
+ if (qs.contains(q_der)) (qs, delta + ((q, c) -> q))
+ else explore(qs + q_der, delta + ((q, c) -> q_der), q_der)
}
-// we use as an alphabet all lowercase letters
-val alphabet = "abcdefghijklmnopqrstuvwxyz".toSet
-
def explore (qs: States, delta: Transition, q: State) : (States, Transition) =
alphabet.foldRight[(States, Transition)] (qs, delta) ((c, qsd) => goto(q, c, qsd._1, qsd._2))
Binary file slides04.pdf has changed
--- a/slides04.tex Sun Oct 14 23:41:49 2012 +0100
+++ b/slides04.tex Wed Oct 17 00:58:06 2012 +0100
@@ -105,29 +105,40 @@
\begin{frame}[c]
\frametitle{\begin{tabular}{c}Last Week\end{tabular}}
-Last week I showed you
+Last week I showed you\bigskip
\begin{itemize}
-\item tokenizer
+\item a tokenizer taking a list of regular expressions\bigskip
\item tokenization identifies lexeme in an input stream of characters (or string)
-and categorizes them into tokens
+and cathegorizes them into tokens
+
+\end{itemize}
+
+\end{frame}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\item longest match rule (maximal munch rule): The
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode<presentation>{
+\begin{frame}[c]
+\frametitle{\begin{tabular}{c}Two Rules\end{tabular}}
+
+\begin{itemize}
+\item Longest match rule (maximal munch rule): The
longest initial substring matched by any regular expression is taken
-as next token.
+as next token.\bigskip
\item Rule priority:
For a particular longest initial substring, the first regular
expression that can match determines the token.
-\item problem with infix operations, for example i-12
\end{itemize}
-\url{http://www.technologyreview.com/tr10/?year=2011}
+%\url{http://www.technologyreview.com/tr10/?year=2011}
-finite deterministic automata/ nondeterministic automaton
+%finite deterministic automata/ nondeterministic automaton
+%\item problem with infix operations, for example i-12
\end{frame}}
@@ -135,23 +146,39 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
-\begin{frame}[c]
-\frametitle{\begin{tabular}{c}The Derivative of a Rexp\end{tabular}}
+\begin{frame}[t]
\begin{center}
-\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}}
- \bl{der c ($\varnothing$)} & \bl{$\dn$} & \bl{$\varnothing$} & \\
- \bl{der c ($\epsilon$)} & \bl{$\dn$} & \bl{$\varnothing$} & \\
- \bl{der c (d)} & \bl{$\dn$} & \bl{if c $=$ d then $\epsilon$ else $\varnothing$} & \\
- \bl{der c (r$_1$ + r$_2$)} & \bl{$\dn$} & \bl{(der c r$_1$) + (der c r$_2$)} & \\
- \bl{der c (r$_1$ $\cdot$ r$_2$)} & \bl{$\dn$} & \bl{if nullable r$_1$}\\
- & & \bl{then ((der c r$_1$) $\cdot$ r$_2$) + (der c r$_2$)}\\
- & & \bl{else (der c r$_1$) $\cdot$ r$_2$}\\
- \bl{der c (r$^*$)} & \bl{$\dn$} & \bl{(der c r) $\cdot$ (r$^*$)}\\
- \end{tabular}
+\texttt{"if true then then 42 else +"}
\end{center}
-``the regular expression after \bl{c} has been recognised''
+\only<1>{
+\small\begin{tabular}{l}
+KEYWORD(if),\\
+WHITESPACE,\\
+IDENT(true),\\
+WHITESPACE,\\
+KEYWORD(then),\\
+WHITESPACE,\\
+KEYWORD(then),\\
+WHITESPACE,\\
+NUM(42),\\
+WHITESPACE,\\
+KEYWORD(else),\\
+WHITESPACE,\\
+OP(+)
+\end{tabular}}
+
+\only<2>{
+\small\begin{tabular}{l}
+KEYWORD(if),\\
+IDENT(true),\\
+KEYWORD(then),\\
+KEYWORD(then),\\
+NUM(42),\\
+KEYWORD(else),\\
+OP(+)
+\end{tabular}}
\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -160,130 +187,14 @@
\mode<presentation>{
\begin{frame}[c]
-For this we defined the set \bl{Der c A} as
-\begin{center}
-\bl{Der c A $\dn$ $\{$ s $|$ c::s $\in$ A$\}$ }
-\end{center}
-
-which is called the semantic derivative of a set
-and proved
-
-\begin{center}
-\bl{$L$(der c r) $=$ Der c ($L$(r))}
-\end{center}
-
-
-\end{frame}}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\mode<presentation>{
-\begin{frame}[c]
-\frametitle{\begin{tabular}{c}The Idea of the Algorithm\end{tabular}}
-
-If we want to recognise the string \bl{abc} with regular expression \bl{r}
-then\medskip
-
-\begin{enumerate}
-\item \bl{Der a ($L$(r))}\pause
-\item \bl{Der b (Der a ($L$(r)))}
-\item \bl{Der c (Der b (Der a ($L$(r))))}\pause
-\item finally we test whether the empty string is in set\pause\medskip
-\end{enumerate}
-
-The matching algorithm works similarly, just over regular expression than sets.
-\end{frame}}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\mode<presentation>{
-\begin{frame}[c]
-
-Input: string \bl{abc} and regular expression \bl{r}
-
-\begin{enumerate}
-\item \bl{der a r}
-\item \bl{der b (der a r)}
-\item \bl{der c (der b (der a r))}\pause
-\item finally check whether the latter regular expression can match the empty string
-\end{enumerate}
-
-\end{frame}}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\mode<presentation>{
-\begin{frame}[c]
-
-We need to prove
+There is one small problem with the tokenizer. How should we
+tokenize:
\begin{center}
-\bl{$L$(der c r) $=$ Der c ($L$(r))}
+\texttt{"x - 3"}
\end{center}
-by induction on the regular expression.
-
-\end{frame}}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\mode<presentation>{
-\begin{frame}[c]
-\frametitle{\begin{tabular}{c}Proofs about Rexp\end{tabular}}
-
-\begin{itemize}
-\item \bl{$P$} holds for \bl{$\varnothing$}, \bl{$\epsilon$} and \bl{c}\bigskip
-\item \bl{$P$} holds for \bl{r$_1$ + r$_2$} under the assumption that \bl{$P$} already
-holds for \bl{r$_1$} and \bl{r$_2$}.\bigskip
-\item \bl{$P$} holds for \bl{r$_1$ $\cdot$ r$_2$} under the assumption that \bl{$P$} already
-holds for \bl{r$_1$} and \bl{r$_2$}.
-\item \bl{$P$} holds for \bl{r$^*$} under the assumption that \bl{$P$} already
-holds for \bl{r}.
-\end{itemize}
-
-\end{frame}}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\mode<presentation>{
-\begin{frame}[c]
-\frametitle{\begin{tabular}{c}Proofs about Natural Numbers\\ and Strings\end{tabular}}
-
-\begin{itemize}
-\item \bl{$P$} holds for \bl{$0$} and
-\item \bl{$P$} holds for \bl{$n + 1$} under the assumption that \bl{$P$} already
-holds for \bl{$n$}
-\end{itemize}\bigskip
-
-\begin{itemize}
-\item \bl{$P$} holds for \bl{\texttt{""}} and
-\item \bl{$P$} holds for \bl{$c\!::\!s$} under the assumption that \bl{$P$} already
-holds for \bl{$s$}
-\end{itemize}
-
-\end{frame}}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\mode<presentation>{
-\begin{frame}[t]
-\frametitle{\begin{tabular}{c}Regular Expressions\end{tabular}}
-
-\begin{center}
- \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l}
- \bl{r} & \bl{$::=$} & \bl{$\varnothing$} & null\\
- & \bl{$\mid$} & \bl{$\epsilon$} & empty string / "" / []\\
- & \bl{$\mid$} & \bl{c} & character\\
- & \bl{$\mid$} & \bl{r$_1$ $\cdot$ r$_2$} & sequence\\
- & \bl{$\mid$} & \bl{r$_1$ + r$_2$} & alternative / choice\\
- & \bl{$\mid$} & \bl{r$^*$} & star (zero or more)\\
- \end{tabular}\bigskip\pause
- \end{center}
-
\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -293,11 +204,77 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
-\frametitle{\begin{tabular}{c}Languages\end{tabular}}
+\frametitle{\begin{tabular}{c}Automata\end{tabular}}
+
+A deterministic finite automaton consists of:
+
+\begin{itemize}
+\item a finite set of states
+\item one of these states is the start state
+\item some states are accepting states, and
+\item there is transition function\medskip
+
+\small
+which takes a state and a character as arguments and produces a new state\smallskip\\
+this function might not always be defined everywhere
+\end{itemize}
+
+\begin{center}
+\bl{$A(Q, q_0, F, \delta)$}
+\end{center}
+\end{frame}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode<presentation>{
+\begin{frame}[c]
+
+\begin{center}
+\includegraphics[scale=0.7]{pics/ch3.jpg}
+\end{center}
+
+\end{frame}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-A \alert{language} is a set of strings.\bigskip
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode<presentation>{
+\begin{frame}[t]
+\frametitle{\begin{tabular}{c}Accepting a String\end{tabular}}
+
+\begin{center}
+\bl{$A(Q, q_0, F, \delta)$}
+\end{center}\bigskip
+
+\begin{center}
+\begin{tabular}{l}
+\bl{$\hat{\delta}(\texttt{""}, q) = q$}\\
+\bl{$\hat{\delta}(c::s, q) = \hat{\delta}(s, \delta(c, q))$}\\
+\end{tabular}
+\end{center}\bigskip\pause
-A \alert{regular expression} specifies a set of strings or language.\bigskip
+\begin{center}
+Accepting? \hspace{5mm}\bl{$\hat{\delta}(s, q_0) \in F$}
+\end{center}
+\end{frame}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode<presentation>{
+\begin{frame}[c]
+
+\begin{center}
+\includegraphics[scale=0.7]{pics/ch4.jpg}
+\end{center}
+
+\end{frame}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mode<presentation>{
+\begin{frame}[c]
+\frametitle{\begin{tabular}{c}Languages\end{tabular}}
A language is \alert{regular} iff there exists
a regular expression that recognises all its strings.\bigskip\bigskip\pause
@@ -306,57 +283,16 @@
\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\mode<presentation>{
-\begin{frame}[t]
-\frametitle{\begin{tabular}{c}Regular Expressions\end{tabular}}
-
-\begin{center}
- \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l}
- \bl{r} & \bl{$::=$} & \bl{$\varnothing$} & null\\
- & \bl{$\mid$} & \bl{$\epsilon$} & empty string / "" / []\\
- & \bl{$\mid$} & \bl{c} & character\\
- & \bl{$\mid$} & \bl{r$_1$ $\cdot$ r$_2$} & sequence\\
- & \bl{$\mid$} & \bl{r$_1$ + r$_2$} & alternative / choice\\
- & \bl{$\mid$} & \bl{r$^*$} & star (zero or more)\\
- \end{tabular}\bigskip
- \end{center}
-
-How about ranges \bl{[a-z]}, \bl{r$^\text{+}$} and \bl{!r}?
-
-\end{frame}}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
-\frametitle{\begin{tabular}{c}Negation of Regular Expr's\end{tabular}}
-
-\begin{itemize}
-\item \bl{!r} \hspace{6mm} (everything that \bl{r} cannot recognise)\medskip
-\item \bl{$L$(!r) $\dn$ UNIV - $L$(r)}\medskip
-\item \bl{nullable (!r) $\dn$ not (nullable(r))}\medskip
-\item \bl{der\,c\,(!r) $\dn$ !(der\,c\,r)}
-\end{itemize}
-
-\end{frame}}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\mode<presentation>{
-\begin{frame}[c]
-\frametitle{\begin{tabular}{c}Regular Exp's for Lexing\end{tabular}}
-
-Lexing separates strings into ``words'' / components.
-
\begin{itemize}
-\item Identifiers (non-empty strings of letters or digits, starting with a letter)
-\item Numbers (non-empty sequences of digits omitting leading zeros)
-\item Keywords (else, if, while, \ldots)
-\item White space (a non-empty sequence of blanks, newlines and tabs)
-\item Comments
+\item Assuming you have the alphabet \bl{\{a, b, c\}}\bigskip
+\item Give a regular expression that can recognise all strings that have at least one \bl{b}.
\end{itemize}
\end{frame}}
@@ -365,19 +301,22 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
-\frametitle{\begin{tabular}{c}Automata\end{tabular}}
-A deterministic finite automaton consists of:
+
\begin{itemize}
-\item a set of states
-\item one of these states is the start state
-\item some states are accepting states, and
-\item there is transition function\medskip
+\item The star-case in our proof needs the following lemma
+\begin{center}
+\bl{Der\,c\,A$^*$ $=$ (Der c A)\,@\, A$^*$}
+\end{center}
+\end{itemize}\bigskip\bigskip
-\small
-which takes a state as argument and a character and produces a new state\smallskip\\
-this function might not always be defined
+
+
+\begin{itemize}
+\item If \bl{\texttt{""} $\in$ A}, then\\ \bl{Der\,c\,(A @ B) $=$ (Der\,c\,A) @ B $\cup$ (Der\,c\,B)}\medskip
+\item If \bl{\texttt{""} $\not\in$ A}, then\\ \bl{Der\,c\,(A @ B) $=$ (Der\,c\,A) @ B}
+
\end{itemize}
\end{frame}}