# HG changeset patch # User Christian Urban # Date 1380188507 -3600 # Node ID 1ab41c59e3d368cc243d7155999b5de52cbee25b # Parent 4758a615587825b80eb687c65c9ec20ffe4b0a2b links diff -r 4758a6155878 -r 1ab41c59e3d3 hw/hw01.pdf Binary file hw/hw01.pdf has changed diff -r 4758a6155878 -r 1ab41c59e3d3 hw/hw01.tex --- a/hw/hw01.tex Thu Sep 26 10:39:23 2013 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,42 +0,0 @@ -\documentclass{article} -\usepackage{charter} -\usepackage{hyperref} -\usepackage{amssymb} - -\begin{document} - -\section*{Homework 1} - -\begin{enumerate} -\item {\bf (Optional)} If you want to run the code presented -in the lectures, install the -Scala programming language available (for free) from -\begin{center} -\url{http://www.scala-lang.org} -\end{center} - -\item {\bf (Optional)} Have a look at the crawler programs. -Can you find a usage for them in your daily programming life? - -\item In the context of the AFL-course, what is meant by the term \emph{language}? - -\item Give the definition for regular expressions. What is the meaning of a -regular expression? - -\item Assume the concatenation operation of two strings is written as $s_1 @ s_2$. -Define the operation of \emph{concatenating} two sets of strings. - -\item How is the power of a language defined? (Hint: There are two rules, one for $\_\!\_^0$ and -one for $\_\!\_^{n+1}$.) - -\item Given the regular expressions $r_1 = \epsilon$ and $r_2 = \varnothing$ and $r_3 = a$. -How many strings can the regular expressions $r_1^*$, $r_2^*$ and $r_3^*$ each match? - -\end{enumerate} - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff -r 4758a6155878 -r 1ab41c59e3d3 hw/hw02.pdf Binary file hw/hw02.pdf has changed diff -r 4758a6155878 -r 1ab41c59e3d3 hw/hw02.tex --- a/hw/hw02.tex Thu Sep 26 10:39:23 2013 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,36 +0,0 @@ -\documentclass{article} -\usepackage{charter} -\usepackage{hyperref} -\usepackage{amssymb} -\usepackage{amsmath} - -\begin{document} - -\section*{Homework 2} - -\begin{enumerate} -\item Give regular expressions for (a) decimal numbers and for (b) binary numbers. -(Hint: Observe that the empty string is not a number. Also observe that leading 0s -are normally not written.) - -\item Decide whether the following two regular expressions are equivalent $(\epsilon + a)^* \equiv^? a^*$ and -$(a \cdot b)^* \cdot a \equiv^? a \cdot (b \cdot a)^*$. - -\item Given the regular expression $r = (a \cdot b + b)^*$. Compute what the derivative of $r$ is with respect to -$a$ and $b$. Is $r$ nullable? - -\item What is a regular language? - -\item Prove that for all regular expressions $r$ we have -\begin{center} -$\text{nullable}(r)$ \quad if and only if \quad $\texttt{""} \in L(r)$ -\end{center} - -\end{enumerate} - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff -r 4758a6155878 -r 1ab41c59e3d3 hw/hw03.pdf Binary file hw/hw03.pdf has changed diff -r 4758a6155878 -r 1ab41c59e3d3 hw/hw03.tex --- a/hw/hw03.tex Thu Sep 26 10:39:23 2013 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,49 +0,0 @@ -\documentclass{article} -\usepackage{charter} -\usepackage{hyperref} -\usepackage{amssymb} -\usepackage{amsmath} - -\begin{document} - -\section*{Homework 3} - -\begin{enumerate} -\item Assume you have an alphabet consisting of the letters $a$, $b$ and $c$ only. -(a) Find a regular expression that recognises the two strings $ab$ and $ac$. (b) -Find a regular expression that matches all strings \emph{except} these two strings. -Note, you can only use regular expressions of the form -\begin{center} -$r ::= \varnothing \;|\; \epsilon \;|\; c \;|\; r_1 + r_2 \;|\; r_1 \cdot r_2 \;|\; r^*$ -\end{center} - -\item Define the function $zeroable$ which takes a regular expression as argument -and returns a boolean.\footnote{In an earlier version there was an error.} The -function should satisfy the following property: -\begin{center} -$zeroable(r)$ \;if and only if\; $L(r) = \varnothing$ -\end{center} - -\item Define the tokens and regular expressions for a language -consisting of numbers, left-parenthesis (, right-parenthesis ), -identifiers and the operations $+$, $-$ and $*$. Can the following strings -in this language be lexed? - -\begin{itemize} -\item \texttt{"}$(a + 3) * b$\texttt{"} -\item \texttt{"}$)()++ -33$\texttt{"} -\item \texttt{"}$(a / 3) * 3$\texttt{"} -\end{itemize} - - -In case they can, can you give the corresponding token sequences. -\end{enumerate} - - - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff -r 4758a6155878 -r 1ab41c59e3d3 hw/hw04.pdf Binary file hw/hw04.pdf has changed diff -r 4758a6155878 -r 1ab41c59e3d3 hw/hw04.tex --- a/hw/hw04.tex Thu Sep 26 10:39:23 2013 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,109 +0,0 @@ -\documentclass{article} -\usepackage{charter} -\usepackage{hyperref} -\usepackage{amssymb} -\usepackage{amsmath} - -\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions - -\begin{document} - -\section*{Homework 4} - -\begin{enumerate} -\item Why is every finite set of strings a regular language? - -\item What is the language recognised by the regular expressions $(\varnothing^*)^*$. - -\item If a regular expression $r$ does not contain any occurrence of $\varnothing$ -is it possible for $L(r)$ to be empty? - -\item Assume that $s^{-1}$ stands for the operation of reversing a -string $s$. Given the following \emph{reversing} function on regular -expressions - -\begin{center} -\begin{tabular}{r@{\hspace{1mm}}c@{\hspace{1mm}}l} -$rev(\varnothing)$ & $\dn$ & $\varnothing$\\ -$rev(\epsilon)$ & $\dn$ & $\epsilon$\\ -$rev(c)$ & $\dn$ & $c$\\ -$rev(r_1 + r_2)$ & $\dn$ & $rev(r_1) + rev(r_2)$\\ -$rev(r_1 \cdot r_2)$ & $\dn$ & $rev(r_2) \cdot rev(r_1)$\\ -$rev(r^*)$ & $\dn$ & $rev(r)^*$\\ -\end{tabular} -\end{center} - - -and the set - -\begin{center} -$Rev\,A \dn \{s^{-1} \;|\; s \in A\}$ -\end{center} - -prove whether - -\begin{center} -$L(rev(r)) = Rev (L(r))$ -\end{center} - -holds. - -\item Give a regular expression over the alphabet $\{a,b\}$ recognising all strings -that do not contain any substring $bb$ and end in $a$. - -\item Assume the delimiters for comments are \texttt{$\slash$*} and \texttt{*$\slash$}. -Give a regular expression that can recognise comments -of the form - -\begin{center} -\texttt{$\slash$*~\ldots{}~*$\slash$} -\end{center} - -where the three dots stand for arbitrary characters, but not comment delimiters. -(Hint: You can assume you are already given a regular expression written \texttt{ALL}, -that can recognise any character, and a regular expression \texttt{NOT} that recognises -the complement of a regular expression.) - -\item Given the alphabet $\{a,b\}$. Draw the automaton that has two states, say $q_0$ and $q_1$. -The starting state is $q_0$ and the final state is $q_1$. The transition -function is given by - -\begin{center} -\begin{tabular}{l} -$(q_0, a) \rightarrow q_0$\\ -$(q_0, b) \rightarrow q_1$\\ -$(q_1, b) \rightarrow q_1$ -\end{tabular} -\end{center} - -What is the languages recognised by this automaton? - -\item Give a deterministic finite automaton that can recognise -the language $L(a^*\cdot b\cdot b^*)$. - - -\item (Optional) The tokenizer in \texttt{regexp3.scala} takes as -argument a string and a list of rules. The result is a list of tokens. Improve this tokenizer so -that it filters out all comments and whitespace from the result. - -\item (Optional) Modify the tokenizer in \texttt{regexp2.scala} so that it -implements the \texttt{findAll} function. This function takes a regular -expressions and a string, and returns all substrings in this string that -match the regular expression. -\end{enumerate} - -% explain what is a context-free grammar and the language it generates -% -% -% Define the language L(M) accepted by a deterministic finite automaton M. -% -% -% does (a + b)*b+ and (a*b+) + (b*b+) define the same language - - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff -r 4758a6155878 -r 1ab41c59e3d3 hw/hw05.pdf Binary file hw/hw05.pdf has changed diff -r 4758a6155878 -r 1ab41c59e3d3 hw/hw05.tex --- a/hw/hw05.tex Thu Sep 26 10:39:23 2013 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,116 +0,0 @@ -\documentclass{article} -\usepackage{charter} -\usepackage{hyperref} -\usepackage{amssymb} -\usepackage{amsmath} -\usepackage{tikz} -\usetikzlibrary{automata} - -\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions - -\begin{document} - -\section*{Homework 5} - -\begin{enumerate} -\item Define the following regular expressions - -\begin{center} -\begin{tabular}{ll} -$r^+$ & (one or more matches)\\ -$r^?$ & (zero or one match)\\ -$r^{\{n\}}$ & (exactly $n$ matches)\\ -$r^{\{m, n\}}$ & (at least $m$ and maximal $n$ matches, with the\\ -& \phantom{(}assumption $m \le n$)\\ -\end{tabular} -\end{center} - -in terms of the usual regular expressions - -\begin{center} -$r ::= \varnothing \;|\; \epsilon \;|\; c \;|\; r_1 + r_2 \;|\; r_1 \cdot r_2 \;|\; r^*$ -\end{center} - -\item Given a deterministic finite automata $A(Q, q_0, F, \delta)$, -define which language is recognised by this automaton. - -\item Given the following deterministic finite automata over the alphabet -$\{a, b\}$, find an automaton that recognises the complement language. -(Hint: Recall that for the algorithm from the lectures, the automaton needs to be -in completed form, that is have a transition for every letter from the alphabet.) - -\begin{center} -\begin{tikzpicture}[scale=3, line width=0.7mm] - \node[state, initial] (q0) at ( 0,1) {$q_0$}; - \node[state, accepting] (q1) at ( 1,1) {$q_1$}; - \path[->] (q0) edge node[above] {$a$} (q1) - (q1) edge [loop right] node {$b$} () - ; -\end{tikzpicture} -\end{center} - -\item Given the following deterministic finite automaton - -\begin{center} -\begin{tikzpicture}[scale=3, line width=0.7mm] - \node[state, initial] (q0) at ( 0,1) {$q_0$}; - \node[state,accepting] (q1) at ( 1,1) {$q_1$}; - \node[state, accepting] (q2) at ( 2,1) {$q_2$}; - \path[->] (q0) edge node[above] {$b$} (q1) - (q1) edge [loop above] node[above] {$a$} () - (q2) edge [loop above] node[above] {$a, b$} () - (q1) edge node[above] {$b$} (q2) - (q0) edge[bend right] node[below] {$a$} (q2) - ; -\end{tikzpicture} -\end{center} -find the corresponding minimal automaton. State clearly which nodes -can be merged. - -\item Given the following non-deterministic finite automaton over the alphabet $\{a, b\}$, -find a deterministic finite automaton that recognises the same language: - -\begin{center} -\begin{tikzpicture}[scale=3, line width=0.7mm] - \node[state, initial] (q0) at ( 0,1) {$q_0$}; - \node[state] (q1) at ( 1,1) {$q_1$}; - \node[state, accepting] (q2) at ( 2,1) {$q_2$}; - \path[->] (q0) edge node[above] {$a$} (q1) - (q0) edge [loop above] node[above] {$b$} () - (q0) edge [loop below] node[below] {$a$} () - (q1) edge node[above] {$a$} (q2) - ; -\end{tikzpicture} -\end{center} - -\item -Given the following finite deterministic automaton over the alphabet $\{a, b\}$: - -\begin{center} -\begin{tikzpicture}[scale=2, line width=0.5mm] - \node[state, initial, accepting] (q0) at ( 0,1) {$q_0$}; - \node[state, accepting] (q1) at ( 1,1) {$q_1$}; - \node[state] (q2) at ( 2,1) {$q_2$}; - \path[->] (q0) edge[bend left] node[above] {$a$} (q1) - (q1) edge[bend left] node[above] {$b$} (q0) - (q2) edge[bend left=50] node[below] {$b$} (q0) - (q1) edge node[above] {$a$} (q2) - (q2) edge [loop right] node {$a$} () - (q0) edge [loop below] node {$b$} () - ; -\end{tikzpicture} -\end{center} - -Give a regular expression that can recognise the same language as -this automaton. (Hint: If you use Brzozwski's method, you can assume -Arden's lemma which states that an equation of the form $q = q\cdot r + s$ -has the unique solution $q = s \cdot r^*$.)\ -\end{enumerate} - - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff -r 4758a6155878 -r 1ab41c59e3d3 hw/hw06.pdf Binary file hw/hw06.pdf has changed diff -r 4758a6155878 -r 1ab41c59e3d3 hw/hw06.tex --- a/hw/hw06.tex Thu Sep 26 10:39:23 2013 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,72 +0,0 @@ -\documentclass{article} -\usepackage{charter} -\usepackage{hyperref} -\usepackage{amssymb} -\usepackage{amsmath} -\usepackage{tikz} -\usetikzlibrary{automata} - -\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions - -\begin{document} - -\section*{Homework 6} - -\begin{enumerate} -\item (i) Give the regular expressions for lexing a language -consisting of whitespaces, identifiers (some letters followed by digits), numbers, -operations \texttt{=}, \texttt{<} and \texttt{>}, and the keywords -\texttt{if}, \texttt{then} and \texttt{else}. -(ii) Decide whether the following strings -can be lexed in this language? - -\begin{enumerate} -\item \texttt{"if y4 = 3 then 1 else 3"} -\item \texttt{"if33 ifif then then23 else else 32"} -\item \texttt{"if x4x < 33 then 1 else 3"} -\end{enumerate} - -In case they can, give the corresponding token sequences. (Hint: -Observe the maximal munch rule and priorities of your regular -expressions that make the process of lexing unambiguous.) - -\item Suppose the grammar - -\begin{center} -\begin{tabular}{lcl} -$E$ & $\rightarrow$ & $F \;|\; F \cdot * \cdot F \;|\; F \cdot \backslash \cdot F$\\ -$F$ & $\rightarrow$ & $T \;|\; T \cdot \texttt{+} \cdot T \;|\; T \cdot \texttt{-} \cdot T$\\ -$T$ & $\rightarrow$ & $num \;|\; \texttt{(} \cdot E \cdot \texttt{)}$\\ -\end{tabular} -\end{center} - -where $E$, $F$ and $T$ are non-terminals, $E$ is the starting symbol of the grammar, and $num$ stands for -a number token. Give a parse tree for the string \texttt{(3+3)+(2*3)}. - -\item Define what it means for a grammar to be ambiguous. Give an example of -an ambiguous grammar. - -\item Suppose boolean expressions are built up from - -\begin{center} -\begin{tabular}{ll} -1.) & tokens for \texttt{true} and \texttt{false},\\ -2.) & the infix operations \texttt{$\wedge$} and \texttt{$\vee$},\\ -3.) & the prefix operation $\neg$, and\\ -4.) & can be enclosed in parentheses. -\end{tabular} -\end{center} - -(i) Give a grammar that can recognise such boolean expressions -and (ii) give a sample string involving all rules given in 1.-4.~that -can be parsed by this grammar. - - -\end{enumerate} - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff -r 4758a6155878 -r 1ab41c59e3d3 hw/hw07.pdf Binary file hw/hw07.pdf has changed diff -r 4758a6155878 -r 1ab41c59e3d3 hw/hw07.tex --- a/hw/hw07.tex Thu Sep 26 10:39:23 2013 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,75 +0,0 @@ -\documentclass{article} -\usepackage{charter} -\usepackage{hyperref} -\usepackage{amssymb} -\usepackage{amsmath} -\usepackage{tikz} -\usetikzlibrary{automata} - -\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions - -\begin{document} - -\section*{Homework 7} - -\begin{enumerate} -\item Suppose the following finite deterministic automaton over the alphabet $\{0, 1\}$. - -\begin{center} -\begin{tikzpicture}[scale=2, line width=0.5mm] - \node[state, initial, accepting] (q0) at ( 0,1) {$q_0$}; - \node[state, accepting] (q1) at ( 1,1) {$q_1$}; - \node[state] (q2) at ( 2,1) {$q_2$}; - \path[->] (q0) edge[bend left] node[above] {$0$} (q1) - (q1) edge[bend left] node[above] {$1$} (q0) - (q2) edge[bend left=50] node[below] {$1$} (q0) - (q1) edge node[above] {$0$} (q2) - (q2) edge [loop right] node {$0$} () - (q0) edge [loop below] node {$1$} () - ; -\end{tikzpicture} -\end{center} - -Give a regular expression that can recognise the same language as -this automaton. (Hint: If you use Brzozwski's method, you can assume -Arden's lemma which states that an equation of the form $q = q\cdot r + s$ -has the unique solution $q = s \cdot r^*$.) - -\item Consider the following grammar - -\begin{center} -\begin{tabular}{l} -$S \rightarrow N\cdot P$\\ -$P \rightarrow V\cdot N$\\ -$N \rightarrow N\cdot N$\\ -$N \rightarrow A \cdot N$\\ -$N \rightarrow \texttt{student} \;|\; \texttt{trainer} \;|\; \texttt{team} \;|\; \texttt{trains}$\\ -$V \rightarrow \texttt{trains} \;|\; \texttt{team}$\\ -$A \rightarrow \texttt{The} \;|\; \texttt{the}$\\ -\end{tabular} -\end{center} - -where $S$ is the start symbol and $S$, $P$, $N$, $V$ and $A$ are non-terminals. -Using the CYK-algorithm, check whether or not the following string can be parsed -by the grammar: - -\begin{center} -\texttt{The trainer trains the student team} -\end{center} - -\item {\bf (Optional)} The task is to match strings where the letters are in alphabetical order---for example, -\texttt{abcfjz} would pass, but \texttt{acb} would not. Whitespace should be ignored---for example -\texttt{ab c d} should pass. The point is to try to get the regular expression as short as possible! -See: - -\begin{center} -\url{http://callumacrae.github.com/regex-tuesday/challenge11.html} -\end{center} -\end{enumerate} - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff -r 4758a6155878 -r 1ab41c59e3d3 hw/hw08.pdf Binary file hw/hw08.pdf has changed diff -r 4758a6155878 -r 1ab41c59e3d3 hw/hw08.tex --- a/hw/hw08.tex Thu Sep 26 10:39:23 2013 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,52 +0,0 @@ -\documentclass{article} -\usepackage{charter} -\usepackage{hyperref} -\usepackage{amssymb} -\usepackage{amsmath} -\usepackage{tikz} -\usetikzlibrary{automata} - -\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions - -\begin{document} - -\section*{Homework 8} - -\begin{enumerate} -\item Suppose the following grammar for the WHILE-language: - -\begin{center} -\begin{tabular}{lcl} -$Stmt$ & $\rightarrow$ & $\text{skip}$\\ - & $|$ & $Id := AExp$\\ - & $|$ & $\text{if}\; B\!Exp \;\text{then}\; Block \;\text{else}\; Block$\\ - & $|$ & $\text{while}\; B\!Exp \;\text{do}\; Block$\medskip\\ -$Stmts$ & $\rightarrow$ & $Stmt \;\text{;}\; Stmts$\\ - & $|$ & $Stmt$\medskip\\ -$Block$ & $\rightarrow$ & $\{ Stmts \}$\\ - & $|$ & $Stmt$\medskip\\ -$AExp$ & $\rightarrow$ & $AExp + AExp$\\ - & $|$ & $AExp * AExp$\\ - & $|$ & $( AExp )$\\ - & $|$ & $Num$\\ - & $|$ & $Id$\medskip\\ -$BExp$ & $\rightarrow$ & $AExp = AExp$\\ - & $|$ & $AExp \not= AExp$\\ - & $|$ & $\text{false}$\\ - & $|$ & $\text{true}$\\ - -\end{tabular} -\end{center} - -Transform this grammar into Chomsky normalform. - -\item Write a program in the WHILE-language that calculates the factorial function. - -\end{enumerate} - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff -r 4758a6155878 -r 1ab41c59e3d3 hw/proof.pdf Binary file hw/proof.pdf has changed diff -r 4758a6155878 -r 1ab41c59e3d3 hw/proof.tex --- a/hw/proof.tex Thu Sep 26 10:39:23 2013 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,210 +0,0 @@ -\documentclass{article} -\usepackage{charter} -\usepackage{hyperref} -\usepackage{amssymb} -\usepackage{amsmath} - -\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions -\begin{document} - -\section*{Proof} - -Recall the definitions for regular expressions and the language associated with a regular expression: - -\begin{center} -\begin{tabular}{c} -\begin{tabular}[t]{rcl} - $r$ & $::=$ & $\varnothing$ \\ - & $\mid$ & $\epsilon$ \\ - & $\mid$ & $c$ \\ - & $\mid$ & $r_1 \cdot r_2$ \\ - & $\mid$ & $r_1 + r_2$ \\ - & $\mid$ & $r^*$ \\ - \end{tabular}\hspace{10mm} -\begin{tabular}[t]{r@{\hspace{1mm}}c@{\hspace{1mm}}l} -$L(\varnothing)$ & $\dn$ & $\varnothing$ \\ -$L(\epsilon)$ & $\dn$ & $\{\texttt{""}\}$ \\ -$L(c)$ & $\dn$ & $\{\texttt{"}c\texttt{"}\}$ \\ -$L(r_1 \cdot r_2)$ & $\dn$ & $L(r_1) \,@\, L(r_2)$ \\ -$L(r_1 + r_2)$ & $\dn$ & $L(r_1) \cup L(r_2)$ \\ - $L(r^*)$ & $\dn$ & $\bigcup_{n\ge 0} L(r)^n$ \\ - \end{tabular} -\end{tabular} -\end{center} - -\noindent -We also defined the notion of a derivative of a regular expression (the derivative with respect to a character): - -\begin{center} -\begin{tabular}{lcl} - $der\, c\, (\varnothing)$ & $\dn$ & $\varnothing$ \\ - $der\, c\, (\epsilon)$ & $\dn$ & $\varnothing$ \\ - $der\, c\, (d)$ & $\dn$ & if $c = d$ then $\epsilon$ else $\varnothing$\\ - $der\, c\, (r_1 + r_2)$ & $\dn$ & $(der\, c\, r_1) + (der\, c\, r_2)$ \\ - $der\, c\, (r_1 \cdot r_2)$ & $\dn$ & if $nullable(r_1)$\\ - & & then $((der\, c\, r_1) \cdot r_2) + (der\, c\, r_2)$\\ - & & else $(der\, c\, r_1) \cdot r_2$\\ - $der\, c\, (r^*)$ & $\dn$ & $(der\, c\, r) \cdot (r^*)$\\ - \end{tabular} -\end{center} - -\noindent -With our definition of regular expressions comes an induction principle. Given a property $P$ over -regular expressions. We can establish that $\forall r.\; P(r)$ holds, provided we can show the following: - -\begin{enumerate} -\item $P(\varnothing)$, $P(\epsilon)$ and $P(c)$ all hold, -\item $P(r_1 + r_2)$ holds under the induction hypotheses that -$P(r_1)$ and $P(r_2)$ hold, -\item $P(r_1 \cdot r_2)$ holds under the induction hypotheses that -$P(r_1)$ and $P(r_2)$ hold, and -\item $P(r^*)$ holds under the induction hypothesis that $P(r)$ holds. -\end{enumerate} - -\noindent -Let us try out an induction proof. Recall the definition - -\begin{center} -$Der\, c\, A \dn \{ s\;\mid\; c\!::\!s \in A\}$ -\end{center} - -\noindent -whereby $A$ is a set of strings. We like to prove - -\begin{center} -\begin{tabular}{l} -$P(r) \dn $ \hspace{4mm} $L(der\,c\,r) = Der\,c\,(L(r))$ -\end{tabular} -\end{center} - -\noindent -by induction over the regular expression $r$. - - -\newpage -\noindent -{\bf Proof} - -\noindent -According to 1.~above we need to prove $P(\varnothing)$, $P(\epsilon)$ and $P(d)$. Lets do this in turn. - -\begin{itemize} -\item First Case: $P(\varnothing)$ is $L(der\,c\,\varnothing) = Der\,c\,(L(\varnothing))$ (a). We have $der\,c\,\varnothing = \varnothing$ -and $L(\varnothing) = \varnothing$. We also have $Der\,c\,\varnothing = \varnothing$. Hence we have $\varnothing = \varnothing$ in (a). - -\item Second Case: $P(\epsilon)$ is $L(der\,c\,\epsilon) = Der\,c\,(L(\epsilon))$ (b). We have $der\,c\,\epsilon = \varnothing$, -$L(\varnothing) = \varnothing$ and $L(\epsilon) = \{\texttt{""}\}$. We also have $Der\,c\,\{\texttt{""}\} = \varnothing$. Hence we have -$\varnothing = \varnothing$ in (b). - -\item Third Case: $P(d)$ is $L(der\,c\,d) = Der\,c\,(L(d))$ (c). We need to treat the cases $d = c$ and $d \not= c$. - -$d = c$: We have $der\,c\,c = \epsilon$ and $L(\epsilon) = \{\texttt{""}\}$. -We also have $L(c) = \{\texttt{"}c\texttt{"}\}$ and $Der\,c\,\{\texttt{"}c\texttt{"}\} = \{\texttt{""}\}$. Hence we have -$\{\texttt{""}\} = \{\texttt{""}\}$ in (c). - -$d \not=c$: We have $der\,c\,d = \varnothing$. -We also have $Der\,c\,\{\texttt{"}d\texttt{"}\} = \varnothing$. Hence we have -$\varnothing = \varnothing$ in (c). -\end{itemize} - -\noindent -These were the easy base cases. Now come the inductive cases. - -\begin{itemize} -\item Fourth Case: $P(r_1 + r_2)$ is $L(der\,c\,(r_1 + r_2)) = Der\,c\,(L(r_1 + r_2))$ (d). This is what we have to show. -We can assume already: - -\begin{center} -\begin{tabular}{ll} -$P(r_1)$: & $L(der\,c\,r_1) = Der\,c\,(L(r_1))$ (I)\\ -$P(r_2)$: & $L(der\,c\,r_2) = Der\,c\,(L(r_2))$ (II) -\end{tabular} -\end{center} - -We have that $der\,c\,(r_1 + r_2) = (der\,c\,r_1) + (der\,c\,r_2)$ and also $L((der\,c\,r_1) + (der\,c\,r_2)) = L(der\,c\,r_1) \cup L(der\,c\,r_2)$. -By (I) and (II) we know that the left-hand side is $Der\,c\,(L(r_1)) \cup Der\,c\,(L(r_2))$. You need to ponder a bit, but you should see -that - -\begin{center} -$Der\,c(A \cup B) = (Der\,c\,A) \cup (Der\,c\,B)$ -\end{center} - -holds for every set of strings $A$ and $B$. That means the right-hand side of (d) is also $Der\,c\,(L(r_1)) \cup Der\,c\,(L(r_2))$, -because $L(r_1 + r_2) = L(r_1) \cup L(r_2)$. And we are done with the fourth case. - -\item Fifth Case: $P(r_1 \cdot r_2)$ is $L(der\,c\,(r_1 \cdot r_2)) = Der\,c\,(L(r_1 \cdot r_2))$ (e). We can assume already: - -\begin{center} -\begin{tabular}{ll} -$P(r_1)$: & $L(der\,c\,r_1) = Der\,c\,(L(r_1))$ (I)\\ -$P(r_2)$: & $L(der\,c\,r_2) = Der\,c\,(L(r_2))$ (II) -\end{tabular} -\end{center} - -Let us first consider the case where $nullable(r_1)$ holds. Then - -\[ -der\,c\,(r_1 \cdot r_2) = ((der\,c\,r_1) \cdot r_2) + (der\,c\,r_2). -\] - -The corresponding language of the right-hand side is - -\[ -(L(der\,c\,r_1) \,@\, L(r_2)) \cup L(der\,c\,r_2). -\] - -By the induction hypotheses (I) and (II), this is equal to - -\[ -(Der\,c\,(L(r_1)) \,@\, L(r_2)) \cup (Der\,c\,(L(r_2)).\;\;(**) -\] - -We also know that $L(r_1 \cdot r_2) = L(r_1) \,@\,L(r_2)$. We have to know what -$Der\,c\,(L(r_1) \,@\,L(r_2))$ is. - -Let us analyse what -$Der\,c\,(A \,@\, B)$ is for arbitrary sets of strings $A$ and $B$. If $A$ does \emph{not} -contain the empty string, then every string in $A\,@\,B$ is of the form $s_1 \,@\, s_2$ where -$s_1 \in A$ and $s_2 \in B$. So if $s_1$ starts with $c$ then we just have to remove it. Consequently, -$Der\,c\,(A \,@\, B) = (Der\,c\,(A)) \,@\, B$. This case does not apply here though, because we already -proved that if $r_1$ is nullable, then $L(r_1)$ contains the empty string. In this case, every string -in $A\,@\,B$ is either of the form $s_1 \,@\, s_2$, with $s_1 \in A$ and $s_2 \in B$, or -$s_3$ with $s_3 \in B$. This means $Der\,c\,(A \,@\, B) = ((Der\,c\,(A)) \,@\, B) \cup Der\,c\,B$. -But this proves that (**) is $Der\,c\,(L(r_1) \,@\, L(r_2))$. - -Similarly in the case where $r_1$ is \emph{not} nullable. - -\item Sixth Case: $P(r^*)$ is $L(der\,c\,(r^*)) = Der\,c\,L(r^*)$. We can assume already: - -\begin{center} -\begin{tabular}{ll} -$P(r)$: & $L(der\,c\,r) = Der\,c\,(L(r))$ (I) -\end{tabular} -\end{center} - -We have $der\,c\,(r^*) = der\,c\,r\cdot r^*$. Which means $L(der\,c\,(r^*)) = L(der\,c\,r\cdot r^*)$ and -further $L(der\,c\,r) \,@\, L(r^*)$. By induction hypothesis (I) we know that is equal to -$(Der\,c\,L(r)) \,@\, L(r^*)$. (*) - -\end{itemize} - - - - -Let us now analyse $Der\,c\,L(r^*)$, which is equal to $Der\,c\,((L(r))^*)$. Now $(L(r))^*$ is defined -as $\bigcup_{n \ge 0} L(r)$. We can write this as $L(r)^0 \cup \bigcup_{n \ge 1} L(r)$, where we just -separated the first union and then let the ``big-union'' start from $1$. Form this we can already infer - -\begin{center} -$Der\,c\,(L(r^*)) = Der\,c\,(L(r)^0 \cup \bigcup_{n \ge 1} L(r)) = (Der\,c\,L(r)^0) \cup Der\,c\,(\bigcup_{n \ge 1} L(r))$ -\end{center} - -The first union ``disappears'' since $Der\,c\,(L(r)^0) = \varnothing$. - - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff -r 4758a6155878 -r 1ab41c59e3d3 hws/hw01.pdf Binary file hws/hw01.pdf has changed diff -r 4758a6155878 -r 1ab41c59e3d3 hws/hw01.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hws/hw01.tex Thu Sep 26 10:41:47 2013 +0100 @@ -0,0 +1,42 @@ +\documentclass{article} +\usepackage{charter} +\usepackage{hyperref} +\usepackage{amssymb} + +\begin{document} + +\section*{Homework 1} + +\begin{enumerate} +\item {\bf (Optional)} If you want to run the code presented +in the lectures, install the +Scala programming language available (for free) from +\begin{center} +\url{http://www.scala-lang.org} +\end{center} + +\item {\bf (Optional)} Have a look at the crawler programs. +Can you find a usage for them in your daily programming life? + +\item In the context of the AFL-course, what is meant by the term \emph{language}? + +\item Give the definition for regular expressions. What is the meaning of a +regular expression? + +\item Assume the concatenation operation of two strings is written as $s_1 @ s_2$. +Define the operation of \emph{concatenating} two sets of strings. + +\item How is the power of a language defined? (Hint: There are two rules, one for $\_\!\_^0$ and +one for $\_\!\_^{n+1}$.) + +\item Given the regular expressions $r_1 = \epsilon$ and $r_2 = \varnothing$ and $r_3 = a$. +How many strings can the regular expressions $r_1^*$, $r_2^*$ and $r_3^*$ each match? + +\end{enumerate} + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r 4758a6155878 -r 1ab41c59e3d3 hws/hw02.pdf Binary file hws/hw02.pdf has changed diff -r 4758a6155878 -r 1ab41c59e3d3 hws/hw02.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hws/hw02.tex Thu Sep 26 10:41:47 2013 +0100 @@ -0,0 +1,36 @@ +\documentclass{article} +\usepackage{charter} +\usepackage{hyperref} +\usepackage{amssymb} +\usepackage{amsmath} + +\begin{document} + +\section*{Homework 2} + +\begin{enumerate} +\item Give regular expressions for (a) decimal numbers and for (b) binary numbers. +(Hint: Observe that the empty string is not a number. Also observe that leading 0s +are normally not written.) + +\item Decide whether the following two regular expressions are equivalent $(\epsilon + a)^* \equiv^? a^*$ and +$(a \cdot b)^* \cdot a \equiv^? a \cdot (b \cdot a)^*$. + +\item Given the regular expression $r = (a \cdot b + b)^*$. Compute what the derivative of $r$ is with respect to +$a$ and $b$. Is $r$ nullable? + +\item What is a regular language? + +\item Prove that for all regular expressions $r$ we have +\begin{center} +$\text{nullable}(r)$ \quad if and only if \quad $\texttt{""} \in L(r)$ +\end{center} + +\end{enumerate} + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r 4758a6155878 -r 1ab41c59e3d3 hws/hw03.pdf Binary file hws/hw03.pdf has changed diff -r 4758a6155878 -r 1ab41c59e3d3 hws/hw03.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hws/hw03.tex Thu Sep 26 10:41:47 2013 +0100 @@ -0,0 +1,49 @@ +\documentclass{article} +\usepackage{charter} +\usepackage{hyperref} +\usepackage{amssymb} +\usepackage{amsmath} + +\begin{document} + +\section*{Homework 3} + +\begin{enumerate} +\item Assume you have an alphabet consisting of the letters $a$, $b$ and $c$ only. +(a) Find a regular expression that recognises the two strings $ab$ and $ac$. (b) +Find a regular expression that matches all strings \emph{except} these two strings. +Note, you can only use regular expressions of the form +\begin{center} +$r ::= \varnothing \;|\; \epsilon \;|\; c \;|\; r_1 + r_2 \;|\; r_1 \cdot r_2 \;|\; r^*$ +\end{center} + +\item Define the function $zeroable$ which takes a regular expression as argument +and returns a boolean.\footnote{In an earlier version there was an error.} The +function should satisfy the following property: +\begin{center} +$zeroable(r)$ \;if and only if\; $L(r) = \varnothing$ +\end{center} + +\item Define the tokens and regular expressions for a language +consisting of numbers, left-parenthesis (, right-parenthesis ), +identifiers and the operations $+$, $-$ and $*$. Can the following strings +in this language be lexed? + +\begin{itemize} +\item \texttt{"}$(a + 3) * b$\texttt{"} +\item \texttt{"}$)()++ -33$\texttt{"} +\item \texttt{"}$(a / 3) * 3$\texttt{"} +\end{itemize} + + +In case they can, can you give the corresponding token sequences. +\end{enumerate} + + + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r 4758a6155878 -r 1ab41c59e3d3 hws/hw04.pdf Binary file hws/hw04.pdf has changed diff -r 4758a6155878 -r 1ab41c59e3d3 hws/hw04.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hws/hw04.tex Thu Sep 26 10:41:47 2013 +0100 @@ -0,0 +1,109 @@ +\documentclass{article} +\usepackage{charter} +\usepackage{hyperref} +\usepackage{amssymb} +\usepackage{amsmath} + +\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions + +\begin{document} + +\section*{Homework 4} + +\begin{enumerate} +\item Why is every finite set of strings a regular language? + +\item What is the language recognised by the regular expressions $(\varnothing^*)^*$. + +\item If a regular expression $r$ does not contain any occurrence of $\varnothing$ +is it possible for $L(r)$ to be empty? + +\item Assume that $s^{-1}$ stands for the operation of reversing a +string $s$. Given the following \emph{reversing} function on regular +expressions + +\begin{center} +\begin{tabular}{r@{\hspace{1mm}}c@{\hspace{1mm}}l} +$rev(\varnothing)$ & $\dn$ & $\varnothing$\\ +$rev(\epsilon)$ & $\dn$ & $\epsilon$\\ +$rev(c)$ & $\dn$ & $c$\\ +$rev(r_1 + r_2)$ & $\dn$ & $rev(r_1) + rev(r_2)$\\ +$rev(r_1 \cdot r_2)$ & $\dn$ & $rev(r_2) \cdot rev(r_1)$\\ +$rev(r^*)$ & $\dn$ & $rev(r)^*$\\ +\end{tabular} +\end{center} + + +and the set + +\begin{center} +$Rev\,A \dn \{s^{-1} \;|\; s \in A\}$ +\end{center} + +prove whether + +\begin{center} +$L(rev(r)) = Rev (L(r))$ +\end{center} + +holds. + +\item Give a regular expression over the alphabet $\{a,b\}$ recognising all strings +that do not contain any substring $bb$ and end in $a$. + +\item Assume the delimiters for comments are \texttt{$\slash$*} and \texttt{*$\slash$}. +Give a regular expression that can recognise comments +of the form + +\begin{center} +\texttt{$\slash$*~\ldots{}~*$\slash$} +\end{center} + +where the three dots stand for arbitrary characters, but not comment delimiters. +(Hint: You can assume you are already given a regular expression written \texttt{ALL}, +that can recognise any character, and a regular expression \texttt{NOT} that recognises +the complement of a regular expression.) + +\item Given the alphabet $\{a,b\}$. Draw the automaton that has two states, say $q_0$ and $q_1$. +The starting state is $q_0$ and the final state is $q_1$. The transition +function is given by + +\begin{center} +\begin{tabular}{l} +$(q_0, a) \rightarrow q_0$\\ +$(q_0, b) \rightarrow q_1$\\ +$(q_1, b) \rightarrow q_1$ +\end{tabular} +\end{center} + +What is the languages recognised by this automaton? + +\item Give a deterministic finite automaton that can recognise +the language $L(a^*\cdot b\cdot b^*)$. + + +\item (Optional) The tokenizer in \texttt{regexp3.scala} takes as +argument a string and a list of rules. The result is a list of tokens. Improve this tokenizer so +that it filters out all comments and whitespace from the result. + +\item (Optional) Modify the tokenizer in \texttt{regexp2.scala} so that it +implements the \texttt{findAll} function. This function takes a regular +expressions and a string, and returns all substrings in this string that +match the regular expression. +\end{enumerate} + +% explain what is a context-free grammar and the language it generates +% +% +% Define the language L(M) accepted by a deterministic finite automaton M. +% +% +% does (a + b)*b+ and (a*b+) + (b*b+) define the same language + + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r 4758a6155878 -r 1ab41c59e3d3 hws/hw05.pdf Binary file hws/hw05.pdf has changed diff -r 4758a6155878 -r 1ab41c59e3d3 hws/hw05.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hws/hw05.tex Thu Sep 26 10:41:47 2013 +0100 @@ -0,0 +1,116 @@ +\documentclass{article} +\usepackage{charter} +\usepackage{hyperref} +\usepackage{amssymb} +\usepackage{amsmath} +\usepackage{tikz} +\usetikzlibrary{automata} + +\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions + +\begin{document} + +\section*{Homework 5} + +\begin{enumerate} +\item Define the following regular expressions + +\begin{center} +\begin{tabular}{ll} +$r^+$ & (one or more matches)\\ +$r^?$ & (zero or one match)\\ +$r^{\{n\}}$ & (exactly $n$ matches)\\ +$r^{\{m, n\}}$ & (at least $m$ and maximal $n$ matches, with the\\ +& \phantom{(}assumption $m \le n$)\\ +\end{tabular} +\end{center} + +in terms of the usual regular expressions + +\begin{center} +$r ::= \varnothing \;|\; \epsilon \;|\; c \;|\; r_1 + r_2 \;|\; r_1 \cdot r_2 \;|\; r^*$ +\end{center} + +\item Given a deterministic finite automata $A(Q, q_0, F, \delta)$, +define which language is recognised by this automaton. + +\item Given the following deterministic finite automata over the alphabet +$\{a, b\}$, find an automaton that recognises the complement language. +(Hint: Recall that for the algorithm from the lectures, the automaton needs to be +in completed form, that is have a transition for every letter from the alphabet.) + +\begin{center} +\begin{tikzpicture}[scale=3, line width=0.7mm] + \node[state, initial] (q0) at ( 0,1) {$q_0$}; + \node[state, accepting] (q1) at ( 1,1) {$q_1$}; + \path[->] (q0) edge node[above] {$a$} (q1) + (q1) edge [loop right] node {$b$} () + ; +\end{tikzpicture} +\end{center} + +\item Given the following deterministic finite automaton + +\begin{center} +\begin{tikzpicture}[scale=3, line width=0.7mm] + \node[state, initial] (q0) at ( 0,1) {$q_0$}; + \node[state,accepting] (q1) at ( 1,1) {$q_1$}; + \node[state, accepting] (q2) at ( 2,1) {$q_2$}; + \path[->] (q0) edge node[above] {$b$} (q1) + (q1) edge [loop above] node[above] {$a$} () + (q2) edge [loop above] node[above] {$a, b$} () + (q1) edge node[above] {$b$} (q2) + (q0) edge[bend right] node[below] {$a$} (q2) + ; +\end{tikzpicture} +\end{center} +find the corresponding minimal automaton. State clearly which nodes +can be merged. + +\item Given the following non-deterministic finite automaton over the alphabet $\{a, b\}$, +find a deterministic finite automaton that recognises the same language: + +\begin{center} +\begin{tikzpicture}[scale=3, line width=0.7mm] + \node[state, initial] (q0) at ( 0,1) {$q_0$}; + \node[state] (q1) at ( 1,1) {$q_1$}; + \node[state, accepting] (q2) at ( 2,1) {$q_2$}; + \path[->] (q0) edge node[above] {$a$} (q1) + (q0) edge [loop above] node[above] {$b$} () + (q0) edge [loop below] node[below] {$a$} () + (q1) edge node[above] {$a$} (q2) + ; +\end{tikzpicture} +\end{center} + +\item +Given the following finite deterministic automaton over the alphabet $\{a, b\}$: + +\begin{center} +\begin{tikzpicture}[scale=2, line width=0.5mm] + \node[state, initial, accepting] (q0) at ( 0,1) {$q_0$}; + \node[state, accepting] (q1) at ( 1,1) {$q_1$}; + \node[state] (q2) at ( 2,1) {$q_2$}; + \path[->] (q0) edge[bend left] node[above] {$a$} (q1) + (q1) edge[bend left] node[above] {$b$} (q0) + (q2) edge[bend left=50] node[below] {$b$} (q0) + (q1) edge node[above] {$a$} (q2) + (q2) edge [loop right] node {$a$} () + (q0) edge [loop below] node {$b$} () + ; +\end{tikzpicture} +\end{center} + +Give a regular expression that can recognise the same language as +this automaton. (Hint: If you use Brzozwski's method, you can assume +Arden's lemma which states that an equation of the form $q = q\cdot r + s$ +has the unique solution $q = s \cdot r^*$.)\ +\end{enumerate} + + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r 4758a6155878 -r 1ab41c59e3d3 hws/hw06.pdf Binary file hws/hw06.pdf has changed diff -r 4758a6155878 -r 1ab41c59e3d3 hws/hw06.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hws/hw06.tex Thu Sep 26 10:41:47 2013 +0100 @@ -0,0 +1,72 @@ +\documentclass{article} +\usepackage{charter} +\usepackage{hyperref} +\usepackage{amssymb} +\usepackage{amsmath} +\usepackage{tikz} +\usetikzlibrary{automata} + +\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions + +\begin{document} + +\section*{Homework 6} + +\begin{enumerate} +\item (i) Give the regular expressions for lexing a language +consisting of whitespaces, identifiers (some letters followed by digits), numbers, +operations \texttt{=}, \texttt{<} and \texttt{>}, and the keywords +\texttt{if}, \texttt{then} and \texttt{else}. +(ii) Decide whether the following strings +can be lexed in this language? + +\begin{enumerate} +\item \texttt{"if y4 = 3 then 1 else 3"} +\item \texttt{"if33 ifif then then23 else else 32"} +\item \texttt{"if x4x < 33 then 1 else 3"} +\end{enumerate} + +In case they can, give the corresponding token sequences. (Hint: +Observe the maximal munch rule and priorities of your regular +expressions that make the process of lexing unambiguous.) + +\item Suppose the grammar + +\begin{center} +\begin{tabular}{lcl} +$E$ & $\rightarrow$ & $F \;|\; F \cdot * \cdot F \;|\; F \cdot \backslash \cdot F$\\ +$F$ & $\rightarrow$ & $T \;|\; T \cdot \texttt{+} \cdot T \;|\; T \cdot \texttt{-} \cdot T$\\ +$T$ & $\rightarrow$ & $num \;|\; \texttt{(} \cdot E \cdot \texttt{)}$\\ +\end{tabular} +\end{center} + +where $E$, $F$ and $T$ are non-terminals, $E$ is the starting symbol of the grammar, and $num$ stands for +a number token. Give a parse tree for the string \texttt{(3+3)+(2*3)}. + +\item Define what it means for a grammar to be ambiguous. Give an example of +an ambiguous grammar. + +\item Suppose boolean expressions are built up from + +\begin{center} +\begin{tabular}{ll} +1.) & tokens for \texttt{true} and \texttt{false},\\ +2.) & the infix operations \texttt{$\wedge$} and \texttt{$\vee$},\\ +3.) & the prefix operation $\neg$, and\\ +4.) & can be enclosed in parentheses. +\end{tabular} +\end{center} + +(i) Give a grammar that can recognise such boolean expressions +and (ii) give a sample string involving all rules given in 1.-4.~that +can be parsed by this grammar. + + +\end{enumerate} + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r 4758a6155878 -r 1ab41c59e3d3 hws/hw07.pdf Binary file hws/hw07.pdf has changed diff -r 4758a6155878 -r 1ab41c59e3d3 hws/hw07.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hws/hw07.tex Thu Sep 26 10:41:47 2013 +0100 @@ -0,0 +1,75 @@ +\documentclass{article} +\usepackage{charter} +\usepackage{hyperref} +\usepackage{amssymb} +\usepackage{amsmath} +\usepackage{tikz} +\usetikzlibrary{automata} + +\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions + +\begin{document} + +\section*{Homework 7} + +\begin{enumerate} +\item Suppose the following finite deterministic automaton over the alphabet $\{0, 1\}$. + +\begin{center} +\begin{tikzpicture}[scale=2, line width=0.5mm] + \node[state, initial, accepting] (q0) at ( 0,1) {$q_0$}; + \node[state, accepting] (q1) at ( 1,1) {$q_1$}; + \node[state] (q2) at ( 2,1) {$q_2$}; + \path[->] (q0) edge[bend left] node[above] {$0$} (q1) + (q1) edge[bend left] node[above] {$1$} (q0) + (q2) edge[bend left=50] node[below] {$1$} (q0) + (q1) edge node[above] {$0$} (q2) + (q2) edge [loop right] node {$0$} () + (q0) edge [loop below] node {$1$} () + ; +\end{tikzpicture} +\end{center} + +Give a regular expression that can recognise the same language as +this automaton. (Hint: If you use Brzozwski's method, you can assume +Arden's lemma which states that an equation of the form $q = q\cdot r + s$ +has the unique solution $q = s \cdot r^*$.) + +\item Consider the following grammar + +\begin{center} +\begin{tabular}{l} +$S \rightarrow N\cdot P$\\ +$P \rightarrow V\cdot N$\\ +$N \rightarrow N\cdot N$\\ +$N \rightarrow A \cdot N$\\ +$N \rightarrow \texttt{student} \;|\; \texttt{trainer} \;|\; \texttt{team} \;|\; \texttt{trains}$\\ +$V \rightarrow \texttt{trains} \;|\; \texttt{team}$\\ +$A \rightarrow \texttt{The} \;|\; \texttt{the}$\\ +\end{tabular} +\end{center} + +where $S$ is the start symbol and $S$, $P$, $N$, $V$ and $A$ are non-terminals. +Using the CYK-algorithm, check whether or not the following string can be parsed +by the grammar: + +\begin{center} +\texttt{The trainer trains the student team} +\end{center} + +\item {\bf (Optional)} The task is to match strings where the letters are in alphabetical order---for example, +\texttt{abcfjz} would pass, but \texttt{acb} would not. Whitespace should be ignored---for example +\texttt{ab c d} should pass. The point is to try to get the regular expression as short as possible! +See: + +\begin{center} +\url{http://callumacrae.github.com/regex-tuesday/challenge11.html} +\end{center} +\end{enumerate} + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r 4758a6155878 -r 1ab41c59e3d3 hws/hw08.pdf Binary file hws/hw08.pdf has changed diff -r 4758a6155878 -r 1ab41c59e3d3 hws/hw08.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hws/hw08.tex Thu Sep 26 10:41:47 2013 +0100 @@ -0,0 +1,52 @@ +\documentclass{article} +\usepackage{charter} +\usepackage{hyperref} +\usepackage{amssymb} +\usepackage{amsmath} +\usepackage{tikz} +\usetikzlibrary{automata} + +\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions + +\begin{document} + +\section*{Homework 8} + +\begin{enumerate} +\item Suppose the following grammar for the WHILE-language: + +\begin{center} +\begin{tabular}{lcl} +$Stmt$ & $\rightarrow$ & $\text{skip}$\\ + & $|$ & $Id := AExp$\\ + & $|$ & $\text{if}\; B\!Exp \;\text{then}\; Block \;\text{else}\; Block$\\ + & $|$ & $\text{while}\; B\!Exp \;\text{do}\; Block$\medskip\\ +$Stmts$ & $\rightarrow$ & $Stmt \;\text{;}\; Stmts$\\ + & $|$ & $Stmt$\medskip\\ +$Block$ & $\rightarrow$ & $\{ Stmts \}$\\ + & $|$ & $Stmt$\medskip\\ +$AExp$ & $\rightarrow$ & $AExp + AExp$\\ + & $|$ & $AExp * AExp$\\ + & $|$ & $( AExp )$\\ + & $|$ & $Num$\\ + & $|$ & $Id$\medskip\\ +$BExp$ & $\rightarrow$ & $AExp = AExp$\\ + & $|$ & $AExp \not= AExp$\\ + & $|$ & $\text{false}$\\ + & $|$ & $\text{true}$\\ + +\end{tabular} +\end{center} + +Transform this grammar into Chomsky normalform. + +\item Write a program in the WHILE-language that calculates the factorial function. + +\end{enumerate} + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r 4758a6155878 -r 1ab41c59e3d3 hws/proof.pdf Binary file hws/proof.pdf has changed diff -r 4758a6155878 -r 1ab41c59e3d3 hws/proof.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hws/proof.tex Thu Sep 26 10:41:47 2013 +0100 @@ -0,0 +1,210 @@ +\documentclass{article} +\usepackage{charter} +\usepackage{hyperref} +\usepackage{amssymb} +\usepackage{amsmath} + +\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions +\begin{document} + +\section*{Proof} + +Recall the definitions for regular expressions and the language associated with a regular expression: + +\begin{center} +\begin{tabular}{c} +\begin{tabular}[t]{rcl} + $r$ & $::=$ & $\varnothing$ \\ + & $\mid$ & $\epsilon$ \\ + & $\mid$ & $c$ \\ + & $\mid$ & $r_1 \cdot r_2$ \\ + & $\mid$ & $r_1 + r_2$ \\ + & $\mid$ & $r^*$ \\ + \end{tabular}\hspace{10mm} +\begin{tabular}[t]{r@{\hspace{1mm}}c@{\hspace{1mm}}l} +$L(\varnothing)$ & $\dn$ & $\varnothing$ \\ +$L(\epsilon)$ & $\dn$ & $\{\texttt{""}\}$ \\ +$L(c)$ & $\dn$ & $\{\texttt{"}c\texttt{"}\}$ \\ +$L(r_1 \cdot r_2)$ & $\dn$ & $L(r_1) \,@\, L(r_2)$ \\ +$L(r_1 + r_2)$ & $\dn$ & $L(r_1) \cup L(r_2)$ \\ + $L(r^*)$ & $\dn$ & $\bigcup_{n\ge 0} L(r)^n$ \\ + \end{tabular} +\end{tabular} +\end{center} + +\noindent +We also defined the notion of a derivative of a regular expression (the derivative with respect to a character): + +\begin{center} +\begin{tabular}{lcl} + $der\, c\, (\varnothing)$ & $\dn$ & $\varnothing$ \\ + $der\, c\, (\epsilon)$ & $\dn$ & $\varnothing$ \\ + $der\, c\, (d)$ & $\dn$ & if $c = d$ then $\epsilon$ else $\varnothing$\\ + $der\, c\, (r_1 + r_2)$ & $\dn$ & $(der\, c\, r_1) + (der\, c\, r_2)$ \\ + $der\, c\, (r_1 \cdot r_2)$ & $\dn$ & if $nullable(r_1)$\\ + & & then $((der\, c\, r_1) \cdot r_2) + (der\, c\, r_2)$\\ + & & else $(der\, c\, r_1) \cdot r_2$\\ + $der\, c\, (r^*)$ & $\dn$ & $(der\, c\, r) \cdot (r^*)$\\ + \end{tabular} +\end{center} + +\noindent +With our definition of regular expressions comes an induction principle. Given a property $P$ over +regular expressions. We can establish that $\forall r.\; P(r)$ holds, provided we can show the following: + +\begin{enumerate} +\item $P(\varnothing)$, $P(\epsilon)$ and $P(c)$ all hold, +\item $P(r_1 + r_2)$ holds under the induction hypotheses that +$P(r_1)$ and $P(r_2)$ hold, +\item $P(r_1 \cdot r_2)$ holds under the induction hypotheses that +$P(r_1)$ and $P(r_2)$ hold, and +\item $P(r^*)$ holds under the induction hypothesis that $P(r)$ holds. +\end{enumerate} + +\noindent +Let us try out an induction proof. Recall the definition + +\begin{center} +$Der\, c\, A \dn \{ s\;\mid\; c\!::\!s \in A\}$ +\end{center} + +\noindent +whereby $A$ is a set of strings. We like to prove + +\begin{center} +\begin{tabular}{l} +$P(r) \dn $ \hspace{4mm} $L(der\,c\,r) = Der\,c\,(L(r))$ +\end{tabular} +\end{center} + +\noindent +by induction over the regular expression $r$. + + +\newpage +\noindent +{\bf Proof} + +\noindent +According to 1.~above we need to prove $P(\varnothing)$, $P(\epsilon)$ and $P(d)$. Lets do this in turn. + +\begin{itemize} +\item First Case: $P(\varnothing)$ is $L(der\,c\,\varnothing) = Der\,c\,(L(\varnothing))$ (a). We have $der\,c\,\varnothing = \varnothing$ +and $L(\varnothing) = \varnothing$. We also have $Der\,c\,\varnothing = \varnothing$. Hence we have $\varnothing = \varnothing$ in (a). + +\item Second Case: $P(\epsilon)$ is $L(der\,c\,\epsilon) = Der\,c\,(L(\epsilon))$ (b). We have $der\,c\,\epsilon = \varnothing$, +$L(\varnothing) = \varnothing$ and $L(\epsilon) = \{\texttt{""}\}$. We also have $Der\,c\,\{\texttt{""}\} = \varnothing$. Hence we have +$\varnothing = \varnothing$ in (b). + +\item Third Case: $P(d)$ is $L(der\,c\,d) = Der\,c\,(L(d))$ (c). We need to treat the cases $d = c$ and $d \not= c$. + +$d = c$: We have $der\,c\,c = \epsilon$ and $L(\epsilon) = \{\texttt{""}\}$. +We also have $L(c) = \{\texttt{"}c\texttt{"}\}$ and $Der\,c\,\{\texttt{"}c\texttt{"}\} = \{\texttt{""}\}$. Hence we have +$\{\texttt{""}\} = \{\texttt{""}\}$ in (c). + +$d \not=c$: We have $der\,c\,d = \varnothing$. +We also have $Der\,c\,\{\texttt{"}d\texttt{"}\} = \varnothing$. Hence we have +$\varnothing = \varnothing$ in (c). +\end{itemize} + +\noindent +These were the easy base cases. Now come the inductive cases. + +\begin{itemize} +\item Fourth Case: $P(r_1 + r_2)$ is $L(der\,c\,(r_1 + r_2)) = Der\,c\,(L(r_1 + r_2))$ (d). This is what we have to show. +We can assume already: + +\begin{center} +\begin{tabular}{ll} +$P(r_1)$: & $L(der\,c\,r_1) = Der\,c\,(L(r_1))$ (I)\\ +$P(r_2)$: & $L(der\,c\,r_2) = Der\,c\,(L(r_2))$ (II) +\end{tabular} +\end{center} + +We have that $der\,c\,(r_1 + r_2) = (der\,c\,r_1) + (der\,c\,r_2)$ and also $L((der\,c\,r_1) + (der\,c\,r_2)) = L(der\,c\,r_1) \cup L(der\,c\,r_2)$. +By (I) and (II) we know that the left-hand side is $Der\,c\,(L(r_1)) \cup Der\,c\,(L(r_2))$. You need to ponder a bit, but you should see +that + +\begin{center} +$Der\,c(A \cup B) = (Der\,c\,A) \cup (Der\,c\,B)$ +\end{center} + +holds for every set of strings $A$ and $B$. That means the right-hand side of (d) is also $Der\,c\,(L(r_1)) \cup Der\,c\,(L(r_2))$, +because $L(r_1 + r_2) = L(r_1) \cup L(r_2)$. And we are done with the fourth case. + +\item Fifth Case: $P(r_1 \cdot r_2)$ is $L(der\,c\,(r_1 \cdot r_2)) = Der\,c\,(L(r_1 \cdot r_2))$ (e). We can assume already: + +\begin{center} +\begin{tabular}{ll} +$P(r_1)$: & $L(der\,c\,r_1) = Der\,c\,(L(r_1))$ (I)\\ +$P(r_2)$: & $L(der\,c\,r_2) = Der\,c\,(L(r_2))$ (II) +\end{tabular} +\end{center} + +Let us first consider the case where $nullable(r_1)$ holds. Then + +\[ +der\,c\,(r_1 \cdot r_2) = ((der\,c\,r_1) \cdot r_2) + (der\,c\,r_2). +\] + +The corresponding language of the right-hand side is + +\[ +(L(der\,c\,r_1) \,@\, L(r_2)) \cup L(der\,c\,r_2). +\] + +By the induction hypotheses (I) and (II), this is equal to + +\[ +(Der\,c\,(L(r_1)) \,@\, L(r_2)) \cup (Der\,c\,(L(r_2)).\;\;(**) +\] + +We also know that $L(r_1 \cdot r_2) = L(r_1) \,@\,L(r_2)$. We have to know what +$Der\,c\,(L(r_1) \,@\,L(r_2))$ is. + +Let us analyse what +$Der\,c\,(A \,@\, B)$ is for arbitrary sets of strings $A$ and $B$. If $A$ does \emph{not} +contain the empty string, then every string in $A\,@\,B$ is of the form $s_1 \,@\, s_2$ where +$s_1 \in A$ and $s_2 \in B$. So if $s_1$ starts with $c$ then we just have to remove it. Consequently, +$Der\,c\,(A \,@\, B) = (Der\,c\,(A)) \,@\, B$. This case does not apply here though, because we already +proved that if $r_1$ is nullable, then $L(r_1)$ contains the empty string. In this case, every string +in $A\,@\,B$ is either of the form $s_1 \,@\, s_2$, with $s_1 \in A$ and $s_2 \in B$, or +$s_3$ with $s_3 \in B$. This means $Der\,c\,(A \,@\, B) = ((Der\,c\,(A)) \,@\, B) \cup Der\,c\,B$. +But this proves that (**) is $Der\,c\,(L(r_1) \,@\, L(r_2))$. + +Similarly in the case where $r_1$ is \emph{not} nullable. + +\item Sixth Case: $P(r^*)$ is $L(der\,c\,(r^*)) = Der\,c\,L(r^*)$. We can assume already: + +\begin{center} +\begin{tabular}{ll} +$P(r)$: & $L(der\,c\,r) = Der\,c\,(L(r))$ (I) +\end{tabular} +\end{center} + +We have $der\,c\,(r^*) = der\,c\,r\cdot r^*$. Which means $L(der\,c\,(r^*)) = L(der\,c\,r\cdot r^*)$ and +further $L(der\,c\,r) \,@\, L(r^*)$. By induction hypothesis (I) we know that is equal to +$(Der\,c\,L(r)) \,@\, L(r^*)$. (*) + +\end{itemize} + + + + +Let us now analyse $Der\,c\,L(r^*)$, which is equal to $Der\,c\,((L(r))^*)$. Now $(L(r))^*$ is defined +as $\bigcup_{n \ge 0} L(r)$. We can write this as $L(r)^0 \cup \bigcup_{n \ge 1} L(r)$, where we just +separated the first union and then let the ``big-union'' start from $1$. Form this we can already infer + +\begin{center} +$Der\,c\,(L(r^*)) = Der\,c\,(L(r)^0 \cup \bigcup_{n \ge 1} L(r)) = (Der\,c\,L(r)^0) \cup Der\,c\,(\bigcup_{n \ge 1} L(r))$ +\end{center} + +The first union ``disappears'' since $Der\,c\,(L(r)^0) = \varnothing$. + + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: