# HG changeset patch # User Christian Urban # Date 1598972437 -3600 # Node ID c0bdd4ad69ca4cae7762d7f9b40790832e086b05 # Parent 4b208d81e002cb0e5e573954f71fed4d8993b92b updated diff -r 4b208d81e002 -r c0bdd4ad69ca coursework/cw01.pdf Binary file coursework/cw01.pdf has changed diff -r 4b208d81e002 -r c0bdd4ad69ca coursework/cw01.tex --- a/coursework/cw01.tex Tue Sep 01 15:57:55 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,337 +0,0 @@ -% !TEX program = xelatex -\documentclass{article} -\usepackage{../style} -\usepackage{../langs} - -\usepackage{array} - - -\begin{document} -\newcolumntype{C}[1]{>{\centering}m{#1}} - -\section*{Coursework 1} - -This coursework is worth 5\% and is due on \cwONE{} at 18:00. You are -asked to implement a regular expression matcher and submit a document -containing the answers for the questions below. You can do the -implementation in any programming language you like, but you need to -submit the source code with which you answered the questions, -otherwise a mark of 0\% will be awarded. You can submit your answers -in a txt-file or pdf. Code send as code. Please package everything -inside a zip-file that creates a directory with the name -\[\texttt{YournameYourfamilyname}\] - -\noindent on my end. Thanks! - - - -\subsubsection*{Disclaimer\alert} - -It should be understood that the work you submit represents -your own effort. You have not copied from anyone else. An -exception is the Scala code I showed during the lectures or -uploaded to KEATS, which you can freely use.\bigskip - -\noindent -If you have any questions, please send me an email in \textbf{good} -time.\bigskip - -\subsection*{Task} - -The task is to implement a regular expression matcher based on -derivatives of regular expressions. The implementation should -be able to deal with the usual (basic) regular expressions - -\[ -\ZERO,\; \ONE,\; c,\; r_1 + r_2,\; r_1 \cdot r_2,\; r^* -\] - -\noindent -but also with the following extended regular expressions: - -\begin{center} -\begin{tabular}{ll} - $[c_1,c_2,\ldots,c_n]$ & a set of characters---for character ranges\\ - $r^+$ & one or more times $r$\\ - $r^?$ & optional $r$\\ - $r^{\{n\}}$ & exactly $n$-times\\ - $r^{\{..m\}}$ & zero or more times $r$ but no more than $m$-times\\ - $r^{\{n..\}}$ & at least $n$-times $r$\\ - $r^{\{n..m\}}$ & at least $n$-times $r$ but no more than $m$-times\\ - $\sim{}r$ & not-regular-expression of $r$\\ -\end{tabular} -\end{center} - -\noindent You can assume that $n$ and $m$ are greater or equal than -$0$. In the case of $r^{\{n,m\}}$ you can also assume $0 \le n \le m$.\bigskip - -\noindent {\bf Important!} Your implementation should have explicit -case classes for the basic regular expressions, but also explicit case -classes for -the extended regular expressions.\footnote{Please call them - \code{RANGE}, \code{PLUS}, \code{OPTIONAL}, \code{NTIMES}, - \code{UPTO}, \code{FROM} and \code{BETWEEN}.} - That means do not treat the extended regular expressions -by just translating them into the basic ones. See also Question 3, -where you are asked to explicitly give the rules for \textit{nullable} -and \textit{der} for the extended regular expressions. Something like - -\[der\,c\,(r^+) \dn der\,c\,(r\cdot r^*)\] - -\noindent is \emph{not} allowed as answer in Question 3 and \emph{not} -allowed in your code.\medskip - -\noindent -The meanings of the extended regular expressions are - -\begin{center} -\begin{tabular}{r@{\hspace{2mm}}c@{\hspace{2mm}}l} - $L([c_1,c_2,\ldots,c_n])$ & $\dn$ & $\{[c_1], [c_2], \ldots, [c_n]\}$\\ - $L(r^+)$ & $\dn$ & $\bigcup_{1\le i}.\;L(r)^i$\\ - $L(r^?)$ & $\dn$ & $L(r) \cup \{[]\}$\\ - $L(r^{\{n\}})$ & $\dn$ & $L(r)^n$\\ - $L(r^{\{..m\}})$ & $\dn$ & $\bigcup_{0\le i \le m}.\;L(r)^i$\\ - $L(r^{\{n..\}})$ & $\dn$ & $\bigcup_{n\le i}.\;L(r)^i$\\ - $L(r^{\{n..m\}})$ & $\dn$ & $\bigcup_{n\le i \le m}.\;L(r)^i$\\ - $L(\sim{}r)$ & $\dn$ & $\Sigma^* - L(r)$ -\end{tabular} -\end{center} - -\noindent whereby in the last clause the set $\Sigma^*$ stands -for the set of \emph{all} strings over the alphabet $\Sigma$ -(in the implementation the alphabet can be just what is -represented by, say, the type \pcode{Char}). So $\sim{}r$ -means in effect ``all the strings that $r$ cannot match''.\medskip - -\noindent -Be careful that your implementation of \textit{nullable} and -\textit{der} satisfies for every regular expression $r$ the following -two properties (see also Question 3): - -\begin{itemize} -\item $\textit{nullable}(r)$ if and only if $[]\in L(r)$ -\item $L(der\,c\,r) = Der\,c\,(L(r))$ -\end{itemize} - - - -\subsection*{Question 1 (Unmarked)} - -What is your King's email address (you will need it in -Question 5)? - -\subsection*{Question 2 (Unmarked)} - -Can you please list all programming languages in which you have -already written programs (include only instances where you have spent -at least a good working day fiddling with a program)? This is just -for my curiosity to estimate what your background is. - -\subsection*{Question 3} - -From the -lectures you have seen the definitions for the functions -\textit{nullable} and \textit{der} for the basic regular -expressions. Implement and write down the rules for the extended -regular expressions: - -\begin{center} -\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}} - $\textit{nullable}([c_1,c_2,\ldots,c_n])$ & $\dn$ & $?$\\ - $\textit{nullable}(r^+)$ & $\dn$ & $?$\\ - $\textit{nullable}(r^?)$ & $\dn$ & $?$\\ - $\textit{nullable}(r^{\{n\}})$ & $\dn$ & $?$\\ - $\textit{nullable}(r^{\{..m\}})$ & $\dn$ & $?$\\ - $\textit{nullable}(r^{\{n..\}})$ & $\dn$ & $?$\\ - $\textit{nullable}(r^{\{n..m\}})$ & $\dn$ & $?$\\ - $\textit{nullable}(\sim{}r)$ & $\dn$ & $?$ -\end{tabular} -\end{center} - -\begin{center} -\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}} - $der\, c\, ([c_1,c_2,\ldots,c_n])$ & $\dn$ & $?$\\ - $der\, c\, (r^+)$ & $\dn$ & $?$\\ - $der\, c\, (r^?)$ & $\dn$ & $?$\\ - $der\, c\, (r^{\{n\}})$ & $\dn$ & $?$\\ - $der\, c\, (r^{\{..m\}})$ & $\dn$ & $?$\\ - $der\, c\, (r^{\{n..\}})$ & $\dn$ & $?$\\ - $der\, c\, (r^{\{n..m\}})$ & $\dn$ & $?$\\ - $der\, c\, (\sim{}r)$ & $\dn$ & $?$\\ -\end{tabular} -\end{center} - -\noindent -Remember your definitions have to satisfy the two properties - -\begin{itemize} -\item $\textit{nullable}(r)$ if and only if $[]\in L(r)$ -\item $L(der\,c\,r)) = Der\,c\,(L(r))$ -\end{itemize} - -\noindent -Given the definitions of \textit{nullable} and \textit{der}, it is -easy to implement a regular expression matcher. Test your regular -expression matcher with (at least) the examples: - - -\begin{center} -\def\arraystretch{1.2} -\begin{tabular}{@{}r|m{3mm}|m{6mm}|m{6mm}|m{10mm}|m{6mm}|m{10mm}|m{10mm}|m{10mm}} - string & $a^?$ & $\sim{}a$ & $a^{\{3\}}$ & $(a^?)^{\{3\}}$ & $a^{\{..3\}}$ & - $(a^?)^{\{..3\}}$ & $a^{\{3..5\}}$ & $(a^?)^{\{3..5\}}$ \\\hline - $[]$ &&&&&&& \\\hline - \texttt{a} &&&&&&& \\\hline - \texttt{aa} &&&&&&& \\\hline - \texttt{aaa} &&&&&&& \\\hline - \texttt{aaaaa} &&&&&&& \\\hline - \texttt{aaaaaa}&&&&&&& \\ -\end{tabular} -\end{center} - -\noindent -Does your matcher produce the expected results? Make sure you -also test corner-cases, like $a^{\{0\}}$! - -\subsection*{Question 4} - -As you can see, there are a number of explicit regular expressions -that deal with single or several characters, for example: - -\begin{center} -\begin{tabular}{ll} - $c$ & matches a single character\\ - $[c_1,c_2,\ldots,c_n]$ & matches a set of characters---for character ranges\\ - $\textit{ALL}$ & matches any character -\end{tabular} -\end{center} - -\noindent -The latter is useful for matching any string (for example -by using $\textit{ALL}^*$). In order to avoid having an explicit constructor -for each case, we can generalise all these cases and introduce a single -constructor $\textit{CFUN}(f)$ where $f$ is a function from characters -to booleans. In Scala code this would look as follows: - -\begin{lstlisting}[numbers=none] -abstract class Rexp -... -case class CFUN(f: Char => Boolean) extends Rexp -\end{lstlisting}\smallskip - -\noindent -The idea is that the function $f$ determines which character(s) -are matched, namely those where $f$ returns \texttt{true}. -In this question implement \textit{CFUN} and define - -\begin{center} -\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}} - $\textit{nullable}(\textit{CFUN}(f))$ & $\dn$ & $?$\\ - $\textit{der}\,c\,(\textit{CFUN}(f))$ & $\dn$ & $?$ -\end{tabular} -\end{center} - -\noindent in your matcher and then also give definitions for - -\begin{center} -\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}} - $c$ & $\dn$ & $\textit{CFUN}(?)$\\ - $[c_1,c_2,\ldots,c_n]$ & $\dn$ & $\textit{CFUN}(?)$\\ - $\textit{ALL}$ & $\dn$ & $\textit{CFUN}(?)$ -\end{tabular} -\end{center} - -\noindent -You can either add the constructor $CFUN$ to your implementation in -Question 3, or you can implement this questions first -and then use $CFUN$ instead of \code{RANGE} and \code{CHAR} in Question 3. - - -\subsection*{Question 5} - -Suppose $[a\mbox{-}z0\mbox{-}9\_\,.\mbox{-}]$ stands for the regular expression - -\[[a,b,c,\ldots,z,0,\dots,9,\_,.,\mbox{-}]\;.\] - -\noindent -Define in your code the following regular expression for email addresses - -\[ -([a\mbox{-}z0\mbox{-}9\_\,.-]^+)\cdot @\cdot ([a\mbox{-}z0\mbox{-}9\,.-]^+)\cdot .\cdot ([a\mbox{-}z\,.]^{\{2,6\}}) -\] - -\noindent and calculate the derivative according to your own email -address. When calculating the derivative, simplify all regular -expressions as much as possible by applying the -following 7 simplification rules: - -\begin{center} -\begin{tabular}{l@{\hspace{2mm}}c@{\hspace{2mm}}ll} -$r \cdot \ZERO$ & $\mapsto$ & $\ZERO$\\ -$\ZERO \cdot r$ & $\mapsto$ & $\ZERO$\\ -$r \cdot \ONE$ & $\mapsto$ & $r$\\ -$\ONE \cdot r$ & $\mapsto$ & $r$\\ -$r + \ZERO$ & $\mapsto$ & $r$\\ -$\ZERO + r$ & $\mapsto$ & $r$\\ -$r + r$ & $\mapsto$ & $r$\\ -\end{tabular} -\end{center} - -\noindent Write down your simplified derivative in a readable -notation using parentheses where necessary. That means you -should use the infix notation $+$, $\cdot$, $^*$ and so on, -instead of raw code.\bigskip - - -\subsection*{Question 6} - -Implement the simplification rules in your regular expression matcher. -Consider the regular expression $/ \cdot * \cdot -(\sim{}(\textit{ALL}^* \cdot * \cdot / \cdot \textit{ALL}^*)) \cdot * -\cdot /$ and decide whether the following four strings are matched by -this regular expression. Answer yes or no. - -\begin{enumerate} -\item \texttt{"/**/"} -\item \texttt{"/*foobar*/"} -\item \texttt{"/*test*/test*/"} -\item \texttt{"/*test/*test*/"} -\end{enumerate} - -\subsection*{Question 7} - -Let $r_1$ be the regular expression $a\cdot a\cdot a$ and $r_2$ be -$(a^{\{19,19\}}) \cdot (a^?)$.\medskip - -\noindent -Decide whether the following three -strings consisting of $a$s only can be matched by $(r_1^+)^+$. -Similarly test them with $(r_2^+)^+$. Again answer in all six cases -with yes or no. \medskip - -\noindent -These are strings are meant to be entirely made up of $a$s. Be careful -when copy-and-pasting the strings so as to not forgetting any $a$ and -to not introducing any other character. - -\begin{enumerate} -\setcounter{enumi}{4} -\item \texttt{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"} -\item \texttt{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"} -\item \texttt{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"} -\end{enumerate} - - - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff -r 4b208d81e002 -r c0bdd4ad69ca coursework/cw02.pdf Binary file coursework/cw02.pdf has changed diff -r 4b208d81e002 -r c0bdd4ad69ca coursework/cw02.tex --- a/coursework/cw02.tex Tue Sep 01 15:57:55 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,222 +0,0 @@ -% !TEX program = xelatex -\documentclass{article} -\usepackage{../style} -\usepackage{../langs} - -\begin{document} - -\section*{Coursework 2} - -\noindent This coursework is worth 8\% and is due on \cwTWO{} at -18:00. You are asked to implement the Sulzmann \& Lu lexer for the -WHILE language. You can do the implementation in any programming -language you like, but you need to submit the source code with which -you answered the questions, otherwise a mark of 0\% will be -awarded. You can submit your answers in a txt-file or as pdf. Code -submit as code. Please package everything in a zip-file that creates a -directory with the name \texttt{YournameYourfamilyname} on my end. Thanks! - -\subsection*{Disclaimer\alert} - -It should be understood that the work you submit represents -your own effort. You have not copied from anyone else. An -exception is the Scala code from KEATS and the code I showed -during the lectures, which you can both freely use. You can -also use your own code from the CW~1. - -\subsection*{Question 1} - -To implement a lexer for the WHILE language, you first -need to design the appropriate regular expressions for the -following eleven syntactic entities: - -\begin{enumerate} -\item keywords are - -\begin{center} -\texttt{while}, -\texttt{if}, -\texttt{then}, -\texttt{else}, -\texttt{do}, -\texttt{for}, -\texttt{to}, -\texttt{true}, -\texttt{false}, -\texttt{read}, -\texttt{write}, -\texttt{skip} -\end{center} - -\item operators are: -\texttt{+}, -\texttt{-}, -\texttt{*}, -\texttt{\%}, -\texttt{/}, -\texttt{==}, -\texttt{!=}, -\texttt{>}, -\texttt{<}, -\texttt{<=}, -\texttt{>=}, -\texttt{:=}, -\texttt{\&\&}, -\texttt{||} - -\item letters are uppercase and lowercase - -\item symbols are letters plus the characters - \texttt{.}, - \texttt{\_}, - \texttt{>}, - \texttt{<}, - \texttt{=}, - \texttt{;}, - \texttt{,} and - \texttt{:} - -\item strings are enclosed by \texttt{"\ldots"} and consisting of - symbols, whitespaces and digits -\item parentheses are \texttt{(}, \texttt{\{}, \texttt{)} and \texttt{\}} -\item there are semicolons \texttt{;} -\item whitespaces are either \texttt{" "} (one or more) or \texttt{$\backslash$n} or - \texttt{$\backslash$t} -\item identifiers are letters followed by underscores \texttt{\_\!\_}, letters -or digits -\item numbers are \pcode{0}, \pcode{1}, \ldots and so on; give -a regular expression that can recognise \pcode{0}, but not numbers -with leading zeroes, such as \pcode{001} -\item comments start with \texttt{//} and contain symbols, spaces and digits until the end of the line -\end{enumerate} - -\noindent -You can use the basic regular expressions - -\[ -\ZERO,\; \ONE,\; c,\; r_1 + r_2,\; r_1 \cdot r_2,\; r^* -\] - -\noindent -but also the following extended regular expressions - -\begin{center} -\begin{tabular}{ll} -$[c_1,c_2,\ldots,c_n]$ & a set of characters\\ -$r^+$ & one or more times $r$\\ -$r^?$ & optional $r$\\ -$r^{\{n\}}$ & n-times $r$\\ -\end{tabular} -\end{center} - -\noindent -Later on you will also need the record regular expression: - -\begin{center} -\begin{tabular}{ll} -$REC(x:r)$ & record regular expression\\ -\end{tabular} -\end{center} - -\noindent Try to design your regular expressions to be as -small as possible. For example you should use character sets -for identifiers and numbers. Feel free to use the general -character constructor \textit{CFUN} introduced in CW 1. - -\subsection*{Question 2} - -Implement the Sulzmann \& Lu lexer from the lectures. For -this you need to implement the functions $nullable$ and $der$ -(you can use your code from CW~1), as well as $mkeps$ and -$inj$. These functions need to be appropriately extended for -the extended regular expressions from Q1. Write down the -clauses for - -\begin{center} -\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}} -$mkeps([c_1,c_2,\ldots,c_n])$ & $\dn$ & $?$\\ -$mkeps(r^+)$ & $\dn$ & $?$\\ -$mkeps(r^?)$ & $\dn$ & $?$\\ -$mkeps(r^{\{n\}})$ & $\dn$ & $?$\medskip\\ -$inj\, ([c_1,c_2,\ldots,c_n])\,c\,\ldots$ & $\dn$ & $?$\\ -$inj\, (r^+)\,c\,\ldots$ & $\dn$ & $?$\\ -$inj\, (r^?)\,c\,\ldots$ & $\dn$ & $?$\\ -$inj\, (r^{\{n\}})\,c\,\ldots$ & $\dn$ & $?$\\ -\end{tabular} -\end{center} - -\noindent where $inj$ takes three arguments: a regular -expression, a character and a value. Test your lexer code -with at least the two small examples below: - -\begin{center} -\begin{tabular}{ll} -regex: & string:\smallskip\\ -$a^{\{3\}}$ & $aaa$\\ -$(a + \ONE)^{\{3\}}$ & $aa$ -\end{tabular} -\end{center} - - -\noindent Both strings should be successfully lexed by the -respective regular expression, that means the lexer returns -in both examples a value. - - -Also add the record regular expression from the -lectures to your lexer and implement a function, say -\pcode{env}, that returns all assignments from a value (such -that you can extract easily the tokens from a value).\medskip - -\noindent -Finally give the tokens for your regular expressions from Q1 and the -string - -\begin{center} -\code{"read n;"} -\end{center} - -\noindent -and use your \pcode{env} function to give the token sequence. - - -\subsection*{Question 3} - -Extend your lexer from Q2 to also simplify regular expressions after -each derivation step and rectify the computed values after each -injection. Use this lexer to tokenize the programs in -Figures~\ref{fib} -- \ref{collatz}. You can find the programms also on -KEATS. Give the tokens of these programs where whitespaces are -filtered out. Make sure you can tokenise \textbf{exactly} these -programs.\bigskip - - -\begin{figure}[h] -\mbox{\lstinputlisting[language=While,xleftmargin=10mm]{../progs/while-tests/fib.while}} -\caption{Fibonacci program in the WHILE language.\label{fib}} -\end{figure} - -\begin{figure}[h] -\mbox{\lstinputlisting[language=While,xleftmargin=10mm]{../progs/while-tests/loops.while}} -\caption{The three-nested-loops program in the WHILE language. -(Usually used for timing measurements.)\label{loop}} -\end{figure} - -\begin{figure}[h] -\mbox{\lstinputlisting[language=While,xleftmargin=10mm]{../progs/while-tests/factors.while}} -\caption{A program that calculates factors for numbers in the WHILE - language.\label{factors}} -\end{figure} - -\begin{figure}[h] -\mbox{\lstinputlisting[language=While,xleftmargin=10mm]{../progs/while-tests/collatz2.while}} -\caption{A program that calculates the Collatz series for numbers - between 1 and 100.\label{collatz}} -\end{figure} - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff -r 4b208d81e002 -r c0bdd4ad69ca coursework/cw03.pdf Binary file coursework/cw03.pdf has changed diff -r 4b208d81e002 -r c0bdd4ad69ca coursework/cw03.tex --- a/coursework/cw03.tex Tue Sep 01 15:57:55 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,167 +0,0 @@ -% !TEX program = xelatex -\documentclass{article} -\usepackage{../style} -\usepackage{../langs} - -\begin{document} - -\section*{Coursework 3} - - - -\noindent This coursework is worth 10\% and is due on \cwTHREE{} at -18:00. You are asked to implement a parser for the WHILE language and -also an interpreter. You can do the implementation in any programming -language you like, but you need to submit the source code with which -you answered the questions, otherwise a mark of 0\% will be -awarded. You should use the lexer from the previous coursework for the -parser. Please package everything(!) in a zip-file that creates a -directory with the name \texttt{YournameYourFamilyname} on my end. - -\subsection*{Disclaimer\alert} - -It should be understood that the work you submit represents your own -effort. You have not copied from anyone else. An exception is the -Scala code I showed during the lectures or uploaded to KEATS, which -you can both use. You can also use your own code from the CW~1 and -CW~2. - - -\subsection*{Question 1} - -Design a grammar for the WHILE language and give the grammar -rules. The main categories of non-terminals should be: - -\begin{itemize} -\item arithmetic expressions (with the operations from the - previous coursework, that is \pcode{+}, \pcode{-}, \pcode{*}, - \pcode{/} and \pcode{\%}) -\item boolean expressions (with the operations \pcode{==}, \pcode{<}, \pcode{>}, - \code{>=}, \code{<=}, - \code{!=}, \pcode{&&}, \pcode{||}, \pcode{true} and \pcode{false}) -\item single statements (that is \pcode{skip}, assignments, \pcode{if}s, - \pcode{while}-loops, \pcode{read} and \pcode{write}) -\item compound statements separated by semicolons -\item blocks which are enclosed in curly parentheses -\end{itemize} - -\noindent -Make sure the grammar is not left-recursive. - -\subsection*{Question 2} - -You should implement a parser for the WHILE language using parser -combinators. Be careful that the parser takes as input a stream, or -list, of \emph{tokens} generated by the tokenizer from the previous -coursework. For this you might want to filter out whitespaces and -comments. Your parser should be able to handle the WHILE programs in -Figures~\ref{fib}, \ref{loop} and \ref{primes}. In addition give the -parse tree for the statement: - -\begin{lstlisting}[language=While,numbers=none] -if (a < b) then skip else a := a * b + 1 -\end{lstlisting} - -\noindent -A (possibly incomplete) datatype for parse trees in Scala is shown -in Figure~\ref{trees}. - -\begin{figure}[p] -\begin{lstlisting}[language=Scala] -abstract class Stmt -abstract class AExp -abstract class BExp - -type Block = List[Stmt] - -case object Skip extends Stmt -case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt -case class While(b: BExp, bl: Block) extends Stmt -case class Assign(s: String, a: AExp) extends Stmt -case class Read(s: String) extends Stmt -case class WriteVar(s: String) extends Stmt -case class WriteStr(s: String) extends Stmt - // for printing variables and strings - -case class Var(s: String) extends AExp -case class Num(i: Int) extends AExp -case class Aop(o: String, a1: AExp, a2: AExp) extends AExp - -case object True extends BExp -case object False extends BExp -case class Bop(o: String, a1: AExp, a2: AExp) extends BExp -case class Lop(o: String, b1: BExp, b2: BExp) extends BExp - // logical operations: and, or -\end{lstlisting} -\caption{The datatype for parse trees in Scala.\label{trees}} -\end{figure} - -\subsection*{Question 3} - -Implement an interpreter for the WHILE language you designed -and parsed in Question 1 and 2. This interpreter should take -as input a parse tree. However be careful because, programs -contain variables and variable assignments. This means -you need to maintain a kind of memory, or environment, -where you can look up a value of a variable and also -store a new value if it is assigned. Therefore an -evaluation function (interpreter) needs to look roughly as -follows - -\begin{lstlisting}[numbers=none] -eval_stmt(stmt, env) -\end{lstlisting} - -\noindent -where \pcode{stmt} corresponds to the parse tree -of the program and \pcode{env} is an environment -acting as a store for variable values. -Consider the Fibonacci program in Figure~\ref{fib}. -At the beginning of the program this store will be -empty, but needs to be extended in line 3 and 4 where -the variables \pcode{minus1} and \pcode{minus2} -are assigned values. These values need to be reassigned in -lines 7 and 8. The program should be interpreted -according to straightforward rules: for example an -if-statement will ``run'' the if-branch if the boolean -evaluates to \pcode{true}, otherwise the else-branch. -Loops should be run as long as the boolean is \pcode{true}. -Programs you should be able to run are shown in -Figures \ref{fib} -- \ref{collatz}. - - -Give some time measurements for your interpreter -and the loop program in Figure~\ref{loop}. For example -how long does your interpreter take when \pcode{start} -is initialised with 100, 500 and so on. How far can -you scale this value if you are willing to wait, say -1 Minute? - -\begin{figure}[h] -\lstinputlisting[language=while,xleftmargin=20mm]{../progs/while-tests/fib.while} -\caption{Fibonacci program in the WHILE language.\label{fib}} -\end{figure} - -\begin{figure}[h] -\lstinputlisting[language=while,xleftmargin=20mm]{../progs/while-tests/loops.while} -\caption{The three-nested-loops program in the WHILE language. -Usually used for timing measurements.\label{loop}} -\end{figure} - -\begin{figure}[h] -\lstinputlisting[language=while,xleftmargin=0mm]{../progs/while-tests/primes.while} -\caption{Prime number program.\label{primes}} -\end{figure} - - -\begin{figure}[p] -\lstinputlisting[language=while,xleftmargin=0mm]{../progs/while-tests/collatz2.while} -\caption{Collatz series program.\label{collatz}} -\end{figure} - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff -r 4b208d81e002 -r c0bdd4ad69ca coursework/cw04.pdf Binary file coursework/cw04.pdf has changed diff -r 4b208d81e002 -r c0bdd4ad69ca coursework/cw04.tex --- a/coursework/cw04.tex Tue Sep 01 15:57:55 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,421 +0,0 @@ -% !TEX program = xelatex -\documentclass{article} -\usepackage{../style} -\usepackage{../langs} - -\begin{document} - -%https://github.com/Storyyeller/Krakatau -%https://docs.oracle.com/javase/specs/jvms/se7/html/ - -% Jasmin Tutorial -%http://saksagan.ceng.metu.edu.tr/courses/ceng444/link/jvm-cpm.html - -\section*{Coursework 4} - -\noindent This coursework is worth 10\% and is due on \cwFOUR{} -at 18:00. You are asked to implement a compiler for -the WHILE language that targets the assembler language -provided by Jasmin or Krakatau (both have very similar -syntax). You can do the implementation in any programming -language you like, but you need to submit the source code with -which you answered the questions, otherwise a mark of 0\% will -be awarded. You should use the lexer and parser from the -previous courseworks. Please package \emph{everything}(!) in -a zip-file that creates a directory with the name -\texttt{YournameYourFamilyname} on my end. - -\subsection*{Disclaimer\alert} - -It should be understood that the work you submit represents -your own effort. You have not copied from anyone else. An -exception is the Scala code I showed during the lectures, -which you can use. You can also use your own code from the -CW~1, CW~2 and CW~3. - - -\subsection*{Jasmin Assembler} - -The Jasmin assembler is available from - -\begin{center} -\url{http://jasmin.sourceforge.net} -\end{center} - -\noindent -There is a user guide for Jasmin - -\begin{center} -\url{http://jasmin.sourceforge.net/guide.html} -\end{center} - -\noindent and also a description of some of the instructions -that the JVM understands - -\begin{center} -\url{http://jasmin.sourceforge.net/instructions.html} -\end{center} - -\noindent If you generated a correct assembler file for -Jasmin, for example \texttt{loops.j}, you can use - -\begin{center} -\texttt{java -jar jasmin-2.4/jasmin.jar loops.j} -\end{center} - -\noindent in order to translate it into Java Byte Code. The -resulting class file can be run with - -\begin{center} -\texttt{java loops} -\end{center} - -\noindent where you might need to give the correct path to the -class file. For example: - -\begin{center} -\texttt{java -cp . loops/loops} -\end{center} - -\noindent There are also other resources about Jasmin on the -Internet, for example - -\begin{center} -\small\url{http://www.ceng.metu.edu.tr/courses/ceng444/link/f3jasmintutorial.html} -\end{center} - -\noindent and - -\begin{center} - \small\url{http://www.csc.villanova.edu/~tway/courses/csc4181/s2018/labs/lab4/JVM.pdf} -\end{center} - -\subsection*{Krakatau Assembler} - -The Krakatau assembler is available from - -\begin{center} -\url{https://github.com/Storyyeller/Krakatau} -\end{center} - -\noindent This assembler requires Python and a package called -\pcode{ply} available from - -\begin{center} -\url{https://pypi.python.org/pypi/ply} -\end{center} - -\noindent This assembler is largely compatible with the Jasmin -syntax---that means for the files we are concerned with here, -it understands the same input syntax (no changes to your -compiler need to be made; ok maybe some small syntactic -adjustments are needed). You can generate Java Byte Code by -using - -\begin{center} -\texttt{python Krakatau-master/assemble.py loops.j} -\end{center} - -\noindent where you may have to adapt the directory where -Krakatau is installed (I just downloaded the zip file from -Github and \pcode{Krakatau-master} was the directory where it -was installed). Again the resulting class-file you can run with -\texttt{java}. - - -%\noindent You need to submit a document containing the answers -%for the two questions below. You can do the implementation in -%any programming language you like, but you need to submit the -%source code with which you answered the questions. Otherwise -%the submission will not be counted. However, the coursework -%will \emph{only} be judged according to the answers. You can -%submit your answers in a txt-file or as pdf.\bigskip - - -\subsection*{Question 1} - -You need to lex and parse WHILE programs, and then generate -Java Byte Code instructions for the Jasmin assembler (or -Krakatau assembler). As solution you need to submit the -assembler instructions for the Fibonacci and Factorial -programs. Both should be so modified that a user can input on -the console which Fibonacci number and which Factorial should -be calculated. The Fibonacci program is given in -Figure~\ref{fibs}. You can write your own program for -calculating factorials. Submit your assembler code as -a file that can be run, not as PDF-text. - -\begin{figure}[t] -\lstinputlisting[language=while]{../progs/while-tests/fib.while} -\caption{The Fibonacci program in the WHILE language.\label{fibs}} -\end{figure} - -\subsection*{Question 2} - -Extend the syntax of your language so that it contains also -\texttt{for}-loops, like - -\begin{center} -\lstset{language=While} -\code{for} \;\textit{Id} \texttt{:=} \textit{AExp}\; \code{upto} -\;\textit{AExp}\; \code{do} \textit{Block} -\end{center} - -\noindent The intended meaning is to first assign the variable -\textit{Id} the value of the first arithmetic expression, test -whether this value is less or equal than the value of the -second arithmetic expression. If yes, go through the loop, and -at the end increase the value of the loop variable by 1 and -start again with the test. If no, leave the loop. For example -the following instance of a \code{for}-loop is supposed to -print out the numbers \pcode{2}, \pcode{3}, \pcode{4}. - - -\begin{center} -\begin{minipage}{12cm} -\begin{lstlisting}[language=While, numbers=none] -for i := 2 upto 4 do { - write i -} -\end{lstlisting} -\end{minipage} -\end{center} - -\noindent There are two ways how this can be implemented: one -is to adapt the code generation part of the compiler and -generate specific code for \code{for}-loops; the other is to -translate the abstract syntax tree of \code{for}-loops into -an abstract syntax tree using existing language constructs. -For example the loop above could be translated to the -following \code{while}-loop: - -\begin{center} -\begin{minipage}{12cm} -\begin{lstlisting}[language=While, numbers=none] -i := 2; -while (i <= 4) do { - write i; - i := i + 1; -} -\end{lstlisting} -\end{minipage} -\end{center} - -\subsection*{Question 3} - -\noindent In this question you are supposed to give the -assembler instructions for the program - -\begin{center} -\begin{minipage}{12cm} -\begin{lstlisting}[language=While, numbers=none] -for i := 1 upto 10 do { - for i := 1 upto 10 do { - write i - } -} -\end{lstlisting} -\end{minipage} -\end{center} - -\noindent -Note that in this program the variable \pcode{i} is used -twice. You need to make a decision how it should be compiled? -Explain your decision and indicate what this program would -print out. - -\subsection*{Further Information} - -The Java infrastructure unfortunately does not contain an -assembler out-of-the-box (therefore you need to download the -additional package Jasmin or Krakatau---see above). But it -does contain a disassembler, called \texttt{javap}. A -dissembler does the ``opposite'' of an assembler: it generates -readable assembler code from Java Byte Code. Have a look at -the following example: Compile using the usual Java compiler -the simple Hello World program below: - -\begin{center} -\begin{minipage}{12cm} -\begin{lstlisting}[language=Java,numbers=none] -class HelloWorld { - public static void main(String[] args) { - System.out.println("Hello World!"); - } -} -\end{lstlisting} -\end{minipage} -\end{center} - -\noindent -You can use the command - -\begin{center} -\begin{minipage}{12cm} -\begin{lstlisting}[language={},numbers=none] -javap -v HelloWorld -\end{lstlisting} -\end{minipage} -\end{center} - -\noindent to see the assembler instructions of the Java Byte -Code that has been generated for this program. You can compare -this with the code generated for the Scala version of Hello -World. - -\begin{center} -\begin{minipage}{12cm} -\begin{lstlisting}[language=Scala,numbers=none] -object HelloWorld { - def main(args: Array[String]) { - println("Hello World!") - } -} -\end{lstlisting} -\end{minipage} -\end{center} - - -\subsection*{Library Functions} - -You need to generate code for the commands \texttt{write} and -\texttt{read}. This will require the addition of some -``library'' functions to your generated code. The first -command even needs two versions, because you need to write out -an integer and string. The Java byte code will need two -separate functions for this. For writing out an integer, you -can use the assembler code - -\begin{center} -\begin{minipage}{12cm} -\begin{lstlisting}[language=JVMIS, numbers=none] -.method public static write(I)V - .limit locals 1 - .limit stack 2 - getstatic java/lang/System/out Ljava/io/PrintStream; - iload 0 - invokevirtual java/io/PrintStream/println(I)V - return -.end method -\end{lstlisting} -\end{minipage} -\end{center} - -\noindent This function will invoke Java's \texttt{println} -function for integers. Then if you need to generate code for -\texttt{write x} where \texttt{x} is an integer variable, you -can generate - -\begin{center} -\begin{minipage}{12cm} -\begin{lstlisting}[language=JVMIS, numbers=none] -iload n -invokestatic XXX/XXX/write(I)V -\end{lstlisting} -\end{minipage} -\end{center} - -\noindent where \texttt{n} is the index where the value of the -variable \texttt{x} is stored. The \texttt{XXX/XXX} needs to -be replaced with the class name which you use to generate the -code (for example \texttt{fib/fib} in case of the Fibonacci -numbers). - -Writing out a string is similar. The corresponding library -function uses strings instead of integers: - -\begin{center} -\begin{minipage}{12cm} -\begin{lstlisting}[language=JVMIS, numbers=none] -.method public static writes(Ljava/lang/String;)V - .limit stack 2 - .limit locals 1 - getstatic java/lang/System/out Ljava/io/PrintStream; - aload 0 - invokevirtual java/io/PrintStream/println(Ljava/lang/String;)V - return -.end method -\end{lstlisting} -\end{minipage} -\end{center} - -\noindent The code that needs to be generated for \code{write -"some_string"} commands is - -\begin{center} -\begin{minipage}{12cm} -\begin{lstlisting}[language=JVMIS,numbers=none] -ldc "some_string" -invokestatic XXX/XXX/writes(Ljava/lang/String;)V -\end{lstlisting} -\end{minipage} -\end{center} - -\noindent Again you need to adjust the \texttt{XXX/XXX} part -in each call. - -The code for \texttt{read} is more complicated. The reason is -that inputting a string will need to be transformed into an -integer. The code in Figure~\ref{read} does this. It can be -called with - -\begin{center} -\begin{minipage}{12cm} -\begin{lstlisting}[language=JVMIS,numbers=none] -invokestatic XXX/XXX/read()I -istore n -\end{lstlisting} -\end{minipage} -\end{center} - -\noindent -where \texttt{n} is the index of the variable that requires an input. If you -use Windows you need to take into account that a ``return'' is not just a newline, -\code{'\\10'}, but \code{'\\13\\10'}. This means you need to change line~12 in -Figure~\ref{read} to \pcode{ldc 13}. - - -\begin{figure}[t]\small -\begin{lstlisting}[language=JVMIS,numbers=left] -.method public static read()I - .limit locals 10 - .limit stack 10 - - ldc 0 - istore 1 ; this will hold our final integer -Label1: - getstatic java/lang/System/in Ljava/io/InputStream; - invokevirtual java/io/InputStream/read()I - istore 2 - iload 2 - ldc 10 ; the newline delimiter for Unix (Windows 13) - isub - ifeq Label2 - iload 2 - ldc 32 ; the space delimiter - isub - ifeq Label2 - iload 2 - ldc 48 ; we have our digit in ASCII, have to subtract it from 48 - isub - ldc 10 - iload 1 - imul - iadd - istore 1 - goto Label1 -Label2: - ;when we come here we have our integer computed in Local Variable 1 - iload 1 - ireturn -.end method -\end{lstlisting}\normalsize -\caption{Assembler code for reading an integer from the console.\label{read}} -\end{figure} - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff -r 4b208d81e002 -r c0bdd4ad69ca coursework/cw05.pdf Binary file coursework/cw05.pdf has changed diff -r 4b208d81e002 -r c0bdd4ad69ca coursework/cw05.tex --- a/coursework/cw05.tex Tue Sep 01 15:57:55 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ -% !TEX program = xelatex -\documentclass{article} -\usepackage{../style} -\usepackage{../langs} - -\begin{document} - -\section*{Coursework 5} - - - -\noindent This coursework is worth 12\% and is due on \cwFIVE{} at -18:00. You are asked to implement a compiler targetting the LLVM-IR. -You can do the implementation in any programming -language you like, but you need to submit the source code with which -you answered the questions, otherwise a mark of 0\% will be -awarded. You should use the lexer from the previous coursework for the -parser. Please package everything(!) in a zip-file that creates a -directory with the name \texttt{YournameYourFamilyname} on my end. - -\subsection*{Disclaimer\alert} - -It should be understood that the work you submit represents your own -effort. You have not copied from anyone else. An exception is the -Scala code I showed during the lectures or uploaded to KEATS, which -you can both use. You can also use your own code from the CW~1 -- -CW~4. - - -\subsection*{Question 1} - -\subsection*{Question 2} - -\subsection*{Question 3} - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff -r 4b208d81e002 -r c0bdd4ad69ca coursework/cw0A.tex --- a/coursework/cw0A.tex Tue Sep 01 15:57:55 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,245 +0,0 @@ -% !TEX program = xelatex -\documentclass{article} -\usepackage{../style} -\usepackage{../langs} - -\begin{document} - -\section*{Coursework (Strand 2)} - -\noindent This coursework is worth 20\% and is due on \cwISABELLE{} at -18:00. You are asked to prove the correctness of the regular expression -matcher from the lectures using the Isabelle theorem prover. You need to -submit a theory file containing this proof and also a document -describing your proof. The Isabelle theorem prover is available from - -\begin{center} -\url{http://isabelle.in.tum.de} -\end{center} - -\noindent This is an interactive theorem prover, meaning that -you can make definitions and state properties, and then help -the system with proving these properties. Sometimes the proofs -are also completely automatic. There is a shortish user guide for -Isabelle, called ``Programming and Proving in Isabelle/HOL'' -at - -\begin{center} -\url{http://isabelle.in.tum.de/documentation.html} -\end{center} - -\noindent -and also a longer (free) book at - -\begin{center} -\url{http://www.concrete-semantics.org} -\end{center} - -\noindent The Isabelle theorem prover is operated through the -jEdit IDE, which might not be an editor that is widely known. -JEdit is documented in - -\begin{center} -\url{http://isabelle.in.tum.de/dist/Isabelle2014/doc/jedit.pdf} -\end{center} - - -\noindent If you need more help or you are stuck somewhere, -please feel free to contact me (christian.urban at kcl.ac.uk). I -am one of the main developers of Isabelle and have used it for -approximately 16 years. One of the success stories of -Isabelle is the recent verification of a microkernel operating -system by an Australian group, see \url{http://sel4.systems}. -Their operating system is the only one that has been proved -correct according to its specification and is used for -application where high assurance, security and reliability is -needed (like in helicopters which fly over enemy territory). - - -\subsection*{The Task} - -In this coursework you are asked to prove the correctness of the -regular expression matcher from the lectures in Isabelle. The matcher -should be able to deal with the usual (basic) regular expressions - -\[ -\ZERO,\; \ONE,\; c,\; r_1 + r_2,\; r_1 \cdot r_2,\; r^* -\] - -\noindent -but also with the following extended regular expressions: - -\begin{center} -\begin{tabular}{ll} - $r^{\{n\}}$ & exactly $n$-times\\ - $r^{\{..m\}}$ & zero or more times $r$ but no more than $m$-times\\ - $r^{\{n..\}}$ & at least $n$-times $r$\\ - $r^{\{n..m\}}$ & at least $n$-times $r$ but no more than $m$-times\\ - $\sim{}r$ & not-regular-expression of $r$\\ -\end{tabular} -\end{center} - - -\noindent -You need to first specify what the matcher is -supposed to do and then to implement the algorithm. Finally you need -to prove that the algorithm meets the specification. The first two -parts are relatively easy, because the definitions in Isabelle will -look very similar to the mathematical definitions from the lectures or -the Scala code that is supplied at KEATS. For example very similar to -Scala, regular expressions are defined in Isabelle as an inductive -datatype: - -\begin{lstlisting}[language={},numbers=none] -datatype rexp = - ZERO -| ONE -| CHAR char -| SEQ rexp rexp -| ALT rexp rexp -| STAR rexp -\end{lstlisting} - -\noindent The meaning of regular expressions is given as -usual: - -\begin{center} -\begin{tabular}{rcl@{\hspace{10mm}}l} -$L(\ZERO)$ & $\dn$ & $\varnothing$ & \pcode{ZERO}\\ -$L(\ONE)$ & $\dn$ & $\{[]\}$ & \pcode{ONE}\\ -$L(c)$ & $\dn$ & $\{[c]\}$ & \pcode{CHAR}\\ -$L(r_1 + r_2)$ & $\dn$ & $L(r_1) \cup L(r_2)$ & \pcode{ALT}\\ -$L(r_1 \cdot r_2)$ & $\dn$ & $L(r_1) \,@\, L(r_2)$ & \pcode{SEQ}\\ -$L(r^*)$ & $\dn$ & $(L(r))^*$ & \pcode{STAR}\\ -\end{tabular} -\end{center} - -\noindent You would need to implement this function in order -to state the theorem about the correctness of the algorithm. -The function $L$ should in Isabelle take a \pcode{rexp} as -input and return a set of strings. Its type is -therefore - -\begin{center} -\pcode{L} \pcode{::} \pcode{rexp} $\Rightarrow$ \pcode{string set} -\end{center} - -\noindent Isabelle treats strings as an abbreviation for lists -of characters. This means you can pattern-match strings like -lists. The union operation on sets (for the \pcode{ALT}-case) -is a standard definition in Isabelle, but not the -concatenation operation on sets and also not the -star-operation. You would have to supply these definitions. -The concatenation operation can be defined in terms of the -append function, written \code{_ @ _} in Isabelle, for lists. -The star-operation can be defined as a ``big-union'' of -powers, like in the lectures, or directly as an inductive set. - -The functions for the matcher are shown in -Figure~\ref{matcher}. The theorem that needs to be proved is - -\begin{lstlisting}[numbers=none,language={},keywordstyle=\color{black}\ttfamily,mathescape] -theorem - "matches r s $\longleftrightarrow$ s $\in$ L r" -\end{lstlisting} - -\noindent which states that the function \emph{matches} is -true if and only if the string is in the language of the -regular expression. A proof for this lemma will need -side-lemmas about \pcode{nullable} and \pcode{der}. An example -proof in Isabelle that will not be relevant for the theorem -above is given in Figure~\ref{proof}. - -\begin{figure}[p] -\begin{lstlisting}[language={},keywordstyle=\color{black}\ttfamily,mathescape] -fun - nullable :: "rexp $\Rightarrow$ bool" -where - "nullable ZERO = False" -| "nullable ONE = True" -| "nullable (CHAR _) = False" -| "nullable (ALT r1 r2) = (nullable(r1) $\vee$ nullable(r2))" -| "nullable (SEQ r1 r2) = (nullable(r1) $\wedge$ nullable(r2))" -| "nullable (STAR _) = True" - -fun - der :: "char $\Rightarrow$ rexp $\Rightarrow$ rexp" -where - "der c ZERO = ZERO" -| "der c ONE = ZERO" -| "der c (CHAR d) = (if c = d then ONE else ZERO)" -| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)" -| "der c (SEQ r1 r2) = - (if (nullable r1) then ALT (SEQ (der c r1) r2) (der c r2) - else SEQ (der c r1) r2)" -| "der c (STAR r) = SEQ (der c r) (STAR r)" - -fun - ders :: "rexp $\Rightarrow$ string $\Rightarrow$ rexp" -where - "ders r [] = r" -| "ders r (c # s) = ders (der c r) s" - -fun - matches :: "rexp $\Rightarrow$ string $\Rightarrow$ bool" -where - "matches r s = nullable (ders r s)" -\end{lstlisting} -\caption{The definition of the matcher algorithm in -Isabelle.\label{matcher}} -\end{figure} - -\begin{figure}[p] -\begin{lstlisting}[language={},keywordstyle=\color{black}\ttfamily,mathescape] -fun - zeroable :: "rexp $\Rightarrow$ bool" -where - "zeroable ZERO = True" -| "zeroable ONE = False" -| "zeroable (CHAR _) = False" -| "zeroable (ALT r1 r2) = (zeroable(r1) $\wedge$ zeroable(r2))" -| "zeroable (SEQ r1 r2) = (zeroable(r1) $\vee$ zeroable(r2))" -| "zeroable (STAR _) = False" - -lemma - "zeroable r $\longleftrightarrow$ L r = {}" -proof (induct) - case (ZERO) - have "zeroable ZERO" "L ZERO = {}" by simp_all - then show "zeroable ZERO $\longleftrightarrow$ (L ZERO = {})" by simp -next - case (ONE) - have "$\neg$ zeroable ONE" "L ONE = {[]}" by simp_all - then show "zeroable ONE $\longleftrightarrow$ (L ONE = {})" by simp -next - case (CHAR c) - have "$\neg$ zeroable (CHAR c)" "L (CHAR c) = {[c]}" by simp_all - then show "zeroable (CHAR c) $\longleftrightarrow$ (L (CHAR c) = {})" by simp -next - case (ALT r1 r2) - have ih1: "zeroable r1 $\longleftrightarrow$ L r1 = {}" by fact - have ih2: "zeroable r2 $\longleftrightarrow$ L r2 = {}" by fact - show "zeroable (ALT r1 r2) $\longleftrightarrow$ (L (ALT r1 r2) = {})" - using ih1 ih2 by simp -next - case (SEQ r1 r2) - have ih1: "zeroable r1 $\longleftrightarrow$ L r1 = {}" by fact - have ih2: "zeroable r2 $\longleftrightarrow$ L r2 = {}" by fact - show "zeroable (SEQ r1 r2) $\longleftrightarrow$ (L (SEQ r1 r2) = {})" - using ih1 ih2 by (auto simp add: Conc_def) -next - case (STAR r) - have "$\neg$ zeroable (STAR r)" "[] $\in$ L (r) ^ 0" by simp_all - then show "zeroable (STAR r) $\longleftrightarrow$ (L (STAR r) = {})" - by (simp (no_asm) add: Star_def) blast -qed -\end{lstlisting} -\caption{An Isabelle proof about the function \texttt{zeroable}.\label{proof}} -\end{figure} - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff -r 4b208d81e002 -r c0bdd4ad69ca coursework/cw4.pdf Binary file coursework/cw4.pdf has changed diff -r 4b208d81e002 -r c0bdd4ad69ca cws/cw01.pdf Binary file cws/cw01.pdf has changed diff -r 4b208d81e002 -r c0bdd4ad69ca cws/cw01.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cws/cw01.tex Tue Sep 01 16:00:37 2020 +0100 @@ -0,0 +1,337 @@ +% !TEX program = xelatex +\documentclass{article} +\usepackage{../style} +\usepackage{../langs} + +\usepackage{array} + + +\begin{document} +\newcolumntype{C}[1]{>{\centering}m{#1}} + +\section*{Coursework 1} + +This coursework is worth 5\% and is due on \cwONE{} at 18:00. You are +asked to implement a regular expression matcher and submit a document +containing the answers for the questions below. You can do the +implementation in any programming language you like, but you need to +submit the source code with which you answered the questions, +otherwise a mark of 0\% will be awarded. You can submit your answers +in a txt-file or pdf. Code send as code. Please package everything +inside a zip-file that creates a directory with the name +\[\texttt{YournameYourfamilyname}\] + +\noindent on my end. Thanks! + + + +\subsubsection*{Disclaimer\alert} + +It should be understood that the work you submit represents +your own effort. You have not copied from anyone else. An +exception is the Scala code I showed during the lectures or +uploaded to KEATS, which you can freely use.\bigskip + +\noindent +If you have any questions, please send me an email in \textbf{good} +time.\bigskip + +\subsection*{Task} + +The task is to implement a regular expression matcher based on +derivatives of regular expressions. The implementation should +be able to deal with the usual (basic) regular expressions + +\[ +\ZERO,\; \ONE,\; c,\; r_1 + r_2,\; r_1 \cdot r_2,\; r^* +\] + +\noindent +but also with the following extended regular expressions: + +\begin{center} +\begin{tabular}{ll} + $[c_1,c_2,\ldots,c_n]$ & a set of characters---for character ranges\\ + $r^+$ & one or more times $r$\\ + $r^?$ & optional $r$\\ + $r^{\{n\}}$ & exactly $n$-times\\ + $r^{\{..m\}}$ & zero or more times $r$ but no more than $m$-times\\ + $r^{\{n..\}}$ & at least $n$-times $r$\\ + $r^{\{n..m\}}$ & at least $n$-times $r$ but no more than $m$-times\\ + $\sim{}r$ & not-regular-expression of $r$\\ +\end{tabular} +\end{center} + +\noindent You can assume that $n$ and $m$ are greater or equal than +$0$. In the case of $r^{\{n,m\}}$ you can also assume $0 \le n \le m$.\bigskip + +\noindent {\bf Important!} Your implementation should have explicit +case classes for the basic regular expressions, but also explicit case +classes for +the extended regular expressions.\footnote{Please call them + \code{RANGE}, \code{PLUS}, \code{OPTIONAL}, \code{NTIMES}, + \code{UPTO}, \code{FROM} and \code{BETWEEN}.} + That means do not treat the extended regular expressions +by just translating them into the basic ones. See also Question 3, +where you are asked to explicitly give the rules for \textit{nullable} +and \textit{der} for the extended regular expressions. Something like + +\[der\,c\,(r^+) \dn der\,c\,(r\cdot r^*)\] + +\noindent is \emph{not} allowed as answer in Question 3 and \emph{not} +allowed in your code.\medskip + +\noindent +The meanings of the extended regular expressions are + +\begin{center} +\begin{tabular}{r@{\hspace{2mm}}c@{\hspace{2mm}}l} + $L([c_1,c_2,\ldots,c_n])$ & $\dn$ & $\{[c_1], [c_2], \ldots, [c_n]\}$\\ + $L(r^+)$ & $\dn$ & $\bigcup_{1\le i}.\;L(r)^i$\\ + $L(r^?)$ & $\dn$ & $L(r) \cup \{[]\}$\\ + $L(r^{\{n\}})$ & $\dn$ & $L(r)^n$\\ + $L(r^{\{..m\}})$ & $\dn$ & $\bigcup_{0\le i \le m}.\;L(r)^i$\\ + $L(r^{\{n..\}})$ & $\dn$ & $\bigcup_{n\le i}.\;L(r)^i$\\ + $L(r^{\{n..m\}})$ & $\dn$ & $\bigcup_{n\le i \le m}.\;L(r)^i$\\ + $L(\sim{}r)$ & $\dn$ & $\Sigma^* - L(r)$ +\end{tabular} +\end{center} + +\noindent whereby in the last clause the set $\Sigma^*$ stands +for the set of \emph{all} strings over the alphabet $\Sigma$ +(in the implementation the alphabet can be just what is +represented by, say, the type \pcode{Char}). So $\sim{}r$ +means in effect ``all the strings that $r$ cannot match''.\medskip + +\noindent +Be careful that your implementation of \textit{nullable} and +\textit{der} satisfies for every regular expression $r$ the following +two properties (see also Question 3): + +\begin{itemize} +\item $\textit{nullable}(r)$ if and only if $[]\in L(r)$ +\item $L(der\,c\,r) = Der\,c\,(L(r))$ +\end{itemize} + + + +\subsection*{Question 1 (Unmarked)} + +What is your King's email address (you will need it in +Question 5)? + +\subsection*{Question 2 (Unmarked)} + +Can you please list all programming languages in which you have +already written programs (include only instances where you have spent +at least a good working day fiddling with a program)? This is just +for my curiosity to estimate what your background is. + +\subsection*{Question 3} + +From the +lectures you have seen the definitions for the functions +\textit{nullable} and \textit{der} for the basic regular +expressions. Implement and write down the rules for the extended +regular expressions: + +\begin{center} +\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}} + $\textit{nullable}([c_1,c_2,\ldots,c_n])$ & $\dn$ & $?$\\ + $\textit{nullable}(r^+)$ & $\dn$ & $?$\\ + $\textit{nullable}(r^?)$ & $\dn$ & $?$\\ + $\textit{nullable}(r^{\{n\}})$ & $\dn$ & $?$\\ + $\textit{nullable}(r^{\{..m\}})$ & $\dn$ & $?$\\ + $\textit{nullable}(r^{\{n..\}})$ & $\dn$ & $?$\\ + $\textit{nullable}(r^{\{n..m\}})$ & $\dn$ & $?$\\ + $\textit{nullable}(\sim{}r)$ & $\dn$ & $?$ +\end{tabular} +\end{center} + +\begin{center} +\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}} + $der\, c\, ([c_1,c_2,\ldots,c_n])$ & $\dn$ & $?$\\ + $der\, c\, (r^+)$ & $\dn$ & $?$\\ + $der\, c\, (r^?)$ & $\dn$ & $?$\\ + $der\, c\, (r^{\{n\}})$ & $\dn$ & $?$\\ + $der\, c\, (r^{\{..m\}})$ & $\dn$ & $?$\\ + $der\, c\, (r^{\{n..\}})$ & $\dn$ & $?$\\ + $der\, c\, (r^{\{n..m\}})$ & $\dn$ & $?$\\ + $der\, c\, (\sim{}r)$ & $\dn$ & $?$\\ +\end{tabular} +\end{center} + +\noindent +Remember your definitions have to satisfy the two properties + +\begin{itemize} +\item $\textit{nullable}(r)$ if and only if $[]\in L(r)$ +\item $L(der\,c\,r)) = Der\,c\,(L(r))$ +\end{itemize} + +\noindent +Given the definitions of \textit{nullable} and \textit{der}, it is +easy to implement a regular expression matcher. Test your regular +expression matcher with (at least) the examples: + + +\begin{center} +\def\arraystretch{1.2} +\begin{tabular}{@{}r|m{3mm}|m{6mm}|m{6mm}|m{10mm}|m{6mm}|m{10mm}|m{10mm}|m{10mm}} + string & $a^?$ & $\sim{}a$ & $a^{\{3\}}$ & $(a^?)^{\{3\}}$ & $a^{\{..3\}}$ & + $(a^?)^{\{..3\}}$ & $a^{\{3..5\}}$ & $(a^?)^{\{3..5\}}$ \\\hline + $[]$ &&&&&&& \\\hline + \texttt{a} &&&&&&& \\\hline + \texttt{aa} &&&&&&& \\\hline + \texttt{aaa} &&&&&&& \\\hline + \texttt{aaaaa} &&&&&&& \\\hline + \texttt{aaaaaa}&&&&&&& \\ +\end{tabular} +\end{center} + +\noindent +Does your matcher produce the expected results? Make sure you +also test corner-cases, like $a^{\{0\}}$! + +\subsection*{Question 4} + +As you can see, there are a number of explicit regular expressions +that deal with single or several characters, for example: + +\begin{center} +\begin{tabular}{ll} + $c$ & matches a single character\\ + $[c_1,c_2,\ldots,c_n]$ & matches a set of characters---for character ranges\\ + $\textit{ALL}$ & matches any character +\end{tabular} +\end{center} + +\noindent +The latter is useful for matching any string (for example +by using $\textit{ALL}^*$). In order to avoid having an explicit constructor +for each case, we can generalise all these cases and introduce a single +constructor $\textit{CFUN}(f)$ where $f$ is a function from characters +to booleans. In Scala code this would look as follows: + +\begin{lstlisting}[numbers=none] +abstract class Rexp +... +case class CFUN(f: Char => Boolean) extends Rexp +\end{lstlisting}\smallskip + +\noindent +The idea is that the function $f$ determines which character(s) +are matched, namely those where $f$ returns \texttt{true}. +In this question implement \textit{CFUN} and define + +\begin{center} +\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}} + $\textit{nullable}(\textit{CFUN}(f))$ & $\dn$ & $?$\\ + $\textit{der}\,c\,(\textit{CFUN}(f))$ & $\dn$ & $?$ +\end{tabular} +\end{center} + +\noindent in your matcher and then also give definitions for + +\begin{center} +\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}} + $c$ & $\dn$ & $\textit{CFUN}(?)$\\ + $[c_1,c_2,\ldots,c_n]$ & $\dn$ & $\textit{CFUN}(?)$\\ + $\textit{ALL}$ & $\dn$ & $\textit{CFUN}(?)$ +\end{tabular} +\end{center} + +\noindent +You can either add the constructor $CFUN$ to your implementation in +Question 3, or you can implement this questions first +and then use $CFUN$ instead of \code{RANGE} and \code{CHAR} in Question 3. + + +\subsection*{Question 5} + +Suppose $[a\mbox{-}z0\mbox{-}9\_\,.\mbox{-}]$ stands for the regular expression + +\[[a,b,c,\ldots,z,0,\dots,9,\_,.,\mbox{-}]\;.\] + +\noindent +Define in your code the following regular expression for email addresses + +\[ +([a\mbox{-}z0\mbox{-}9\_\,.-]^+)\cdot @\cdot ([a\mbox{-}z0\mbox{-}9\,.-]^+)\cdot .\cdot ([a\mbox{-}z\,.]^{\{2,6\}}) +\] + +\noindent and calculate the derivative according to your own email +address. When calculating the derivative, simplify all regular +expressions as much as possible by applying the +following 7 simplification rules: + +\begin{center} +\begin{tabular}{l@{\hspace{2mm}}c@{\hspace{2mm}}ll} +$r \cdot \ZERO$ & $\mapsto$ & $\ZERO$\\ +$\ZERO \cdot r$ & $\mapsto$ & $\ZERO$\\ +$r \cdot \ONE$ & $\mapsto$ & $r$\\ +$\ONE \cdot r$ & $\mapsto$ & $r$\\ +$r + \ZERO$ & $\mapsto$ & $r$\\ +$\ZERO + r$ & $\mapsto$ & $r$\\ +$r + r$ & $\mapsto$ & $r$\\ +\end{tabular} +\end{center} + +\noindent Write down your simplified derivative in a readable +notation using parentheses where necessary. That means you +should use the infix notation $+$, $\cdot$, $^*$ and so on, +instead of raw code.\bigskip + + +\subsection*{Question 6} + +Implement the simplification rules in your regular expression matcher. +Consider the regular expression $/ \cdot * \cdot +(\sim{}(\textit{ALL}^* \cdot * \cdot / \cdot \textit{ALL}^*)) \cdot * +\cdot /$ and decide whether the following four strings are matched by +this regular expression. Answer yes or no. + +\begin{enumerate} +\item \texttt{"/**/"} +\item \texttt{"/*foobar*/"} +\item \texttt{"/*test*/test*/"} +\item \texttt{"/*test/*test*/"} +\end{enumerate} + +\subsection*{Question 7} + +Let $r_1$ be the regular expression $a\cdot a\cdot a$ and $r_2$ be +$(a^{\{19,19\}}) \cdot (a^?)$.\medskip + +\noindent +Decide whether the following three +strings consisting of $a$s only can be matched by $(r_1^+)^+$. +Similarly test them with $(r_2^+)^+$. Again answer in all six cases +with yes or no. \medskip + +\noindent +These are strings are meant to be entirely made up of $a$s. Be careful +when copy-and-pasting the strings so as to not forgetting any $a$ and +to not introducing any other character. + +\begin{enumerate} +\setcounter{enumi}{4} +\item \texttt{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"} +\item \texttt{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"} +\item \texttt{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"} +\end{enumerate} + + + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r 4b208d81e002 -r c0bdd4ad69ca cws/cw02.pdf Binary file cws/cw02.pdf has changed diff -r 4b208d81e002 -r c0bdd4ad69ca cws/cw02.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cws/cw02.tex Tue Sep 01 16:00:37 2020 +0100 @@ -0,0 +1,222 @@ +% !TEX program = xelatex +\documentclass{article} +\usepackage{../style} +\usepackage{../langs} + +\begin{document} + +\section*{Coursework 2} + +\noindent This coursework is worth 8\% and is due on \cwTWO{} at +18:00. You are asked to implement the Sulzmann \& Lu lexer for the +WHILE language. You can do the implementation in any programming +language you like, but you need to submit the source code with which +you answered the questions, otherwise a mark of 0\% will be +awarded. You can submit your answers in a txt-file or as pdf. Code +submit as code. Please package everything in a zip-file that creates a +directory with the name \texttt{YournameYourfamilyname} on my end. Thanks! + +\subsection*{Disclaimer\alert} + +It should be understood that the work you submit represents +your own effort. You have not copied from anyone else. An +exception is the Scala code from KEATS and the code I showed +during the lectures, which you can both freely use. You can +also use your own code from the CW~1. + +\subsection*{Question 1} + +To implement a lexer for the WHILE language, you first +need to design the appropriate regular expressions for the +following eleven syntactic entities: + +\begin{enumerate} +\item keywords are + +\begin{center} +\texttt{while}, +\texttt{if}, +\texttt{then}, +\texttt{else}, +\texttt{do}, +\texttt{for}, +\texttt{to}, +\texttt{true}, +\texttt{false}, +\texttt{read}, +\texttt{write}, +\texttt{skip} +\end{center} + +\item operators are: +\texttt{+}, +\texttt{-}, +\texttt{*}, +\texttt{\%}, +\texttt{/}, +\texttt{==}, +\texttt{!=}, +\texttt{>}, +\texttt{<}, +\texttt{<=}, +\texttt{>=}, +\texttt{:=}, +\texttt{\&\&}, +\texttt{||} + +\item letters are uppercase and lowercase + +\item symbols are letters plus the characters + \texttt{.}, + \texttt{\_}, + \texttt{>}, + \texttt{<}, + \texttt{=}, + \texttt{;}, + \texttt{,} and + \texttt{:} + +\item strings are enclosed by \texttt{"\ldots"} and consisting of + symbols, whitespaces and digits +\item parentheses are \texttt{(}, \texttt{\{}, \texttt{)} and \texttt{\}} +\item there are semicolons \texttt{;} +\item whitespaces are either \texttt{" "} (one or more) or \texttt{$\backslash$n} or + \texttt{$\backslash$t} +\item identifiers are letters followed by underscores \texttt{\_\!\_}, letters +or digits +\item numbers are \pcode{0}, \pcode{1}, \ldots and so on; give +a regular expression that can recognise \pcode{0}, but not numbers +with leading zeroes, such as \pcode{001} +\item comments start with \texttt{//} and contain symbols, spaces and digits until the end of the line +\end{enumerate} + +\noindent +You can use the basic regular expressions + +\[ +\ZERO,\; \ONE,\; c,\; r_1 + r_2,\; r_1 \cdot r_2,\; r^* +\] + +\noindent +but also the following extended regular expressions + +\begin{center} +\begin{tabular}{ll} +$[c_1,c_2,\ldots,c_n]$ & a set of characters\\ +$r^+$ & one or more times $r$\\ +$r^?$ & optional $r$\\ +$r^{\{n\}}$ & n-times $r$\\ +\end{tabular} +\end{center} + +\noindent +Later on you will also need the record regular expression: + +\begin{center} +\begin{tabular}{ll} +$REC(x:r)$ & record regular expression\\ +\end{tabular} +\end{center} + +\noindent Try to design your regular expressions to be as +small as possible. For example you should use character sets +for identifiers and numbers. Feel free to use the general +character constructor \textit{CFUN} introduced in CW 1. + +\subsection*{Question 2} + +Implement the Sulzmann \& Lu lexer from the lectures. For +this you need to implement the functions $nullable$ and $der$ +(you can use your code from CW~1), as well as $mkeps$ and +$inj$. These functions need to be appropriately extended for +the extended regular expressions from Q1. Write down the +clauses for + +\begin{center} +\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}} +$mkeps([c_1,c_2,\ldots,c_n])$ & $\dn$ & $?$\\ +$mkeps(r^+)$ & $\dn$ & $?$\\ +$mkeps(r^?)$ & $\dn$ & $?$\\ +$mkeps(r^{\{n\}})$ & $\dn$ & $?$\medskip\\ +$inj\, ([c_1,c_2,\ldots,c_n])\,c\,\ldots$ & $\dn$ & $?$\\ +$inj\, (r^+)\,c\,\ldots$ & $\dn$ & $?$\\ +$inj\, (r^?)\,c\,\ldots$ & $\dn$ & $?$\\ +$inj\, (r^{\{n\}})\,c\,\ldots$ & $\dn$ & $?$\\ +\end{tabular} +\end{center} + +\noindent where $inj$ takes three arguments: a regular +expression, a character and a value. Test your lexer code +with at least the two small examples below: + +\begin{center} +\begin{tabular}{ll} +regex: & string:\smallskip\\ +$a^{\{3\}}$ & $aaa$\\ +$(a + \ONE)^{\{3\}}$ & $aa$ +\end{tabular} +\end{center} + + +\noindent Both strings should be successfully lexed by the +respective regular expression, that means the lexer returns +in both examples a value. + + +Also add the record regular expression from the +lectures to your lexer and implement a function, say +\pcode{env}, that returns all assignments from a value (such +that you can extract easily the tokens from a value).\medskip + +\noindent +Finally give the tokens for your regular expressions from Q1 and the +string + +\begin{center} +\code{"read n;"} +\end{center} + +\noindent +and use your \pcode{env} function to give the token sequence. + + +\subsection*{Question 3} + +Extend your lexer from Q2 to also simplify regular expressions after +each derivation step and rectify the computed values after each +injection. Use this lexer to tokenize the programs in +Figures~\ref{fib} -- \ref{collatz}. You can find the programms also on +KEATS. Give the tokens of these programs where whitespaces are +filtered out. Make sure you can tokenise \textbf{exactly} these +programs.\bigskip + + +\begin{figure}[h] +\mbox{\lstinputlisting[language=While,xleftmargin=10mm]{../progs/while-tests/fib.while}} +\caption{Fibonacci program in the WHILE language.\label{fib}} +\end{figure} + +\begin{figure}[h] +\mbox{\lstinputlisting[language=While,xleftmargin=10mm]{../progs/while-tests/loops.while}} +\caption{The three-nested-loops program in the WHILE language. +(Usually used for timing measurements.)\label{loop}} +\end{figure} + +\begin{figure}[h] +\mbox{\lstinputlisting[language=While,xleftmargin=10mm]{../progs/while-tests/factors.while}} +\caption{A program that calculates factors for numbers in the WHILE + language.\label{factors}} +\end{figure} + +\begin{figure}[h] +\mbox{\lstinputlisting[language=While,xleftmargin=10mm]{../progs/while-tests/collatz2.while}} +\caption{A program that calculates the Collatz series for numbers + between 1 and 100.\label{collatz}} +\end{figure} + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r 4b208d81e002 -r c0bdd4ad69ca cws/cw03.pdf Binary file cws/cw03.pdf has changed diff -r 4b208d81e002 -r c0bdd4ad69ca cws/cw03.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cws/cw03.tex Tue Sep 01 16:00:37 2020 +0100 @@ -0,0 +1,167 @@ +% !TEX program = xelatex +\documentclass{article} +\usepackage{../style} +\usepackage{../langs} + +\begin{document} + +\section*{Coursework 3} + + + +\noindent This coursework is worth 10\% and is due on \cwTHREE{} at +18:00. You are asked to implement a parser for the WHILE language and +also an interpreter. You can do the implementation in any programming +language you like, but you need to submit the source code with which +you answered the questions, otherwise a mark of 0\% will be +awarded. You should use the lexer from the previous coursework for the +parser. Please package everything(!) in a zip-file that creates a +directory with the name \texttt{YournameYourFamilyname} on my end. + +\subsection*{Disclaimer\alert} + +It should be understood that the work you submit represents your own +effort. You have not copied from anyone else. An exception is the +Scala code I showed during the lectures or uploaded to KEATS, which +you can both use. You can also use your own code from the CW~1 and +CW~2. + + +\subsection*{Question 1} + +Design a grammar for the WHILE language and give the grammar +rules. The main categories of non-terminals should be: + +\begin{itemize} +\item arithmetic expressions (with the operations from the + previous coursework, that is \pcode{+}, \pcode{-}, \pcode{*}, + \pcode{/} and \pcode{\%}) +\item boolean expressions (with the operations \pcode{==}, \pcode{<}, \pcode{>}, + \code{>=}, \code{<=}, + \code{!=}, \pcode{&&}, \pcode{||}, \pcode{true} and \pcode{false}) +\item single statements (that is \pcode{skip}, assignments, \pcode{if}s, + \pcode{while}-loops, \pcode{read} and \pcode{write}) +\item compound statements separated by semicolons +\item blocks which are enclosed in curly parentheses +\end{itemize} + +\noindent +Make sure the grammar is not left-recursive. + +\subsection*{Question 2} + +You should implement a parser for the WHILE language using parser +combinators. Be careful that the parser takes as input a stream, or +list, of \emph{tokens} generated by the tokenizer from the previous +coursework. For this you might want to filter out whitespaces and +comments. Your parser should be able to handle the WHILE programs in +Figures~\ref{fib}, \ref{loop} and \ref{primes}. In addition give the +parse tree for the statement: + +\begin{lstlisting}[language=While,numbers=none] +if (a < b) then skip else a := a * b + 1 +\end{lstlisting} + +\noindent +A (possibly incomplete) datatype for parse trees in Scala is shown +in Figure~\ref{trees}. + +\begin{figure}[p] +\begin{lstlisting}[language=Scala] +abstract class Stmt +abstract class AExp +abstract class BExp + +type Block = List[Stmt] + +case object Skip extends Stmt +case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt +case class While(b: BExp, bl: Block) extends Stmt +case class Assign(s: String, a: AExp) extends Stmt +case class Read(s: String) extends Stmt +case class WriteVar(s: String) extends Stmt +case class WriteStr(s: String) extends Stmt + // for printing variables and strings + +case class Var(s: String) extends AExp +case class Num(i: Int) extends AExp +case class Aop(o: String, a1: AExp, a2: AExp) extends AExp + +case object True extends BExp +case object False extends BExp +case class Bop(o: String, a1: AExp, a2: AExp) extends BExp +case class Lop(o: String, b1: BExp, b2: BExp) extends BExp + // logical operations: and, or +\end{lstlisting} +\caption{The datatype for parse trees in Scala.\label{trees}} +\end{figure} + +\subsection*{Question 3} + +Implement an interpreter for the WHILE language you designed +and parsed in Question 1 and 2. This interpreter should take +as input a parse tree. However be careful because, programs +contain variables and variable assignments. This means +you need to maintain a kind of memory, or environment, +where you can look up a value of a variable and also +store a new value if it is assigned. Therefore an +evaluation function (interpreter) needs to look roughly as +follows + +\begin{lstlisting}[numbers=none] +eval_stmt(stmt, env) +\end{lstlisting} + +\noindent +where \pcode{stmt} corresponds to the parse tree +of the program and \pcode{env} is an environment +acting as a store for variable values. +Consider the Fibonacci program in Figure~\ref{fib}. +At the beginning of the program this store will be +empty, but needs to be extended in line 3 and 4 where +the variables \pcode{minus1} and \pcode{minus2} +are assigned values. These values need to be reassigned in +lines 7 and 8. The program should be interpreted +according to straightforward rules: for example an +if-statement will ``run'' the if-branch if the boolean +evaluates to \pcode{true}, otherwise the else-branch. +Loops should be run as long as the boolean is \pcode{true}. +Programs you should be able to run are shown in +Figures \ref{fib} -- \ref{collatz}. + + +Give some time measurements for your interpreter +and the loop program in Figure~\ref{loop}. For example +how long does your interpreter take when \pcode{start} +is initialised with 100, 500 and so on. How far can +you scale this value if you are willing to wait, say +1 Minute? + +\begin{figure}[h] +\lstinputlisting[language=while,xleftmargin=20mm]{../progs/while-tests/fib.while} +\caption{Fibonacci program in the WHILE language.\label{fib}} +\end{figure} + +\begin{figure}[h] +\lstinputlisting[language=while,xleftmargin=20mm]{../progs/while-tests/loops.while} +\caption{The three-nested-loops program in the WHILE language. +Usually used for timing measurements.\label{loop}} +\end{figure} + +\begin{figure}[h] +\lstinputlisting[language=while,xleftmargin=0mm]{../progs/while-tests/primes.while} +\caption{Prime number program.\label{primes}} +\end{figure} + + +\begin{figure}[p] +\lstinputlisting[language=while,xleftmargin=0mm]{../progs/while-tests/collatz2.while} +\caption{Collatz series program.\label{collatz}} +\end{figure} + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r 4b208d81e002 -r c0bdd4ad69ca cws/cw04.pdf Binary file cws/cw04.pdf has changed diff -r 4b208d81e002 -r c0bdd4ad69ca cws/cw04.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cws/cw04.tex Tue Sep 01 16:00:37 2020 +0100 @@ -0,0 +1,421 @@ +% !TEX program = xelatex +\documentclass{article} +\usepackage{../style} +\usepackage{../langs} + +\begin{document} + +%https://github.com/Storyyeller/Krakatau +%https://docs.oracle.com/javase/specs/jvms/se7/html/ + +% Jasmin Tutorial +%http://saksagan.ceng.metu.edu.tr/courses/ceng444/link/jvm-cpm.html + +\section*{Coursework 4} + +\noindent This coursework is worth 10\% and is due on \cwFOUR{} +at 18:00. You are asked to implement a compiler for +the WHILE language that targets the assembler language +provided by Jasmin or Krakatau (both have very similar +syntax). You can do the implementation in any programming +language you like, but you need to submit the source code with +which you answered the questions, otherwise a mark of 0\% will +be awarded. You should use the lexer and parser from the +previous courseworks. Please package \emph{everything}(!) in +a zip-file that creates a directory with the name +\texttt{YournameYourFamilyname} on my end. + +\subsection*{Disclaimer\alert} + +It should be understood that the work you submit represents +your own effort. You have not copied from anyone else. An +exception is the Scala code I showed during the lectures, +which you can use. You can also use your own code from the +CW~1, CW~2 and CW~3. + + +\subsection*{Jasmin Assembler} + +The Jasmin assembler is available from + +\begin{center} +\url{http://jasmin.sourceforge.net} +\end{center} + +\noindent +There is a user guide for Jasmin + +\begin{center} +\url{http://jasmin.sourceforge.net/guide.html} +\end{center} + +\noindent and also a description of some of the instructions +that the JVM understands + +\begin{center} +\url{http://jasmin.sourceforge.net/instructions.html} +\end{center} + +\noindent If you generated a correct assembler file for +Jasmin, for example \texttt{loops.j}, you can use + +\begin{center} +\texttt{java -jar jasmin-2.4/jasmin.jar loops.j} +\end{center} + +\noindent in order to translate it into Java Byte Code. The +resulting class file can be run with + +\begin{center} +\texttt{java loops} +\end{center} + +\noindent where you might need to give the correct path to the +class file. For example: + +\begin{center} +\texttt{java -cp . loops/loops} +\end{center} + +\noindent There are also other resources about Jasmin on the +Internet, for example + +\begin{center} +\small\url{http://www.ceng.metu.edu.tr/courses/ceng444/link/f3jasmintutorial.html} +\end{center} + +\noindent and + +\begin{center} + \small\url{http://www.csc.villanova.edu/~tway/courses/csc4181/s2018/labs/lab4/JVM.pdf} +\end{center} + +\subsection*{Krakatau Assembler} + +The Krakatau assembler is available from + +\begin{center} +\url{https://github.com/Storyyeller/Krakatau} +\end{center} + +\noindent This assembler requires Python and a package called +\pcode{ply} available from + +\begin{center} +\url{https://pypi.python.org/pypi/ply} +\end{center} + +\noindent This assembler is largely compatible with the Jasmin +syntax---that means for the files we are concerned with here, +it understands the same input syntax (no changes to your +compiler need to be made; ok maybe some small syntactic +adjustments are needed). You can generate Java Byte Code by +using + +\begin{center} +\texttt{python Krakatau-master/assemble.py loops.j} +\end{center} + +\noindent where you may have to adapt the directory where +Krakatau is installed (I just downloaded the zip file from +Github and \pcode{Krakatau-master} was the directory where it +was installed). Again the resulting class-file you can run with +\texttt{java}. + + +%\noindent You need to submit a document containing the answers +%for the two questions below. You can do the implementation in +%any programming language you like, but you need to submit the +%source code with which you answered the questions. Otherwise +%the submission will not be counted. However, the coursework +%will \emph{only} be judged according to the answers. You can +%submit your answers in a txt-file or as pdf.\bigskip + + +\subsection*{Question 1} + +You need to lex and parse WHILE programs, and then generate +Java Byte Code instructions for the Jasmin assembler (or +Krakatau assembler). As solution you need to submit the +assembler instructions for the Fibonacci and Factorial +programs. Both should be so modified that a user can input on +the console which Fibonacci number and which Factorial should +be calculated. The Fibonacci program is given in +Figure~\ref{fibs}. You can write your own program for +calculating factorials. Submit your assembler code as +a file that can be run, not as PDF-text. + +\begin{figure}[t] +\lstinputlisting[language=while]{../progs/while-tests/fib.while} +\caption{The Fibonacci program in the WHILE language.\label{fibs}} +\end{figure} + +\subsection*{Question 2} + +Extend the syntax of your language so that it contains also +\texttt{for}-loops, like + +\begin{center} +\lstset{language=While} +\code{for} \;\textit{Id} \texttt{:=} \textit{AExp}\; \code{upto} +\;\textit{AExp}\; \code{do} \textit{Block} +\end{center} + +\noindent The intended meaning is to first assign the variable +\textit{Id} the value of the first arithmetic expression, test +whether this value is less or equal than the value of the +second arithmetic expression. If yes, go through the loop, and +at the end increase the value of the loop variable by 1 and +start again with the test. If no, leave the loop. For example +the following instance of a \code{for}-loop is supposed to +print out the numbers \pcode{2}, \pcode{3}, \pcode{4}. + + +\begin{center} +\begin{minipage}{12cm} +\begin{lstlisting}[language=While, numbers=none] +for i := 2 upto 4 do { + write i +} +\end{lstlisting} +\end{minipage} +\end{center} + +\noindent There are two ways how this can be implemented: one +is to adapt the code generation part of the compiler and +generate specific code for \code{for}-loops; the other is to +translate the abstract syntax tree of \code{for}-loops into +an abstract syntax tree using existing language constructs. +For example the loop above could be translated to the +following \code{while}-loop: + +\begin{center} +\begin{minipage}{12cm} +\begin{lstlisting}[language=While, numbers=none] +i := 2; +while (i <= 4) do { + write i; + i := i + 1; +} +\end{lstlisting} +\end{minipage} +\end{center} + +\subsection*{Question 3} + +\noindent In this question you are supposed to give the +assembler instructions for the program + +\begin{center} +\begin{minipage}{12cm} +\begin{lstlisting}[language=While, numbers=none] +for i := 1 upto 10 do { + for i := 1 upto 10 do { + write i + } +} +\end{lstlisting} +\end{minipage} +\end{center} + +\noindent +Note that in this program the variable \pcode{i} is used +twice. You need to make a decision how it should be compiled? +Explain your decision and indicate what this program would +print out. + +\subsection*{Further Information} + +The Java infrastructure unfortunately does not contain an +assembler out-of-the-box (therefore you need to download the +additional package Jasmin or Krakatau---see above). But it +does contain a disassembler, called \texttt{javap}. A +dissembler does the ``opposite'' of an assembler: it generates +readable assembler code from Java Byte Code. Have a look at +the following example: Compile using the usual Java compiler +the simple Hello World program below: + +\begin{center} +\begin{minipage}{12cm} +\begin{lstlisting}[language=Java,numbers=none] +class HelloWorld { + public static void main(String[] args) { + System.out.println("Hello World!"); + } +} +\end{lstlisting} +\end{minipage} +\end{center} + +\noindent +You can use the command + +\begin{center} +\begin{minipage}{12cm} +\begin{lstlisting}[language={},numbers=none] +javap -v HelloWorld +\end{lstlisting} +\end{minipage} +\end{center} + +\noindent to see the assembler instructions of the Java Byte +Code that has been generated for this program. You can compare +this with the code generated for the Scala version of Hello +World. + +\begin{center} +\begin{minipage}{12cm} +\begin{lstlisting}[language=Scala,numbers=none] +object HelloWorld { + def main(args: Array[String]) { + println("Hello World!") + } +} +\end{lstlisting} +\end{minipage} +\end{center} + + +\subsection*{Library Functions} + +You need to generate code for the commands \texttt{write} and +\texttt{read}. This will require the addition of some +``library'' functions to your generated code. The first +command even needs two versions, because you need to write out +an integer and string. The Java byte code will need two +separate functions for this. For writing out an integer, you +can use the assembler code + +\begin{center} +\begin{minipage}{12cm} +\begin{lstlisting}[language=JVMIS, numbers=none] +.method public static write(I)V + .limit locals 1 + .limit stack 2 + getstatic java/lang/System/out Ljava/io/PrintStream; + iload 0 + invokevirtual java/io/PrintStream/println(I)V + return +.end method +\end{lstlisting} +\end{minipage} +\end{center} + +\noindent This function will invoke Java's \texttt{println} +function for integers. Then if you need to generate code for +\texttt{write x} where \texttt{x} is an integer variable, you +can generate + +\begin{center} +\begin{minipage}{12cm} +\begin{lstlisting}[language=JVMIS, numbers=none] +iload n +invokestatic XXX/XXX/write(I)V +\end{lstlisting} +\end{minipage} +\end{center} + +\noindent where \texttt{n} is the index where the value of the +variable \texttt{x} is stored. The \texttt{XXX/XXX} needs to +be replaced with the class name which you use to generate the +code (for example \texttt{fib/fib} in case of the Fibonacci +numbers). + +Writing out a string is similar. The corresponding library +function uses strings instead of integers: + +\begin{center} +\begin{minipage}{12cm} +\begin{lstlisting}[language=JVMIS, numbers=none] +.method public static writes(Ljava/lang/String;)V + .limit stack 2 + .limit locals 1 + getstatic java/lang/System/out Ljava/io/PrintStream; + aload 0 + invokevirtual java/io/PrintStream/println(Ljava/lang/String;)V + return +.end method +\end{lstlisting} +\end{minipage} +\end{center} + +\noindent The code that needs to be generated for \code{write +"some_string"} commands is + +\begin{center} +\begin{minipage}{12cm} +\begin{lstlisting}[language=JVMIS,numbers=none] +ldc "some_string" +invokestatic XXX/XXX/writes(Ljava/lang/String;)V +\end{lstlisting} +\end{minipage} +\end{center} + +\noindent Again you need to adjust the \texttt{XXX/XXX} part +in each call. + +The code for \texttt{read} is more complicated. The reason is +that inputting a string will need to be transformed into an +integer. The code in Figure~\ref{read} does this. It can be +called with + +\begin{center} +\begin{minipage}{12cm} +\begin{lstlisting}[language=JVMIS,numbers=none] +invokestatic XXX/XXX/read()I +istore n +\end{lstlisting} +\end{minipage} +\end{center} + +\noindent +where \texttt{n} is the index of the variable that requires an input. If you +use Windows you need to take into account that a ``return'' is not just a newline, +\code{'\\10'}, but \code{'\\13\\10'}. This means you need to change line~12 in +Figure~\ref{read} to \pcode{ldc 13}. + + +\begin{figure}[t]\small +\begin{lstlisting}[language=JVMIS,numbers=left] +.method public static read()I + .limit locals 10 + .limit stack 10 + + ldc 0 + istore 1 ; this will hold our final integer +Label1: + getstatic java/lang/System/in Ljava/io/InputStream; + invokevirtual java/io/InputStream/read()I + istore 2 + iload 2 + ldc 10 ; the newline delimiter for Unix (Windows 13) + isub + ifeq Label2 + iload 2 + ldc 32 ; the space delimiter + isub + ifeq Label2 + iload 2 + ldc 48 ; we have our digit in ASCII, have to subtract it from 48 + isub + ldc 10 + iload 1 + imul + iadd + istore 1 + goto Label1 +Label2: + ;when we come here we have our integer computed in Local Variable 1 + iload 1 + ireturn +.end method +\end{lstlisting}\normalsize +\caption{Assembler code for reading an integer from the console.\label{read}} +\end{figure} + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r 4b208d81e002 -r c0bdd4ad69ca cws/cw05.pdf Binary file cws/cw05.pdf has changed diff -r 4b208d81e002 -r c0bdd4ad69ca cws/cw05.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cws/cw05.tex Tue Sep 01 16:00:37 2020 +0100 @@ -0,0 +1,41 @@ +% !TEX program = xelatex +\documentclass{article} +\usepackage{../style} +\usepackage{../langs} + +\begin{document} + +\section*{Coursework 5} + + + +\noindent This coursework is worth 12\% and is due on \cwFIVE{} at +18:00. You are asked to implement a compiler targetting the LLVM-IR. +You can do the implementation in any programming +language you like, but you need to submit the source code with which +you answered the questions, otherwise a mark of 0\% will be +awarded. You should use the lexer from the previous coursework for the +parser. Please package everything(!) in a zip-file that creates a +directory with the name \texttt{YournameYourFamilyname} on my end. + +\subsection*{Disclaimer\alert} + +It should be understood that the work you submit represents your own +effort. You have not copied from anyone else. An exception is the +Scala code I showed during the lectures or uploaded to KEATS, which +you can both use. You can also use your own code from the CW~1 -- +CW~4. + + +\subsection*{Question 1} + +\subsection*{Question 2} + +\subsection*{Question 3} + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r 4b208d81e002 -r c0bdd4ad69ca cws/cw0A.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cws/cw0A.tex Tue Sep 01 16:00:37 2020 +0100 @@ -0,0 +1,245 @@ +% !TEX program = xelatex +\documentclass{article} +\usepackage{../style} +\usepackage{../langs} + +\begin{document} + +\section*{Coursework (Strand 2)} + +\noindent This coursework is worth 20\% and is due on \cwISABELLE{} at +18:00. You are asked to prove the correctness of the regular expression +matcher from the lectures using the Isabelle theorem prover. You need to +submit a theory file containing this proof and also a document +describing your proof. The Isabelle theorem prover is available from + +\begin{center} +\url{http://isabelle.in.tum.de} +\end{center} + +\noindent This is an interactive theorem prover, meaning that +you can make definitions and state properties, and then help +the system with proving these properties. Sometimes the proofs +are also completely automatic. There is a shortish user guide for +Isabelle, called ``Programming and Proving in Isabelle/HOL'' +at + +\begin{center} +\url{http://isabelle.in.tum.de/documentation.html} +\end{center} + +\noindent +and also a longer (free) book at + +\begin{center} +\url{http://www.concrete-semantics.org} +\end{center} + +\noindent The Isabelle theorem prover is operated through the +jEdit IDE, which might not be an editor that is widely known. +JEdit is documented in + +\begin{center} +\url{http://isabelle.in.tum.de/dist/Isabelle2014/doc/jedit.pdf} +\end{center} + + +\noindent If you need more help or you are stuck somewhere, +please feel free to contact me (christian.urban at kcl.ac.uk). I +am one of the main developers of Isabelle and have used it for +approximately 16 years. One of the success stories of +Isabelle is the recent verification of a microkernel operating +system by an Australian group, see \url{http://sel4.systems}. +Their operating system is the only one that has been proved +correct according to its specification and is used for +application where high assurance, security and reliability is +needed (like in helicopters which fly over enemy territory). + + +\subsection*{The Task} + +In this coursework you are asked to prove the correctness of the +regular expression matcher from the lectures in Isabelle. The matcher +should be able to deal with the usual (basic) regular expressions + +\[ +\ZERO,\; \ONE,\; c,\; r_1 + r_2,\; r_1 \cdot r_2,\; r^* +\] + +\noindent +but also with the following extended regular expressions: + +\begin{center} +\begin{tabular}{ll} + $r^{\{n\}}$ & exactly $n$-times\\ + $r^{\{..m\}}$ & zero or more times $r$ but no more than $m$-times\\ + $r^{\{n..\}}$ & at least $n$-times $r$\\ + $r^{\{n..m\}}$ & at least $n$-times $r$ but no more than $m$-times\\ + $\sim{}r$ & not-regular-expression of $r$\\ +\end{tabular} +\end{center} + + +\noindent +You need to first specify what the matcher is +supposed to do and then to implement the algorithm. Finally you need +to prove that the algorithm meets the specification. The first two +parts are relatively easy, because the definitions in Isabelle will +look very similar to the mathematical definitions from the lectures or +the Scala code that is supplied at KEATS. For example very similar to +Scala, regular expressions are defined in Isabelle as an inductive +datatype: + +\begin{lstlisting}[language={},numbers=none] +datatype rexp = + ZERO +| ONE +| CHAR char +| SEQ rexp rexp +| ALT rexp rexp +| STAR rexp +\end{lstlisting} + +\noindent The meaning of regular expressions is given as +usual: + +\begin{center} +\begin{tabular}{rcl@{\hspace{10mm}}l} +$L(\ZERO)$ & $\dn$ & $\varnothing$ & \pcode{ZERO}\\ +$L(\ONE)$ & $\dn$ & $\{[]\}$ & \pcode{ONE}\\ +$L(c)$ & $\dn$ & $\{[c]\}$ & \pcode{CHAR}\\ +$L(r_1 + r_2)$ & $\dn$ & $L(r_1) \cup L(r_2)$ & \pcode{ALT}\\ +$L(r_1 \cdot r_2)$ & $\dn$ & $L(r_1) \,@\, L(r_2)$ & \pcode{SEQ}\\ +$L(r^*)$ & $\dn$ & $(L(r))^*$ & \pcode{STAR}\\ +\end{tabular} +\end{center} + +\noindent You would need to implement this function in order +to state the theorem about the correctness of the algorithm. +The function $L$ should in Isabelle take a \pcode{rexp} as +input and return a set of strings. Its type is +therefore + +\begin{center} +\pcode{L} \pcode{::} \pcode{rexp} $\Rightarrow$ \pcode{string set} +\end{center} + +\noindent Isabelle treats strings as an abbreviation for lists +of characters. This means you can pattern-match strings like +lists. The union operation on sets (for the \pcode{ALT}-case) +is a standard definition in Isabelle, but not the +concatenation operation on sets and also not the +star-operation. You would have to supply these definitions. +The concatenation operation can be defined in terms of the +append function, written \code{_ @ _} in Isabelle, for lists. +The star-operation can be defined as a ``big-union'' of +powers, like in the lectures, or directly as an inductive set. + +The functions for the matcher are shown in +Figure~\ref{matcher}. The theorem that needs to be proved is + +\begin{lstlisting}[numbers=none,language={},keywordstyle=\color{black}\ttfamily,mathescape] +theorem + "matches r s $\longleftrightarrow$ s $\in$ L r" +\end{lstlisting} + +\noindent which states that the function \emph{matches} is +true if and only if the string is in the language of the +regular expression. A proof for this lemma will need +side-lemmas about \pcode{nullable} and \pcode{der}. An example +proof in Isabelle that will not be relevant for the theorem +above is given in Figure~\ref{proof}. + +\begin{figure}[p] +\begin{lstlisting}[language={},keywordstyle=\color{black}\ttfamily,mathescape] +fun + nullable :: "rexp $\Rightarrow$ bool" +where + "nullable ZERO = False" +| "nullable ONE = True" +| "nullable (CHAR _) = False" +| "nullable (ALT r1 r2) = (nullable(r1) $\vee$ nullable(r2))" +| "nullable (SEQ r1 r2) = (nullable(r1) $\wedge$ nullable(r2))" +| "nullable (STAR _) = True" + +fun + der :: "char $\Rightarrow$ rexp $\Rightarrow$ rexp" +where + "der c ZERO = ZERO" +| "der c ONE = ZERO" +| "der c (CHAR d) = (if c = d then ONE else ZERO)" +| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)" +| "der c (SEQ r1 r2) = + (if (nullable r1) then ALT (SEQ (der c r1) r2) (der c r2) + else SEQ (der c r1) r2)" +| "der c (STAR r) = SEQ (der c r) (STAR r)" + +fun + ders :: "rexp $\Rightarrow$ string $\Rightarrow$ rexp" +where + "ders r [] = r" +| "ders r (c # s) = ders (der c r) s" + +fun + matches :: "rexp $\Rightarrow$ string $\Rightarrow$ bool" +where + "matches r s = nullable (ders r s)" +\end{lstlisting} +\caption{The definition of the matcher algorithm in +Isabelle.\label{matcher}} +\end{figure} + +\begin{figure}[p] +\begin{lstlisting}[language={},keywordstyle=\color{black}\ttfamily,mathescape] +fun + zeroable :: "rexp $\Rightarrow$ bool" +where + "zeroable ZERO = True" +| "zeroable ONE = False" +| "zeroable (CHAR _) = False" +| "zeroable (ALT r1 r2) = (zeroable(r1) $\wedge$ zeroable(r2))" +| "zeroable (SEQ r1 r2) = (zeroable(r1) $\vee$ zeroable(r2))" +| "zeroable (STAR _) = False" + +lemma + "zeroable r $\longleftrightarrow$ L r = {}" +proof (induct) + case (ZERO) + have "zeroable ZERO" "L ZERO = {}" by simp_all + then show "zeroable ZERO $\longleftrightarrow$ (L ZERO = {})" by simp +next + case (ONE) + have "$\neg$ zeroable ONE" "L ONE = {[]}" by simp_all + then show "zeroable ONE $\longleftrightarrow$ (L ONE = {})" by simp +next + case (CHAR c) + have "$\neg$ zeroable (CHAR c)" "L (CHAR c) = {[c]}" by simp_all + then show "zeroable (CHAR c) $\longleftrightarrow$ (L (CHAR c) = {})" by simp +next + case (ALT r1 r2) + have ih1: "zeroable r1 $\longleftrightarrow$ L r1 = {}" by fact + have ih2: "zeroable r2 $\longleftrightarrow$ L r2 = {}" by fact + show "zeroable (ALT r1 r2) $\longleftrightarrow$ (L (ALT r1 r2) = {})" + using ih1 ih2 by simp +next + case (SEQ r1 r2) + have ih1: "zeroable r1 $\longleftrightarrow$ L r1 = {}" by fact + have ih2: "zeroable r2 $\longleftrightarrow$ L r2 = {}" by fact + show "zeroable (SEQ r1 r2) $\longleftrightarrow$ (L (SEQ r1 r2) = {})" + using ih1 ih2 by (auto simp add: Conc_def) +next + case (STAR r) + have "$\neg$ zeroable (STAR r)" "[] $\in$ L (r) ^ 0" by simp_all + then show "zeroable (STAR r) $\longleftrightarrow$ (L (STAR r) = {})" + by (simp (no_asm) add: Star_def) blast +qed +\end{lstlisting} +\caption{An Isabelle proof about the function \texttt{zeroable}.\label{proof}} +\end{figure} + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r 4b208d81e002 -r c0bdd4ad69ca mk --- a/mk Tue Sep 01 15:57:55 2020 +0100 +++ b/mk Tue Sep 01 16:00:37 2020 +0100 @@ -1,7 +1,7 @@ #!/bin/sh set -e -subdirs=${1:-"slides handouts hws coursework"} +subdirs=${1:-"slides handouts hws cws"} for sd in $subdirs; do cd $sd