diff -r 3703ade9b17c -r 640e4a05cd9b coursework/cw04.tex --- a/coursework/cw04.tex Mon Nov 17 08:38:52 2014 +0000 +++ b/coursework/cw04.tex Sun Nov 23 22:12:18 2014 +0000 @@ -4,217 +4,310 @@ \begin{document} -\section*{Coursework (Strand 2)} +\section*{Coursework 4 (Strand 1)} + +\noindent This coursework is worth 10\% and is due on 12th +December at 16:00. You are asked to implement a compiler for +the WHILE language that targets the assembler language +provided by Jasmin. This assembler is available from -\noindent This coursework is worth 25\% and is due on 12 -December at 16:00. You are asked to prove the correctness of a -regular expression matcher from the lectures using the -Isabelle theorem prover. You need to submit a theory file -containing this proof. The Isabelle theorem prover is -available from +\begin{center} +\url{http://jasmin.sourceforge.net} +\end{center} + +\noindent +There is a user guide for Jasmin \begin{center} -\url{http://isabelle.in.tum.de} +\url{http://jasmin.sourceforge.net/guide.html} +\end{center} + +\noindent +and also a description of some of the instructions that the JVM understands + +\begin{center} +\url{http://jasmin.sourceforge.net/instructions.html} \end{center} -\noindent This is an interactive theorem prover, meaning that -you can make definitions and state properties, and then help -the system with proving these properties. Sometimes the proofs -are also automatic. There is a shortish user guide for -Isabelle, called ``Programming and Proving in Isabelle/HOL'' -at +\noindent +If you generated a correct assembler file for Jasmin, for example +\texttt{loops.j}, you can use \begin{center} -\url{http://isabelle.in.tum.de/documentation.html} +\texttt{java -jar jasmin-2.4/jasmin.jar loops.j} +\end{center} + +\noindent +in order to translate it to Java byte code. 
The resulting class file can be +run with + +\begin{center} +\texttt{java loops} \end{center} \noindent -and also a longer (free) book at +where you might need to give the correct path to the class file. There +are also other resources about Jasmin on the Internet, for example +\mbox{\url{http://goo.gl/Qj8TeK}} and \mbox{\url{http://goo.gl/fpVNyT}}\;.\bigskip + +\noindent +You need to submit a document containing the answers for the two questions +below. You can do the implementation in any programming language you like, but you need +to submit the source code with which you answered the questions. Otherwise +the submission will not be counted. However, the coursework +will \emph{only} be judged according to the answers. You can submit your answers +in a txt-file or as pdf.\bigskip + + +\subsection*{Question 1 (marked with 2\%)} + +You need to lex and parse WHILE programs and submit the assembler +instructions for the Fibonacci program and for the program you submitted +in Coursework 2 in Question 3. The latter should be so modified that +a user can input the upper bound on the console (in the original question +it was fixed to 100). + +\subsection*{Question 2 (marked with 2\%)} + +Extend the syntax of your language so that it contains also \texttt{for}-loops, like \begin{center} -\url{http://www.concrete-semantics.org} +\texttt{for} \;\textit{Id} \texttt{:=} \textit{AExp}\; \texttt{upto} \;\textit{AExp}\; \texttt{do} \textit{Block} \end{center} -\noindent The Isabelle theorem prover is operated through the -jEdit IDE, which might not be an editor that is widely known. -JEdit is documented in +\noindent +The intended meaning is to first assign the variable \textit{Id} the value of the first arithmetic +expression, then go through the loop, at the end increase the value of the variable by 1, +and finally test whether the value is not less than or equal to the value of the second +arithmetic expression. 
For example the following instance of a \texttt{for}-loop +is supposed to print out the numbers \texttt{2}, \texttt{3}, \texttt{4}. + + +\begin{center} +\begin{minipage}{6cm} +\begin{lstlisting}[language=While,basicstyle=\ttfamily, numbers=none] +for i := 2 upto 4 do { + write i +} +\end{lstlisting} +\end{minipage} +\end{center} + +\noindent +There are two ways how this can be implemented: one is to adapt the code generation +part of the compiler and generate specific code for \texttt{for}-loops; the other is to +translate the abstract syntax tree of \texttt{for}-loops into an abstract syntax tree using +existing language constructs. For example the loop above could be translated +to the following \texttt{while}-loop: \begin{center} -\url{http://isabelle.in.tum.de/dist/Isabelle2014/doc/jedit.pdf} +\begin{minipage}{6cm} +\begin{lstlisting}[language=While,basicstyle=\ttfamily, numbers=none] +i := 2; +while (i <= 4) do { + write i; + i := i + 1; +} +\end{lstlisting} +\end{minipage} +\end{center} + +\noindent +In this question you are supposed to give the assembler instructions for the +program + +\begin{center} +\begin{minipage}{6cm} +\begin{lstlisting}[language=While,basicstyle=\ttfamily, numbers=none] +for i := 1 upto 10000 do { + for i := 1 upto 10000 do { + skip + } +} +\end{lstlisting} +\end{minipage} \end{center} -\noindent If you need more help or you are stuck somewhere, -please feel free to contact me (christian.urban@kcl.ac.uk). I -am a main developer of Isabelle and have used it for -approximately the 14 years. One of the success stories of -Isabelle is the recent verification of a microkernel operating -system by an Australian group, see \url{http://sel4.systems}. -Their operating system is the only one that has been proved -correct according to its specification and is used for -application where high assurance, security and reliability is -needed. 
+ +\subsection*{Further Information} + +The Java infrastructure unfortunately does not contain an assembler out-of-the-box +(therefore +you need to download the additional package Jasmin---see above). But it does contain a +disassembler, called \texttt{javap}. A disassembler does the ``opposite'' of an assembler: it +generates readable assembler code from Java byte code. Have a look at the +following example: Compile using the usual Java compiler the simple Hello World +program below: + +\begin{center} +\begin{minipage}{10cm} +\begin{lstlisting}[language=Java,basicstyle=\ttfamily] +class HelloWorld { + public static void main(String[] args) { + System.out.println("Hello World!"); + } +} +\end{lstlisting} +\end{minipage} +\end{center} + +\noindent +You can use the command + +\begin{center} +\texttt{javap -v HelloWorld} +\end{center} + +\noindent +to see the assembler instructions of the Java byte code that has been generated for this +program. You can compare this with the code generated for the Scala +version of Hello World. + +\begin{center} +\begin{minipage}{10cm} +\begin{lstlisting}[language=Scala,basicstyle=\ttfamily] +object HelloWorld { + def main(args: Array[String]) { + println("Hello World!") + } +} +\end{lstlisting} +\end{minipage} +\end{center} -\subsection*{The Task} +\subsection*{Library Functions} -In this coursework you are asked to prove the correctness of -the regular expression matcher from the lectures in Isabelle. -For this you need to first specify what the matcher is -supposed to do and then to implement the algorithm. Finally -you need to prove that the algorithm meets the specification. -The first two parts are relatively easy, because the -definitions in Isabelle will look very similar to the -mathematical definitions from the lectures or the Scala code -that is supplied at KEATS. 
For example very similar to Scala, -regular expressions are defined in Isabelle as an inductive -datatype: - -\begin{lstlisting}[language={},numbers=none] -datatype rexp = - NULL -| EMPTY -| CHAR char -| SEQ rexp rexp -| ALT rexp rexp -| STAR rexp -\end{lstlisting} - -\noindent The meaning of regular expressions is given as -usual: +You need to generate code for the commands \texttt{write} and \texttt{read}. This +will require the addition of some ``library'' functions to your generated code. The first +command even needs two versions, because you might want to write out an +integer or a string. The Java byte code will need two separate functions for this. +For writing out an integer, you can use the assembler code \begin{center} -\begin{tabular}{rcl@{\hspace{10mm}}l} -$L(\varnothing)$ & $\dn$ & $\varnothing$ & \pcode{NULL}\\ -$L(\epsilon)$ & $\dn$ & $\{[]\}$ & \pcode{EMPTY}\\ -$L(c)$ & $\dn$ & $\{[c]\}$ & \pcode{CHAR}\\ -$L(r_1 + r_2)$ & $\dn$ & $L(r_1) \cup L(r_2)$ & \pcode{ALT}\\ -$L(r_1 \cdot r_2)$ & $\dn$ & $L(r_1) \,@\, L(r_2)$ & \pcode{SEQ}\\ -$L(r^*)$ & $\dn$ & $(L(r))^*$ & \pcode{STAR}\\ -\end{tabular} +\begin{minipage}{12cm} +\begin{lstlisting}[basicstyle=\ttfamily, numbers=none] +.method public static write(I)V + .limit locals 5 + .limit stack 5 + iload 0 + getstatic java/lang/System/out Ljava/io/PrintStream; + swap + invokevirtual java/io/PrintStream/println(I)V + return +.end method +\end{lstlisting} +\end{minipage} \end{center} -\noindent You would need to implement this function in order -to state the theorem about the correctness of the algorithm. -The function $L$ should in Isabelle take a \pcode{rexp} as -input and return a set of strings. Its type is -therefore +\noindent +This function will invoke Java's \texttt{println} function for integers. 
Then if you need +to generate code for \texttt{write x} where \texttt{x} is an integer variable, you can generate \begin{center} -\pcode{L} \pcode{::} \pcode{rexp} $\Rightarrow$ \pcode{string set} +\begin{minipage}{8cm} +\begin{lstlisting}[basicstyle=\ttfamily, numbers=none] +iload n +invokestatic XXX/XXX/write(I)V +\end{lstlisting} +\end{minipage} \end{center} -\noindent Isabelle treats strings as an abbreviation for lists -of characters. This means you can pattern-match strings like -lists. The union operation on sets (for the \pcode{ALT}-case) -is a standard definition in Isabelle, but not the -concatenation operation on sets and also not the -star-operation. You would have to supply these definitions. -The concatenation operation can be defined in terms of the -append function, written \code{_ @ _} in Isabelle, for lists. -The star-operation can be defined as a ``big-union'' of -powers, like in the lectures, or directly as an inductive set. +\noindent +where \texttt{n} is the index where the value of the variable \texttt{x} is +stored. The \texttt{XXX/XXX} needs to be replaced with the class name +which you use to generate the code (for example \texttt{fib/fib} in case +of the Fibonacci numbers). -The functions for the matcher are shown in -Figure~\ref{matcher}. The theorem that needs to be proved is - -\begin{lstlisting}[numbers=none,language={},keywordstyle=\color{black}\ttfamily,mathescape] -theorem - "matches r s $\longleftrightarrow$ s $\in$ L r" -\end{lstlisting} - -\noindent which states that the function \emph{matches} is -true if and only if the string is in the language of the -regular expression. A proof for this lemma will need -side-lemmas about \pcode{nullable} and \pcode{der}. An example -proof in Isabelle that will not be relevant for the theorem -above is given in Figure~\ref{proof}. +Writing out a string is similar. 
The corresponding library function uses strings +instead of integers: -\begin{figure}[p] -\begin{lstlisting}[language={},keywordstyle=\color{black}\ttfamily,mathescape] -fun - nullable :: "rexp $\Rightarrow$ bool" -where - "nullable NULL = False" -| "nullable EMPTY = True" -| "nullable (CHAR _) = False" -| "nullable (ALT r1 r2) = (nullable(r1) $\vee$ nullable(r2))" -| "nullable (SEQ r1 r2) = (nullable(r1) $\wedge$ nullable(r2))" -| "nullable (STAR _) = True" +\begin{center} +\begin{minipage}{12cm} +\begin{lstlisting}[basicstyle=\ttfamily, numbers=none] +.method public static writes(Ljava/lang/String;)V + .limit stack 2 + .limit locals 2 + getstatic java/lang/System/out Ljava/io/PrintStream; + aload 0 + invokevirtual java/io/PrintStream/println(Ljava/lang/String;)V + return +.end method +\end{lstlisting} +\end{minipage} +\end{center} -fun - der :: "char $\Rightarrow$ rexp $\Rightarrow$ rexp" -where - "der c NULL = NULL" -| "der c EMPTY = NULL" -| "der c (CHAR d) = (if c = d then EMPTY else NULL)" -| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)" -| "der c (SEQ r1 r2) = - (if (nullable r1) then ALT (SEQ (der c r1) r2) (der c r2) - else SEQ (der c r1) r2)" -| "der c (STAR r) = SEQ (der c r) (STAR r)" +\noindent +The code that needs to be generated for \texttt{write "some\_string"} commands +is -fun - ders :: "rexp $\Rightarrow$ string $\Rightarrow$ rexp" -where - "ders r [] = r" -| "ders r (c # s) = ders (der c r) s" +\begin{center} +\begin{minipage}{8cm} +\begin{lstlisting}[basicstyle=\ttfamily, numbers=none] +ldc "some_string" +invokestatic XXX/XXX/writes(Ljava/lang/String;)V +\end{lstlisting} +\end{minipage} +\end{center} + +\noindent +Again you need to adjust the \texttt{XXX/XXX} part in each call. + +The code for \texttt{read} is more complicated. The reason is that inputting a string +will need to be transformed into an integer. The code in Figure~\ref{read} does this. 
+It can be called with -fun - matches :: "rexp $\Rightarrow$ string $\Rightarrow$ bool" -where - "matches r s = nullable (ders r s)" +\begin{center} +\begin{minipage}{8cm} +\begin{lstlisting}[basicstyle=\ttfamily, numbers=none] +invokestatic XXX/XXX/read()I +istore n \end{lstlisting} -\caption{The definition of the matcher algorithm in -Isabelle.\label{matcher}} -\end{figure} +\end{minipage} +\end{center} -\begin{figure}[p] -\begin{lstlisting}[language={},keywordstyle=\color{black}\ttfamily,mathescape] -fun - zeroable :: "rexp $\Rightarrow$ bool" -where - "zeroable NULL = True" -| "zeroable EMPTY = False" -| "zeroable (CHAR _) = False" -| "zeroable (ALT r1 r2) = (zeroable(r1) $\wedge$ zeroable(r2))" -| "zeroable (SEQ r1 r2) = (zeroable(r1) $\vee$ zeroable(r2))" -| "zeroable (STAR _) = False" +\noindent +where \texttt{n} is the index of the variable that requires an input. + + +\begin{figure}[p]\small +\begin{lstlisting}[basicstyle=\ttfamily, numbers=none] +.method public static read()I + .limit locals 10 + .limit stack 10 -lemma - "zeroable r $\longleftrightarrow$ L r = {}" -proof (induct) - case (NULL) - have "zeroable NULL" "L NULL = {}" by simp_all - then show "zeroable NULL $\longleftrightarrow$ (L NULL = {})" by simp -next - case (EMPTY) - have "$\neg$ zeroable EMPTY" "L EMPTY = {[]}" by simp_all - then show "zeroable EMPTY $\longleftrightarrow$ (L EMPTY = {})" by simp -next - case (CHAR c) - have "$\neg$ zeroable (CHAR c)" "L (CHAR c) = {[c]}" by simp_all - then show "zeroable (CHAR c) $\longleftrightarrow$ (L (CHAR c) = {})" by simp -next - case (ALT r1 r2) - have ih1: "zeroable r1 $\longleftrightarrow$ L r1 = {}" by fact - have ih2: "zeroable r2 $\longleftrightarrow$ L r2 = {}" by fact - show "zeroable (ALT r1 r2) $\longleftrightarrow$ (L (ALT r1 r2) = {})" - using ih1 ih2 by simp -next - case (SEQ r1 r2) - have ih1: "zeroable r1 $\longleftrightarrow$ L r1 = {}" by fact - have ih2: "zeroable r2 $\longleftrightarrow$ L r2 = {}" by fact - show "zeroable (SEQ 
r1 r2) $\longleftrightarrow$ (L (SEQ r1 r2) = {})" - using ih1 ih2 by (auto simp add: Conc_def) -next - case (STAR r) - have "$\neg$ zeroable (STAR r)" "[] $\in$ L (r) ^ 0" by simp_all - then show "zeroable (STAR r) $\longleftrightarrow$ (L (STAR r) = {})" - by (simp (no_asm) add: Star_def) blast -qed -\end{lstlisting} -\caption{An Isabelle proof about the function \pcode{zeroable}.\label{proof}} + ldc 0 + istore 1 ; this will hold our final integer +Label1: + getstatic java/lang/System/in Ljava/io/InputStream; + invokevirtual java/io/InputStream/read()I + istore 2 + iload 2 + ldc 10 ; the newline delimiter + isub + ifeq Label2 + iload 2 + ldc 32 ; the space delimiter + isub + ifeq Label2 + + iload 2 + ldc 48 ; we have our digit in ASCII, have to subtract 48 from it + isub + ldc 10 + iload 1 + imul + iadd + istore 1 + goto Label1 +Label2: + ;when we come here we have our integer computed in Local Variable 1 + iload 1 + ireturn +.end method +\end{lstlisting}\normalsize +\caption{Assembler code for reading an integer from the console.\label{read}} \end{figure} \end{document}