coursework/cw04.tex
changeset 309 640e4a05cd9b
parent 298 bdf84605b6cd
child 313 90ccc385c547
--- a/coursework/cw04.tex	Mon Nov 17 08:38:52 2014 +0000
+++ b/coursework/cw04.tex	Sun Nov 23 22:12:18 2014 +0000
@@ -4,217 +4,310 @@
 
 \begin{document}
 
-\section*{Coursework (Strand 2)}
+\section*{Coursework 4 (Strand 1)}
+
+\noindent This coursework is worth 10\% and is due on 12th
+December at 16:00. You are asked to implement a compiler for
+the WHILE language that targets the assembler language
+provided by Jasmin. This assembler is available from
 
-\noindent This coursework is worth 25\% and is due on 12
-December at 16:00. You are asked to prove the correctness of a
-regular expression matcher from the lectures using the
-Isabelle theorem prover. You need to submit a theory file
-containing this proof. The Isabelle theorem prover is
-available from 
+\begin{center}
+\url{http://jasmin.sourceforge.net}
+\end{center}
+
+\noindent
+There is a user guide for Jasmin
 
 \begin{center}
-\url{http://isabelle.in.tum.de}
+\url{http://jasmin.sourceforge.net/guide.html}
+\end{center}
+
+\noindent
+and also a description of some of the instructions that the JVM understands
+
+\begin{center}
+\url{http://jasmin.sourceforge.net/instructions.html}
 \end{center}
 
-\noindent This is an interactive theorem prover, meaning that
-you can make definitions and state properties, and then help
-the system with proving these properties. Sometimes the proofs
-are also automatic. There is a shortish user guide for
-Isabelle, called ``Programming and Proving in Isabelle/HOL''
-at
+\noindent
+If you have generated a correct assembler file for Jasmin, for example
+\texttt{loops.j}, you can use
 
 \begin{center}
-\url{http://isabelle.in.tum.de/documentation.html}
+\texttt{java -jar jasmin-2.4/jasmin.jar loops.j}
+\end{center}
+
+\noindent
+in order to translate it to Java byte code. The resulting class file can be
+run with
+
+\begin{center}
+\texttt{java loops}
 \end{center}
 
 \noindent
-and also a longer (free) book at
+where you might need to give the correct path to the class file. There
+are also other resources about Jasmin on the Internet, for example
+\mbox{\url{http://goo.gl/Qj8TeK}} and \mbox{\url{http://goo.gl/fpVNyT}}\;.\bigskip
+
+\noindent
+You need to submit a document containing the answers for the two questions 
+below. You can do the implementation in any programming language you like, but you need 
+to submit the source code with which you answered the questions. Otherwise
+the submission will not be counted.  However, the coursework 
+will \emph{only} be judged according to the answers. You can submit your answers
+in a txt-file or as pdf.\bigskip
+
+
+\subsection*{Question 1 (marked with 2\%)}
+
+You need to lex and parse WHILE programs and submit the assembler 
+instructions for the Fibonacci program and for the program you submitted
+in Question 3 of Coursework 2. The latter should be modified so that 
+a user can input the upper bound on the console (in the original question
+it was fixed to 100).
+
+\subsection*{Question 2 (marked with 2\%)}
+
+Extend the syntax of your language so that it also contains \texttt{for}-loops, like
 
 \begin{center}
-\url{http://www.concrete-semantics.org}
+\texttt{for} \;\textit{Id} \texttt{:=} \textit{AExp}\; \texttt{upto} \;\textit{AExp}\; \texttt{do} \textit{Block} 
 \end{center}
 
-\noindent The Isabelle theorem prover is operated through the
-jEdit IDE, which might not be an editor that is widely known.
-JEdit is documented in
+\noindent
+The intended meaning is to first assign the variable \textit{Id} the value of the first arithmetic 
+expression, then go through the loop, at the end increase the value of the variable by 1, 
+and finally test whether the value is still less than or equal to the value of the second
+arithmetic expression (if not, the loop ends). For example, the following instance of a \texttt{for}-loop 
+is supposed to print out the numbers \texttt{2}, \texttt{3}, \texttt{4}.
+
+
+\begin{center}
+\begin{minipage}{6cm}
+\begin{lstlisting}[language=While,basicstyle=\ttfamily, numbers=none]
+for i := 2 upto 4 do {
+    write i	
+}
+\end{lstlisting}
+\end{minipage}
+\end{center}
+
+\noindent
+There are two ways in which this can be implemented: one is to adapt the code generation 
+part of the compiler and generate specific code for \texttt{for}-loops; the other is to
+translate the abstract syntax tree of \texttt{for}-loops into an abstract syntax tree using
+existing language constructs. For example the loop above could be translated
+to the following \texttt{while}-loop:
 
 \begin{center}
-\url{http://isabelle.in.tum.de/dist/Isabelle2014/doc/jedit.pdf}
+\begin{minipage}{6cm}
+\begin{lstlisting}[language=While,basicstyle=\ttfamily, numbers=none]
+i := 2;
+while (i <= 4) do {
+    write i;
+    i := i + 1;
+}
+\end{lstlisting}
+\end{minipage}
+\end{center}
+
+\noindent
+In this question you are supposed to give the assembler instructions for the
+program
+
+\begin{center}
+\begin{minipage}{6cm}
+\begin{lstlisting}[language=While,basicstyle=\ttfamily, numbers=none]
+for i := 1 upto 10000 do {
+  for i := 1 upto 10000 do {
+  skip
+  }
+} 
+\end{lstlisting}
+\end{minipage}
 \end{center}
 
 
-\noindent If you need more help or you are stuck somewhere,
-please feel free to contact me (christian.urban@kcl.ac.uk). I
-am a main developer of Isabelle and have used it for
-approximately the 14 years. One of the success stories of
-Isabelle is the recent verification of a microkernel operating
-system by an Australian group, see \url{http://sel4.systems}.
-Their operating system is the only one that has been proved
-correct according to its specification and is used for
-application where high assurance, security and reliability is
-needed. 
+
+\subsection*{Further Information}
+
+The Java infrastructure unfortunately does not contain an assembler out-of-the-box
+(therefore
+you need to download the additional package Jasmin---see above). But it does contain a 
+disassembler, called \texttt{javap}. A disassembler does the ``opposite'' of an assembler: it
+generates readable assembler code from Java byte code. Have a look at the
+following example: compile the simple Hello World program below using the
+usual Java compiler:
+
+\begin{center}
+\begin{minipage}{10cm}
+\begin{lstlisting}[language=Java,basicstyle=\ttfamily]
+class HelloWorld {
+    public static void main(String[] args) {
+        System.out.println("Hello World!");
+    }
+}
+\end{lstlisting}
+\end{minipage}
+\end{center}
+
+\noindent
+You can use the command
+
+\begin{center}
+\texttt{javap -v HelloWorld}
+\end{center}
+
+\noindent
+to see the assembler instructions of the Java byte code that has been generated for this
+program. You can compare this with the code generated for the Scala
+version of Hello World.
+
+\begin{center}
+\begin{minipage}{10cm}
+\begin{lstlisting}[language=Scala,basicstyle=\ttfamily]
+object HelloWorld {
+   def main(args: Array[String]) {
+      println("Hello World!")
+  }
+}
+\end{lstlisting}
+\end{minipage}
+\end{center}
 
 
-\subsection*{The Task}
+\subsection*{Library Functions}
 
-In this coursework you are asked to prove the correctness of
-the regular expression matcher from the lectures in Isabelle.
-For this you need to first specify what the matcher is
-supposed to do and then to implement the algorithm. Finally
-you need to prove that the algorithm meets the specification.
-The first two parts are relatively easy, because the
-definitions in Isabelle will look very similar to the
-mathematical definitions from the lectures or the Scala code
-that is supplied at KEATS. For example very similar to Scala,
-regular expressions are defined in Isabelle as an inductive
-datatype:
-
-\begin{lstlisting}[language={},numbers=none]
-datatype rexp =
-  NULL
-| EMPTY
-| CHAR char
-| SEQ rexp rexp
-| ALT rexp rexp
-| STAR rexp
-\end{lstlisting}
-
-\noindent The meaning of regular expressions is given as 
-usual:
+You need to generate code for the commands \texttt{write} and \texttt{read}. This
+will require the addition of some ``library'' functions to your generated code. The first
+command even needs two versions, because you might want to write out an
+integer or a string. The Java byte code will need two separate functions for this.
+For writing out an integer, you can use the assembler code
 
 \begin{center}
-\begin{tabular}{rcl@{\hspace{10mm}}l}
-$L(\varnothing)$  & $\dn$ & $\varnothing$   & \pcode{NULL}\\
-$L(\epsilon)$     & $\dn$ & $\{[]\}$        & \pcode{EMPTY}\\ 
-$L(c)$            & $\dn$ & $\{[c]\}$       & \pcode{CHAR}\\
-$L(r_1 + r_2)$     & $\dn$ & $L(r_1) \cup L(r_2)$ & \pcode{ALT}\\
-$L(r_1 \cdot r_2)$ & $\dn$ & $L(r_1) \,@\, L(r_2)$ & \pcode{SEQ}\\
-$L(r^*)$           & $\dn$ & $(L(r))^*$ & \pcode{STAR}\\
-\end{tabular}
+\begin{minipage}{12cm}
+\begin{lstlisting}[basicstyle=\ttfamily, numbers=none]
+.method public static write(I)V 
+    .limit locals 5 
+    .limit stack 5 
+    iload 0 
+    getstatic java/lang/System/out Ljava/io/PrintStream; 
+    swap 
+    invokevirtual java/io/PrintStream/println(I)V 
+    return 
+.end method
+\end{lstlisting}
+\end{minipage}
 \end{center}
 
-\noindent You would need to implement this function in order
-to state the theorem about the correctness of the algorithm.
-The function $L$ should in Isabelle take a \pcode{rexp} as
-input and return a set of strings. Its type is
-therefore 
+\noindent 
+This function will invoke Java's \texttt{println} function for integers. Then if you need
+to generate code for \texttt{write x} where \texttt{x} is an integer variable, you can generate
 
 \begin{center}
-\pcode{L} \pcode{::} \pcode{rexp} $\Rightarrow$ \pcode{string set}
+\begin{minipage}{8cm}
+\begin{lstlisting}[basicstyle=\ttfamily, numbers=none]
+iload n 
+invokestatic XXX/XXX/write(I)V
+\end{lstlisting}
+\end{minipage}
 \end{center}
 
-\noindent Isabelle treats strings as an abbreviation for lists
-of characters. This means you can pattern-match strings like
-lists. The union operation on sets (for the \pcode{ALT}-case)
-is a standard definition in Isabelle, but not the
-concatenation operation on sets and also not the
-star-operation. You would have to supply these definitions.
-The concatenation operation can be defined in terms of the
-append function, written \code{_ @ _} in Isabelle, for lists.
-The star-operation can be defined as a ``big-union'' of 
-powers, like in the lectures, or directly as an inductive set.
+\noindent
+where \texttt{n} is the index where the value of the variable \texttt{x} is
+stored. The \texttt{XXX/XXX} needs to be replaced with the class name 
+which you use to generate the code (for example \texttt{fib/fib} in case
+of the Fibonacci numbers).
 
-The functions for the matcher are shown in
-Figure~\ref{matcher}. The theorem that needs to be proved is
-
-\begin{lstlisting}[numbers=none,language={},keywordstyle=\color{black}\ttfamily,mathescape]
-theorem 
-  "matches r s $\longleftrightarrow$ s $\in$ L r"
-\end{lstlisting}
-
-\noindent which states that the function \emph{matches} is
-true if and only if the string is in the language of the
-regular expression. A proof for this lemma will need
-side-lemmas about \pcode{nullable} and \pcode{der}. An example
-proof in Isabelle that will not be relevant for the theorem
-above is given in Figure~\ref{proof}.
+Writing out a string is similar. The corresponding library function uses strings 
+instead of integers:
 
-\begin{figure}[p]
-\begin{lstlisting}[language={},keywordstyle=\color{black}\ttfamily,mathescape]
-fun 
-  nullable :: "rexp $\Rightarrow$ bool"
-where
-  "nullable NULL = False"
-| "nullable EMPTY = True"
-| "nullable (CHAR _) = False"
-| "nullable (ALT r1 r2) = (nullable(r1) $\vee$ nullable(r2))"
-| "nullable (SEQ r1 r2) = (nullable(r1) $\wedge$ nullable(r2))"
-| "nullable (STAR _) = True"
+\begin{center}
+\begin{minipage}{12cm}
+\begin{lstlisting}[basicstyle=\ttfamily, numbers=none]
+.method public static writes(Ljava/lang/String;)V
+   .limit stack 2
+   .limit locals 2
+   getstatic java/lang/System/out Ljava/io/PrintStream;
+   aload 0
+   invokevirtual java/io/PrintStream/println(Ljava/lang/String;)V
+   return
+.end method
+\end{lstlisting}
+\end{minipage}
+\end{center}
 
-fun 
-  der :: "char $\Rightarrow$ rexp $\Rightarrow$ rexp"
-where
-  "der c NULL = NULL"
-| "der c EMPTY = NULL"
-| "der c (CHAR d) = (if c = d then EMPTY else NULL)"
-| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)"
-| "der c (SEQ r1 r2) = 
-     (if (nullable r1) then ALT (SEQ (der c r1) r2) (der c r2)
-                       else SEQ (der c r1) r2)"
-| "der c (STAR r) = SEQ (der c r) (STAR r)"
+\noindent
+The code that needs to be generated for \texttt{write "some\_string"} commands 
+is
 
-fun 
-  ders :: "rexp $\Rightarrow$ string $\Rightarrow$ rexp"
-where
-  "ders r [] = r"
-| "ders r (c # s) = ders (der c r) s"
+\begin{center}
+\begin{minipage}{8cm}
+\begin{lstlisting}[basicstyle=\ttfamily, numbers=none]
+ldc "some_string"
+invokestatic XXX/XXX/writes(Ljava/lang/String;)V
+\end{lstlisting}
+\end{minipage}
+\end{center}
+
+\noindent
+Again you need to adjust the \texttt{XXX/XXX} part in each call.
+
+The code for \texttt{read} is more complicated. The reason is that the string typed in
+by the user needs to be transformed into an integer. The code in Figure~\ref{read} does this.
+It can be called with
 
-fun 
-  matches :: "rexp $\Rightarrow$ string $\Rightarrow$ bool"
-where
-  "matches r s = nullable (ders r s)" 
+\begin{center}
+\begin{minipage}{8cm}
+\begin{lstlisting}[basicstyle=\ttfamily, numbers=none]
+invokestatic XXX/XXX/read()I 
+istore n
 \end{lstlisting}
-\caption{The definition of the matcher algorithm in 
-Isabelle.\label{matcher}}
-\end{figure}
+\end{minipage}
+\end{center}
 
-\begin{figure}[p]
-\begin{lstlisting}[language={},keywordstyle=\color{black}\ttfamily,mathescape]
-fun 
-  zeroable :: "rexp $\Rightarrow$ bool"
-where
-  "zeroable NULL = True"
-| "zeroable EMPTY = False"
-| "zeroable (CHAR _) = False"
-| "zeroable (ALT r1 r2) = (zeroable(r1) $\wedge$ zeroable(r2))"
-| "zeroable (SEQ r1 r2) = (zeroable(r1) $\vee$ zeroable(r2))"
-| "zeroable (STAR _) = False"
+\noindent 
+where \texttt{n} is the index of the variable that requires an input.
+
+
+\begin{figure}[p]\small
+\begin{lstlisting}[basicstyle=\ttfamily, numbers=none]
+.method public static read()I 
+      .limit locals 10 
+      .limit stack 10
 
-lemma
-  "zeroable r $\longleftrightarrow$ L r = {}"
-proof (induct)
-  case (NULL)
-  have "zeroable NULL" "L NULL = {}" by simp_all
-  then show "zeroable NULL $\longleftrightarrow$ (L NULL = {})" by simp
-next
-  case (EMPTY)
-  have "$\neg$ zeroable EMPTY" "L EMPTY = {[]}" by simp_all
-  then show "zeroable EMPTY $\longleftrightarrow$ (L EMPTY = {})" by simp
-next
-  case (CHAR c)
-  have "$\neg$ zeroable (CHAR c)" "L (CHAR c) = {[c]}" by simp_all
-  then show "zeroable (CHAR c) $\longleftrightarrow$ (L (CHAR c) = {})" by simp
-next 
-  case (ALT r1 r2)
-  have ih1: "zeroable r1 $\longleftrightarrow$ L r1 = {}" by fact
-  have ih2: "zeroable r2 $\longleftrightarrow$ L r2 = {}" by fact
-  show "zeroable (ALT r1 r2) $\longleftrightarrow$ (L (ALT r1 r2) = {})" 
-    using ih1 ih2 by simp
-next
-  case (SEQ r1 r2)
-  have ih1: "zeroable r1 $\longleftrightarrow$ L r1 = {}" by fact
-  have ih2: "zeroable r2 $\longleftrightarrow$ L r2 = {}" by fact
-  show "zeroable (SEQ r1 r2) $\longleftrightarrow$ (L (SEQ r1 r2) = {})" 
-    using ih1 ih2 by (auto simp add: Conc_def)
-next
-  case (STAR r)
-  have "$\neg$ zeroable (STAR r)" "[] $\in$ L (r) ^ 0" by simp_all
-  then show "zeroable (STAR r) $\longleftrightarrow$ (L (STAR r) = {})" 
-    by (simp (no_asm) add: Star_def) blast
-qed
-\end{lstlisting}
-\caption{An Isabelle proof about the function \pcode{zeroable}.\label{proof}}
+      ldc 0 
+      istore 1  ; this will hold our final integer 
+Label1: 
+      getstatic java/lang/System/in Ljava/io/InputStream; 
+      invokevirtual java/io/InputStream/read()I 
+      istore 2 
+      iload 2 
+      ldc 10   ; the newline delimiter 
+      isub 
+      ifeq Label2 
+      iload 2 
+      ldc 32   ; the space delimiter 
+      isub 
+      ifeq Label2
+
+      iload 2 
+      ldc 48   ; we have our digit in ASCII, have to subtract 48 from it 
+      isub 
+      ldc 10 
+      iload 1 
+      imul 
+      iadd 
+      istore 1 
+      goto Label1 
+Label2: 
+      ;when we come here we have our integer computed in Local Variable 1 
+      iload 1 
+      ireturn 
+.end method
+\end{lstlisting}\normalsize
+\caption{Assembler code for reading an integer from the console.\label{read}}
 \end{figure}
 
 \end{document}