coursework/cw0A.tex
changeset 752 c0bdd4ad69ca
parent 751 4b208d81e002
child 753 d94fdbef1a4f
equal deleted inserted replaced
751:4b208d81e002 752:c0bdd4ad69ca
     1 % !TEX program = xelatex
       
     2 \documentclass{article}
       
     3 \usepackage{../style}
       
     4 \usepackage{../langs}
       
     5 
       
     6 \begin{document}
       
     7 
       
     8 \section*{Coursework (Strand 2)}
       
     9 
       
    10 \noindent This coursework is worth 20\% and is due on \cwISABELLE{} at
       
    11 18:00. You are asked to prove the correctness of the regular expression
       
    12 matcher from the lectures using the Isabelle theorem prover. You need to
       
    13 submit a theory file containing this proof and also a document
       
    14 describing your proof. The Isabelle theorem prover is available from
       
    15 
       
    16 \begin{center}
       
    17 \url{http://isabelle.in.tum.de}
       
    18 \end{center}
       
    19 
       
    20 \noindent This is an interactive theorem prover, meaning that
       
    21 you can make definitions and state properties, and then help
       
    22 the system with proving these properties. Sometimes the proofs
       
    23 are also completely automatic. There is a shortish user guide for
       
    24 Isabelle, called ``Programming and Proving in Isabelle/HOL''
       
    25 at
       
    26 
       
    27 \begin{center}
       
    28 \url{http://isabelle.in.tum.de/documentation.html}
       
    29 \end{center}
       
    30 
       
    31 \noindent
       
    32 and also a longer (free) book at
       
    33 
       
    34 \begin{center}
       
    35 \url{http://www.concrete-semantics.org}
       
    36 \end{center}
       
    37 
       
    38 \noindent The Isabelle theorem prover is operated through the
       
    39 jEdit IDE, which might not be an editor that is widely known.
       
    40 JEdit is documented in
       
    41 
       
    42 \begin{center}
       
    43 \url{http://isabelle.in.tum.de/dist/Isabelle2014/doc/jedit.pdf}
       
    44 \end{center}
       
    45 
       
    46 
       
    47 \noindent If you need more help or you are stuck somewhere,
       
    48 please feel free to contact me (christian.urban at kcl.ac.uk). I
       
    49 am one of the main developers of Isabelle and have used it for
       
    50 approximately 16 years. One of the success stories of
       
    51 Isabelle is the recent verification of a microkernel operating
       
    52 system by an Australian group, see \url{http://sel4.systems}.
       
    53 Their operating system is the only one that has been proved
       
    54 correct according to its specification and is used for
       
    55 application where high assurance, security and reliability is
       
    56 needed (like in helicopters which fly over enemy territory).
       
    57 
       
    58 
       
    59 \subsection*{The Task}
       
    60 
       
    61 In this coursework you are asked to prove the correctness of the
       
    62 regular expression matcher from the lectures in Isabelle.  The matcher
       
    63 should be able to deal with the usual (basic) regular expressions
       
    64 
       
    65 \[
       
    66 \ZERO,\; \ONE,\; c,\; r_1 + r_2,\; r_1 \cdot r_2,\; r^*
       
    67 \]
       
    68 
       
    69 \noindent
       
    70 but also with the following extended regular expressions:
       
    71 
       
    72 \begin{center}
       
    73 \begin{tabular}{ll}
       
    74   $r^{\{n\}}$ & exactly $n$-times\\
       
    75   $r^{\{..m\}}$ & zero or more times $r$ but no more than $m$-times\\
       
    76   $r^{\{n..\}}$ & at least $n$-times $r$\\
       
    77   $r^{\{n..m\}}$ & at least $n$-times $r$ but no more than $m$-times\\
       
    78   $\sim{}r$ & not-regular-expression of $r$\\
       
    79 \end{tabular}
       
    80 \end{center}
       
    81 
       
    82 
       
    83 \noindent
       
    84 You need to first specify what the matcher is
       
    85 supposed to do and then to implement the algorithm. Finally you need
       
    86 to prove that the algorithm meets the specification.  The first two
       
    87 parts are relatively easy, because the definitions in Isabelle will
       
    88 look very similar to the mathematical definitions from the lectures or
       
    89 the Scala code that is supplied at KEATS. For example very similar to
       
    90 Scala, regular expressions are defined in Isabelle as an inductive
       
    91 datatype:
       
    92 
       
    93 \begin{lstlisting}[language={},numbers=none]
       
    94 datatype rexp =
       
    95   ZERO
       
    96 | ONE
       
    97 | CHAR char
       
    98 | SEQ rexp rexp
       
    99 | ALT rexp rexp
       
   100 | STAR rexp
       
   101 \end{lstlisting}
       
   102 
       
   103 \noindent The meaning of regular expressions is given as 
       
   104 usual:
       
   105 
       
   106 \begin{center}
       
   107 \begin{tabular}{rcl@{\hspace{10mm}}l}
       
   108 $L(\ZERO)$  & $\dn$ & $\varnothing$   & \pcode{ZERO}\\
       
   109 $L(\ONE)$     & $\dn$ & $\{[]\}$        & \pcode{ONE}\\ 
       
   110 $L(c)$            & $\dn$ & $\{[c]\}$       & \pcode{CHAR}\\
       
   111 $L(r_1 + r_2)$     & $\dn$ & $L(r_1) \cup L(r_2)$ & \pcode{ALT}\\
       
   112 $L(r_1 \cdot r_2)$ & $\dn$ & $L(r_1) \,@\, L(r_2)$ & \pcode{SEQ}\\
       
   113 $L(r^*)$           & $\dn$ & $(L(r))^*$ & \pcode{STAR}\\
       
   114 \end{tabular}
       
   115 \end{center}
       
   116 
       
   117 \noindent You would need to implement this function in order
       
   118 to state the theorem about the correctness of the algorithm.
       
   119 The function $L$ should in Isabelle take a \pcode{rexp} as
       
   120 input and return a set of strings. Its type is
       
   121 therefore 
       
   122 
       
   123 \begin{center}
       
   124 \pcode{L} \pcode{::} \pcode{rexp} $\Rightarrow$ \pcode{string set}
       
   125 \end{center}
       
   126 
       
   127 \noindent Isabelle treats strings as an abbreviation for lists
       
   128 of characters. This means you can pattern-match strings like
       
   129 lists. The union operation on sets (for the \pcode{ALT}-case)
       
   130 is a standard definition in Isabelle, but not the
       
   131 concatenation operation on sets and also not the
       
   132 star-operation. You would have to supply these definitions.
       
   133 The concatenation operation can be defined in terms of the
       
   134 append function, written \code{_ @ _} in Isabelle, for lists.
       
   135 The star-operation can be defined as a ``big-union'' of 
       
   136 powers, like in the lectures, or directly as an inductive set.
       
   137 
       
   138 The functions for the matcher are shown in
       
   139 Figure~\ref{matcher}. The theorem that needs to be proved is
       
   140 
       
   141 \begin{lstlisting}[numbers=none,language={},keywordstyle=\color{black}\ttfamily,mathescape]
       
   142 theorem 
       
   143   "matches r s $\longleftrightarrow$ s $\in$ L r"
       
   144 \end{lstlisting}
       
   145 
       
   146 \noindent which states that the function \emph{matches} is
       
   147 true if and only if the string is in the language of the
       
   148 regular expression. A proof for this lemma will need
       
   149 side-lemmas about \pcode{nullable} and \pcode{der}. An example
       
   150 proof in Isabelle that will not be relevant for the theorem
       
   151 above is given in Figure~\ref{proof}.
       
   152 
       
   153 \begin{figure}[p]
       
   154 \begin{lstlisting}[language={},keywordstyle=\color{black}\ttfamily,mathescape]
       
   155 fun 
       
   156   nullable :: "rexp $\Rightarrow$ bool"
       
   157 where
       
   158   "nullable ZERO = False"
       
   159 | "nullable ONE = True"
       
   160 | "nullable (CHAR _) = False"
       
   161 | "nullable (ALT r1 r2) = (nullable(r1) $\vee$ nullable(r2))"
       
   162 | "nullable (SEQ r1 r2) = (nullable(r1) $\wedge$ nullable(r2))"
       
   163 | "nullable (STAR _) = True"
       
   164 
       
   165 fun 
       
   166   der :: "char $\Rightarrow$ rexp $\Rightarrow$ rexp"
       
   167 where
       
   168   "der c ZERO = ZERO"
       
   169 | "der c ONE = ZERO"
       
   170 | "der c (CHAR d) = (if c = d then ONE else ZERO)"
       
   171 | "der c (ALT r1 r2) = ALT (der c r1) (der c r2)"
       
   172 | "der c (SEQ r1 r2) = 
       
   173      (if (nullable r1) then ALT (SEQ (der c r1) r2) (der c r2)
       
   174                        else SEQ (der c r1) r2)"
       
   175 | "der c (STAR r) = SEQ (der c r) (STAR r)"
       
   176 
       
   177 fun 
       
   178   ders :: "rexp $\Rightarrow$ string $\Rightarrow$ rexp"
       
   179 where
       
   180   "ders r [] = r"
       
   181 | "ders r (c # s) = ders (der c r) s"
       
   182 
       
   183 fun 
       
   184   matches :: "rexp $\Rightarrow$ string $\Rightarrow$ bool"
       
   185 where
       
   186   "matches r s = nullable (ders r s)" 
       
   187 \end{lstlisting}
       
   188 \caption{The definition of the matcher algorithm in 
       
   189 Isabelle.\label{matcher}}
       
   190 \end{figure}
       
   191 
       
   192 \begin{figure}[p]
       
   193 \begin{lstlisting}[language={},keywordstyle=\color{black}\ttfamily,mathescape]
       
   194 fun 
       
   195   zeroable :: "rexp $\Rightarrow$ bool"
       
   196 where
       
   197   "zeroable ZERO = True"
       
   198 | "zeroable ONE = False"
       
   199 | "zeroable (CHAR _) = False"
       
   200 | "zeroable (ALT r1 r2) = (zeroable(r1) $\wedge$ zeroable(r2))"
       
   201 | "zeroable (SEQ r1 r2) = (zeroable(r1) $\vee$ zeroable(r2))"
       
   202 | "zeroable (STAR _) = False"
       
   203 
       
   204 lemma
       
   205   "zeroable r $\longleftrightarrow$ L r = {}"
       
   206 proof (induct)
       
   207   case (ZERO)
       
   208   have "zeroable ZERO" "L ZERO = {}" by simp_all
       
   209   then show "zeroable ZERO $\longleftrightarrow$ (L ZERO = {})" by simp
       
   210 next
       
   211   case (ONE)
       
   212   have "$\neg$ zeroable ONE" "L ONE = {[]}" by simp_all
       
   213   then show "zeroable ONE $\longleftrightarrow$ (L ONE = {})" by simp
       
   214 next
       
   215   case (CHAR c)
       
   216   have "$\neg$ zeroable (CHAR c)" "L (CHAR c) = {[c]}" by simp_all
       
   217   then show "zeroable (CHAR c) $\longleftrightarrow$ (L (CHAR c) = {})" by simp
       
   218 next 
       
   219   case (ALT r1 r2)
       
   220   have ih1: "zeroable r1 $\longleftrightarrow$ L r1 = {}" by fact
       
   221   have ih2: "zeroable r2 $\longleftrightarrow$ L r2 = {}" by fact
       
   222   show "zeroable (ALT r1 r2) $\longleftrightarrow$ (L (ALT r1 r2) = {})" 
       
   223     using ih1 ih2 by simp
       
   224 next
       
   225   case (SEQ r1 r2)
       
   226   have ih1: "zeroable r1 $\longleftrightarrow$ L r1 = {}" by fact
       
   227   have ih2: "zeroable r2 $\longleftrightarrow$ L r2 = {}" by fact
       
   228   show "zeroable (SEQ r1 r2) $\longleftrightarrow$ (L (SEQ r1 r2) = {})" 
       
   229     using ih1 ih2 by (auto simp add: Conc_def)
       
   230 next
       
   231   case (STAR r)
       
   232   have "$\neg$ zeroable (STAR r)" "[] $\in$ L (r) ^ 0" by simp_all
       
   233   then show "zeroable (STAR r) $\longleftrightarrow$ (L (STAR r) = {})" 
       
   234     by (simp (no_asm) add: Star_def) blast
       
   235 qed
       
   236 \end{lstlisting}
       
   237 \caption{An Isabelle proof about the function \texttt{zeroable}.\label{proof}}
       
   238 \end{figure}
       
   239 
       
   240 \end{document}
       
   241 
       
   242 %%% Local Variables: 
       
   243 %%% mode: latex
       
   244 %%% TeX-master: t
       
   245 %%% End: