thys/Paper/Paper.thy
changeset 180 42ffaca7c85e
parent 179 85766e408c66
child 181 162f112b814b
equal deleted inserted replaced
179:85766e408c66 180:42ffaca7c85e
    52   F_SEQ1 ("F\<^bsub>Seq1\<^esub> _ _") and
    52   F_SEQ1 ("F\<^bsub>Seq1\<^esub> _ _") and
    53   F_SEQ2 ("F\<^bsub>Seq2\<^esub> _ _") and
    53   F_SEQ2 ("F\<^bsub>Seq2\<^esub> _ _") and
    54   F_SEQ ("F\<^bsub>Seq\<^esub> _ _") and
    54   F_SEQ ("F\<^bsub>Seq\<^esub> _ _") and
    55   simp_SEQ ("simp\<^bsub>Seq\<^esub> _ _" [1000, 1000] 1) and
    55   simp_SEQ ("simp\<^bsub>Seq\<^esub> _ _" [1000, 1000] 1) and
    56   simp_ALT ("simp\<^bsub>Alt\<^esub> _ _" [1000, 1000] 1) and
    56   simp_ALT ("simp\<^bsub>Alt\<^esub> _ _" [1000, 1000] 1) and
    57   slexer ("lexer\<^sup>+ _ _" [78,78] 77) and
    57   slexer ("lexer\<^sup>+" 1000) and
    58 
    58 
    59   ValOrd ("_ >\<^bsub>_\<^esub> _" [77,77,77] 77) and
    59   ValOrd ("_ >\<^bsub>_\<^esub> _" [77,77,77] 77) and
    60   ValOrdEq ("_ \<ge>\<^bsub>_\<^esub> _" [77,77,77] 77)
    60   ValOrdEq ("_ \<ge>\<^bsub>_\<^esub> _" [77,77,77] 77)
    61 
    61 
    62 definition 
    62 definition 
   651 
   651 
   652    \noindent
   652    \noindent
   653    We can prove that given a string @{term s} and regular expression @{term
   653    We can prove that given a string @{term s} and regular expression @{term
   654    r}, the POSIX value @{term v} is uniquely determined by @{term "s \<in> r \<rightarrow> v"}.
   654    r}, the POSIX value @{term v} is uniquely determined by @{term "s \<in> r \<rightarrow> v"}.
   655 
   655 
   656   \begin{theorem}
   656   \begin{theorem}\label{posixdeterm}
   657   @{thm[mode=IfThen] Posix_determ(1)[of _ _ "v\<^sub>1" "v\<^sub>2"]}
   657   @{thm[mode=IfThen] Posix_determ(1)[of _ _ "v\<^sub>1" "v\<^sub>2"]}
   658   \end{theorem}
   658   \end{theorem}
   659 
   659 
   660   \begin{proof} By induction on the definition of @{term "s \<in> r \<rightarrow> v\<^sub>1"} and
   660   \begin{proof} By induction on the definition of @{term "s \<in> r \<rightarrow> v\<^sub>1"} and
   661   a case analysis of @{term "s \<in> r \<rightarrow> v\<^sub>2"}. This proof requires the
   661   a case analysis of @{term "s \<in> r \<rightarrow> v\<^sub>2"}. This proof requires the
   776   With Lem.~\ref{Posix2} in place, it is completely routine to establish
   776   With Lem.~\ref{Posix2} in place, it is completely routine to establish
   777   that the Sulzmann and Lu lexer satisfies our specification (returning
   777   that the Sulzmann and Lu lexer satisfies our specification (returning
   778   the null value @{term "None"} iff the string is not in the language of the regular expression,
   778   the null value @{term "None"} iff the string is not in the language of the regular expression,
   779   and returning a unique POSIX value iff the string \emph{is} in the language):
   779   and returning a unique POSIX value iff the string \emph{is} in the language):
   780 
   780 
   781   \begin{theorem}\mbox{}\smallskip\\
   781   \begin{theorem}\mbox{}\smallskip\\\label{lexercorrect}
   782   \begin{tabular}{ll}
   782   \begin{tabular}{ll}
   783   (1) & @{thm (lhs) lexer_correct_None} if and only if @{thm (rhs) lexer_correct_None}\\
   783   (1) & @{thm (lhs) lexer_correct_None} if and only if @{thm (rhs) lexer_correct_None}\\
   784   (2) & @{thm (lhs) lexer_correct_Some} if and only if @{thm (rhs) lexer_correct_Some}\\
   784   (2) & @{thm (lhs) lexer_correct_Some} if and only if @{thm (rhs) lexer_correct_Some}\\
   785   \end{tabular}
   785   \end{tabular}
   786   \end{theorem}
   786   \end{theorem}
   926   and then simplify the result. This gives us a simplified derivative
   926   and then simplify the result. This gives us a simplified derivative
   927   @{text "r\<^sub>s"} and a rectification function @{text "f\<^sub>r"}. The lexer
   927   @{text "r\<^sub>s"} and a rectification function @{text "f\<^sub>r"}. The lexer
   928   is then recursively called with the simplified derivative, but before
   928   is then recursively called with the simplified derivative, but before
   929   we inject the character @{term c} into the value @{term v}, we need to rectify
   929   we inject the character @{term c} into the value @{term v}, we need to rectify
   930   @{term v} (that is construct @{term "f\<^sub>r v"}). Before we can establish the correctness
   930   @{term v} (that is construct @{term "f\<^sub>r v"}). Before we can establish the correctness
   931   of @{const "slexer"}, we need to show that simplification preserves the language
   931   of @{term "slexer"}, we need to show that simplification preserves the language
   932   and the relation between simplification and our posix definition:
   932   and simplification preserves our POSIX relation once the value is rectified
   933 
   933   (recall @{const "simp"} generates a regular expression, rectification function pair):
   934   \begin{lemma}\mbox{}\smallskip\\
   934 
       
   935   \begin{lemma}\mbox{}\smallskip\\\label{slexeraux}
   935   \begin{tabular}{ll}
   936   \begin{tabular}{ll}
   936   (1) & @{thm L_fst_simp[symmetric]}\\
   937   (1) & @{thm L_fst_simp[symmetric]}\\
   937   (2) & @{thm[mode=IfThen] Posix_simp}
   938   (2) & @{thm[mode=IfThen] Posix_simp}
   938   \end{tabular}
   939   \end{tabular}
   939   \end{lemma}
   940   \end{lemma}
   940 
   941 
   941   \noindent
   942   \begin{proof}
   942   We can now prove that
   943   Both are by induction on @{text r}. There is no interesting case for the
       
   944   first statement. For the second statement of interest are the @{term "r = SEQ r\<^sub>1 r\<^sub>2"}
       
   945   and @{term "r = ALT r\<^sub>1 r\<^sub>2"} cases.
       
   946   
       
   947   \end{proof}
       
   948 
       
   949   \noindent We can now prove relatively straightforwardly that the
       
   950   optimised lexer produce the expected result:
   943 
   951 
   944   \begin{theorem}
   952   \begin{theorem}
   945   @{thm slexer_correctness}
   953   @{thm slexer_correctness}
   946   \end{theorem}
   954   \end{theorem}
   947 
   955 
   948   \noindent
   956   \begin{proof} By induction on @{term s} generalising over @{term
   949   holds but for lack of space refer the reader to our mechanisation for details.
   957   r}. The case @{term "[]"} is trivial. For the cons-case suppose the
   950 
   958   string is of the form @{term "c # s"}. By induction hypothesis we
       
   959   know @{term "slexer r s = lexer r s"} holds for all @{term r} (in
       
   960   particular for @{term "r"} being the derivative @{term "der c
       
   961   r"}). Let @{term "r\<^sub>s"} be the simplified derivative regular expression, @{term
       
   962   "fst (simp (der c r))"}, and @{term "f\<^sub>r"} be the rectification
       
   963   function, @{term "snd (simp (der c r))"}.  We distinguish the cases
       
   964   whether (*) @{term "s \<in> L (der c r)"} or not. In the first case we
       
   965   have by Thm~\ref{lexercorrect}(2) a value @{term "v"} so that @{term
       
   966   "lexer (der c r) s = Some v"} and @{term "s \<in> der c r \<rightarrow> v"} hold.
       
   967   By Lem~\ref{slexeraux}(1) we can also infer from~(*) that @{term "s
       
   968   \<in> L r\<^sub>s"} holds.  Hence we know by Thm~\ref{lexercorrect}(2) that
       
   969   there exists a @{term "v'"} with @{term "lexer r\<^sub>s s = Some v'"} and
       
   970   @{term "s \<in> r\<^sub>s \<rightarrow> v'"}. From the latter we know by
       
   971   Lem~\ref{slexeraux}(2) that @{term "s \<in> der c r \<rightarrow> (f\<^sub>r v')"} holds.
       
   972   By the uniqueness of the POSIX relation (Thm~\ref{posixdeterm}) we
       
   973   can infer that @{term v} is equal to @{term "f\<^sub>r v'"}---that is the 
       
   974   rectification function applied to @{term "v'"}
       
   975   produces the original @{term "v"}.  Now the case follows by the
       
   976   definitions of @{const lexer} and @{const slexer}.
       
   977 
       
   978   In the second case where @{term "s \<notin> L (der c r)"} we have that
       
   979   @{term "lexer (der c r) s = None"} by Thm~\ref{lexercorrect}(1).  We
       
   980   also know by Lem~\ref{slexeraux}(1) that @{term "s \<notin> L r\<^sub>s"}. Hence
       
   981   @{term "lexer r\<^sub>s s = None"} by Thm~\ref{lexercorrect}(1) and
       
   982   by IH then also @{term "slexer r\<^sub>s s = None"}. With this we can
       
   983   conclude in this case too.\qed   
       
   984 
       
   985   \end{proof} 
   951 *}
   986 *}
   952 
   987 
   953 section {* The Correctness Argument by Sulzmann and Lu\label{argu} *}
   988 section {* The Correctness Argument by Sulzmann and Lu\label{argu} *}
   954 
   989 
   955 text {*
   990 text {*