thys/Paper/Paper.thy
changeset 183 685bff2890d5
parent 182 2e70c1b06ac0
child 184 a42c773ec8ab
equal deleted inserted replaced
182:2e70c1b06ac0 183:685bff2890d5
   622   definition is inspired by the matching relation given by Vansummeren
   622   definition is inspired by the matching relation given by Vansummeren
   623   \cite{Vansummeren2006}. The relation we define is ternary and written as
   623   \cite{Vansummeren2006}. The relation we define is ternary and written as
   624   \mbox{@{term "s \<in> r \<rightarrow> v"}}, relating strings, regular expressions and
   624   \mbox{@{term "s \<in> r \<rightarrow> v"}}, relating strings, regular expressions and
   625   values.
   625   values.
   626   %
   626   %
   627   \begin{center}\small
   627   \begin{center}
   628   \begin{tabular}{c}
   628   \begin{tabular}{c}
   629   @{thm[mode=Axiom] Posix.intros(1)}@{text "P"}@{term "ONE"} \qquad
   629   @{thm[mode=Axiom] Posix.intros(1)}@{text "P"}@{term "ONE"} \qquad
   630   @{thm[mode=Axiom] Posix.intros(2)}@{text "P"}@{term "c"}\medskip\\
   630   @{thm[mode=Axiom] Posix.intros(2)}@{text "P"}@{term "c"}\medskip\\
   631   @{thm[mode=Rule] Posix.intros(3)[of "s" "r\<^sub>1" "v" "r\<^sub>2"]}@{text "P+L"}\qquad
   631   @{thm[mode=Rule] Posix.intros(3)[of "s" "r\<^sub>1" "v" "r\<^sub>2"]}@{text "P+L"}\qquad
   632   @{thm[mode=Rule] Posix.intros(4)[of "s" "r\<^sub>2" "v" "r\<^sub>1"]}@{text "P+R"}\medskip\\
   632   @{thm[mode=Rule] Posix.intros(4)[of "s" "r\<^sub>2" "v" "r\<^sub>1"]}@{text "P+R"}\medskip\\
   866   a \emph{rectification function} that ``repairs'' the incorrect value.
   866   a \emph{rectification function} that ``repairs'' the incorrect value.
   867   
   867   
   868   The rectification functions can be (slightly clumsily) implemented  in
   868   The rectification functions can be (slightly clumsily) implemented  in
   869   Isabelle/HOL as follows using some auxiliary functions:
   869   Isabelle/HOL as follows using some auxiliary functions:
   870 
   870 
   871   \begin{center}\small
   871   \begin{center}
   872   \begin{tabular}{cc}
       
   873   \begin{tabular}{lcl}
   872   \begin{tabular}{lcl}
   874   @{thm (lhs) F_RIGHT.simps(1)} & $\dn$ & @{text "Right (f v)"}\\
   873   @{thm (lhs) F_RIGHT.simps(1)} & $\dn$ & @{text "Right (f v)"}\\
   875   @{thm (lhs) F_LEFT.simps(1)} & $\dn$ & @{text "Left (f v)"}\\
   874   @{thm (lhs) F_LEFT.simps(1)} & $\dn$ & @{text "Left (f v)"}\\
   876   
   875   
   877   @{thm (lhs) F_ALT.simps(1)} & $\dn$ & @{text "Right (f\<^sub>2 v)"}\\
   876   @{thm (lhs) F_ALT.simps(1)} & $\dn$ & @{text "Right (f\<^sub>2 v)"}\\
   878   @{thm (lhs) F_ALT.simps(2)} & $\dn$ & @{text "Left (f\<^sub>1 v)"}\\
   877   @{thm (lhs) F_ALT.simps(2)} & $\dn$ & @{text "Left (f\<^sub>1 v)"}\\
   879   
   878   
   880   @{thm (lhs) F_SEQ1.simps(1)} & $\dn$ & @{text "Seq (f\<^sub>1 ()) (f\<^sub>2 v)"}\\
   879   @{thm (lhs) F_SEQ1.simps(1)} & $\dn$ & @{text "Seq (f\<^sub>1 ()) (f\<^sub>2 v)"}\\
   881   @{thm (lhs) F_SEQ2.simps(1)} & $\dn$ & @{text "Seq (f\<^sub>1 v) (f\<^sub>2 ())"}\\
   880   @{thm (lhs) F_SEQ2.simps(1)} & $\dn$ & @{text "Seq (f\<^sub>1 v) (f\<^sub>2 ())"}\\
   882   @{thm (lhs) F_SEQ.simps(1)} & $\dn$ & @{text "Seq (f\<^sub>1 v\<^sub>1) (f\<^sub>2 v\<^sub>2)"}%\bigskip\\
   881   @{thm (lhs) F_SEQ.simps(1)} & $\dn$ & @{text "Seq (f\<^sub>1 v\<^sub>1) (f\<^sub>2 v\<^sub>2)"}\medskip\\
   883   \end{tabular}
   882   \end{tabular}
   884   &
   883 
   885   \begin{tabular}{lcl}
   884   \begin{tabular}{lcl}
   886   @{term "simp_ALT (ZERO, DUMMY) (r\<^sub>2, f\<^sub>2)"} & $\dn$ & @{term "(r\<^sub>2, F_RIGHT f\<^sub>2)"}\\
   885   @{term "simp_ALT (ZERO, DUMMY) (r\<^sub>2, f\<^sub>2)"} & $\dn$ & @{term "(r\<^sub>2, F_RIGHT f\<^sub>2)"}\\
   887   @{term "simp_ALT (r\<^sub>1, f\<^sub>1) (ZERO, DUMMY)"} & $\dn$ & @{term "(r\<^sub>1, F_LEFT f\<^sub>1)"}\\
   886   @{term "simp_ALT (r\<^sub>1, f\<^sub>1) (ZERO, DUMMY)"} & $\dn$ & @{term "(r\<^sub>1, F_LEFT f\<^sub>1)"}\\
   888   @{term "simp_ALT (r\<^sub>1, f\<^sub>1) (r\<^sub>2, f\<^sub>2)"} & $\dn$ & @{term "(ALT r\<^sub>1 r\<^sub>2, F_ALT f\<^sub>1 f\<^sub>2)"}\\
   887   @{term "simp_ALT (r\<^sub>1, f\<^sub>1) (r\<^sub>2, f\<^sub>2)"} & $\dn$ & @{term "(ALT r\<^sub>1 r\<^sub>2, F_ALT f\<^sub>1 f\<^sub>2)"}\\
   889   @{term "simp_SEQ (ONE, f\<^sub>1) (r\<^sub>2, f\<^sub>2)"} & $\dn$ & @{term "(r\<^sub>2, F_SEQ1 f\<^sub>1 f\<^sub>2)"}\\
   888   @{term "simp_SEQ (ONE, f\<^sub>1) (r\<^sub>2, f\<^sub>2)"} & $\dn$ & @{term "(r\<^sub>2, F_SEQ1 f\<^sub>1 f\<^sub>2)"}\\
   890   @{term "simp_SEQ (r\<^sub>1, f\<^sub>1) (ONE, f\<^sub>2)"} & $\dn$ & @{term "(r\<^sub>1, F_SEQ2 f\<^sub>1 f\<^sub>2)"}\\
   889   @{term "simp_SEQ (r\<^sub>1, f\<^sub>1) (ONE, f\<^sub>2)"} & $\dn$ & @{term "(r\<^sub>1, F_SEQ2 f\<^sub>1 f\<^sub>2)"}\\
   891   @{term "simp_SEQ (r\<^sub>1, f\<^sub>1) (r\<^sub>2, f\<^sub>2)"} & $\dn$ & @{term "(SEQ r\<^sub>1 r\<^sub>2, F_SEQ f\<^sub>1 f\<^sub>2)"}\\
   890   @{term "simp_SEQ (r\<^sub>1, f\<^sub>1) (r\<^sub>2, f\<^sub>2)"} & $\dn$ & @{term "(SEQ r\<^sub>1 r\<^sub>2, F_SEQ f\<^sub>1 f\<^sub>2)"}\\
   892   \end{tabular}
       
   893   \end{tabular}
   891   \end{tabular}
   894   \end{center}
   892   \end{center}
   895 
   893 
   896   \noindent
   894   \noindent
   897   The functions @{text "simp\<^bsub>Alt\<^esub>"} and @{text "simp\<^bsub>Seq\<^esub>"} encode the simplification rules
   895   The functions @{text "simp\<^bsub>Alt\<^esub>"} and @{text "simp\<^bsub>Seq\<^esub>"} encode the simplification rules
   958   gives @{term "s \<in> ALT r\<^sub>1 r\<^sub>2 \<rightarrow> snd (simp (ALT r\<^sub>1 r\<^sub>2)) v"} as we need to show.
   956   gives @{term "s \<in> ALT r\<^sub>1 r\<^sub>2 \<rightarrow> snd (simp (ALT r\<^sub>1 r\<^sub>2)) v"} as we need to show.
   959   The other cases are similar.\qed
   957   The other cases are similar.\qed
   960   \end{proof}
   958   \end{proof}
   961 
   959 
   962   \noindent We can now prove relatively straightforwardly that the
   960   \noindent We can now prove relatively straightforwardly that the
   963   optimised lexer produce the expected result:
   961   optimised lexer produces the expected result:
   964 
   962 
   965   \begin{theorem}
   963   \begin{theorem}
   966   @{thm slexer_correctness}
   964   @{thm slexer_correctness}
   967   \end{theorem}
   965   \end{theorem}
   968 
   966 
   969   \begin{proof} By induction on @{term s} generalising over @{term
   967   \begin{proof} By induction on @{term s} generalising over @{term
   970   r}. The case @{term "[]"} is trivial. For the cons-case suppose the
   968   r}. The case @{term "[]"} is trivial. For the cons-case suppose the
   971   string is of the form @{term "c # s"}. By induction hypothesis we
   969   string is of the form @{term "c # s"}. By induction hypothesis we
   972   know @{term "slexer r s = lexer r s"} holds for all @{term r} (in
   970   know @{term "slexer r s = lexer r s"} holds for all @{term r} (in
   973   particular for @{term "r"} being the derivative @{term "der c
   971   particular for @{term "r"} being the derivative @{term "der c
   974   r"}). Let @{term "r\<^sub>s"} be the simplified derivative regular expression, @{term
   972   r"}). Let @{term "r\<^sub>s"} be the simplified derivative regular expression, that is @{term
   975   "fst (simp (der c r))"}, and @{term "f\<^sub>r"} be the rectification
   973   "fst (simp (der c r))"}, and @{term "f\<^sub>r"} be the rectification
   976   function, @{term "snd (simp (der c r))"}.  We distinguish the cases
   974   function, that is @{term "snd (simp (der c r))"}.  We distinguish the cases
   977   whether (*) @{term "s \<in> L (der c r)"} or not. In the first case we
   975   whether (*) @{term "s \<in> L (der c r)"} or not. In the first case we
   978   have by Thm~\ref{lexercorrect}(2) a value @{term "v"} so that @{term
   976   have by Thm.~\ref{lexercorrect}(2) a value @{term "v"} so that @{term
   979   "lexer (der c r) s = Some v"} and @{term "s \<in> der c r \<rightarrow> v"} hold.
   977   "lexer (der c r) s = Some v"} and @{term "s \<in> der c r \<rightarrow> v"} hold.
   980   By Lem~\ref{slexeraux}(1) we can also infer from~(*) that @{term "s
   978   By Lem.~\ref{slexeraux}(1) we can also infer from~(*) that @{term "s
   981   \<in> L r\<^sub>s"} holds.  Hence we know by Thm~\ref{lexercorrect}(2) that
   979   \<in> L r\<^sub>s"} holds.  Hence we know by Thm.~\ref{lexercorrect}(2) that
   982   there exists a @{term "v'"} with @{term "lexer r\<^sub>s s = Some v'"} and
   980   there exists a @{term "v'"} with @{term "lexer r\<^sub>s s = Some v'"} and
   983   @{term "s \<in> r\<^sub>s \<rightarrow> v'"}. From the latter we know by
   981   @{term "s \<in> r\<^sub>s \<rightarrow> v'"}. From the latter we know by
   984   Lem~\ref{slexeraux}(2) that @{term "s \<in> der c r \<rightarrow> (f\<^sub>r v')"} holds.
   982   Lem.~\ref{slexeraux}(2) that @{term "s \<in> der c r \<rightarrow> (f\<^sub>r v')"} holds.
   985   By the uniqueness of the POSIX relation (Thm~\ref{posixdeterm}) we
   983   By the uniqueness of the POSIX relation (Thm.~\ref{posixdeterm}) we
   986   can infer that @{term v} is equal to @{term "f\<^sub>r v'"}---that is the 
   984   can infer that @{term v} is equal to @{term "f\<^sub>r v'"}---that is the 
   987   rectification function applied to @{term "v'"}
   985   rectification function applied to @{term "v'"}
   988   produces the original @{term "v"}.  Now the case follows by the
   986   produces the original @{term "v"}.  Now the case follows by the
   989   definitions of @{const lexer} and @{const slexer}.
   987   definitions of @{const lexer} and @{const slexer}.
   990 
   988 
   991   In the second case where @{term "s \<notin> L (der c r)"} we have that
   989   In the second case where @{term "s \<notin> L (der c r)"} we have that
   992   @{term "lexer (der c r) s = None"} by Thm~\ref{lexercorrect}(1).  We
   990   @{term "lexer (der c r) s = None"} by Thm.~\ref{lexercorrect}(1).  We
   993   also know by Lem~\ref{slexeraux}(1) that @{term "s \<notin> L r\<^sub>s"}. Hence
   991   also know by Lem.~\ref{slexeraux}(1) that @{term "s \<notin> L r\<^sub>s"}. Hence
   994   @{term "lexer r\<^sub>s s = None"} by Thm~\ref{lexercorrect}(1) and
   992   @{term "lexer r\<^sub>s s = None"} by Thm.~\ref{lexercorrect}(1) and
   995   by IH then also @{term "slexer r\<^sub>s s = None"}. With this we can
   993   by IH then also @{term "slexer r\<^sub>s s = None"}. With this we can
   996   conclude in this case too.\qed   
   994   conclude in this case too.\qed   
   997 
   995 
   998   \end{proof} 
   996   \end{proof} 
   999 *}
   997 *}
  1010   final form, we make no comment thereon, preferring to give general reasons
  1008   final form, we make no comment thereon, preferring to give general reasons
  1011   for our belief that the approach of \cite{Sulzmann2014} is problematic.
  1009   for our belief that the approach of \cite{Sulzmann2014} is problematic.
  1012   Their central definition is an ``ordering relation'' defined by the
  1010   Their central definition is an ``ordering relation'' defined by the
  1013   rules (slightly adapted to fit our notation):
  1011   rules (slightly adapted to fit our notation):
  1014 
  1012 
  1015 \begin{center}\small  
  1013 \begin{center}  
  1016 \begin{tabular}{@ {}c@ {\hspace{4mm}}c@ {}}	
  1014 \begin{tabular}{@ {}c@ {\hspace{4mm}}c@ {}}	
  1017 @{thm[mode=Rule] C2[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>1\<iota>" "v\<^sub>2" "r\<^sub>2" "v\<^sub>2\<iota>"]}\,(C2) &
  1015 @{thm[mode=Rule] C2[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>1\<iota>" "v\<^sub>2" "r\<^sub>2" "v\<^sub>2\<iota>"]}\,(C2) &
  1018 @{thm[mode=Rule] C1[of "v\<^sub>2" "r\<^sub>2" "v\<^sub>2\<iota>" "v\<^sub>1" "r\<^sub>1"]}\,(C1)\smallskip\\
  1016 @{thm[mode=Rule] C1[of "v\<^sub>2" "r\<^sub>2" "v\<^sub>2\<iota>" "v\<^sub>1" "r\<^sub>1"]}\,(C1)\smallskip\\
  1019 
  1017 
  1020 @{thm[mode=Rule] A1[of "v\<^sub>1" "v\<^sub>2" "r\<^sub>1" "r\<^sub>2"]}\,(A1) &
  1018 @{thm[mode=Rule] A1[of "v\<^sub>1" "v\<^sub>2" "r\<^sub>1" "r\<^sub>2"]}\,(A1) &