52 F_SEQ1 ("F\<^bsub>Seq1\<^esub> _ _") and |
52 F_SEQ1 ("F\<^bsub>Seq1\<^esub> _ _") and |
53 F_SEQ2 ("F\<^bsub>Seq2\<^esub> _ _") and |
53 F_SEQ2 ("F\<^bsub>Seq2\<^esub> _ _") and |
54 F_SEQ ("F\<^bsub>Seq\<^esub> _ _") and |
54 F_SEQ ("F\<^bsub>Seq\<^esub> _ _") and |
55 simp_SEQ ("simp\<^bsub>Seq\<^esub> _ _" [1000, 1000] 1) and |
55 simp_SEQ ("simp\<^bsub>Seq\<^esub> _ _" [1000, 1000] 1) and |
56 simp_ALT ("simp\<^bsub>Alt\<^esub> _ _" [1000, 1000] 1) and |
56 simp_ALT ("simp\<^bsub>Alt\<^esub> _ _" [1000, 1000] 1) and |
57 slexer ("lexer\<^sup>+ _ _" [78,78] 77) and |
57 slexer ("lexer\<^sup>+" 1000) and |
58 |
58 |
59 ValOrd ("_ >\<^bsub>_\<^esub> _" [77,77,77] 77) and |
59 ValOrd ("_ >\<^bsub>_\<^esub> _" [77,77,77] 77) and |
60 ValOrdEq ("_ \<ge>\<^bsub>_\<^esub> _" [77,77,77] 77) |
60 ValOrdEq ("_ \<ge>\<^bsub>_\<^esub> _" [77,77,77] 77) |
61 |
61 |
62 definition |
62 definition |
776 With Lem.~\ref{Posix2} in place, it is completely routine to establish |
776 With Lem.~\ref{Posix2} in place, it is completely routine to establish |
777 that the Sulzmann and Lu lexer satisfies our specification (returning |
777 that the Sulzmann and Lu lexer satisfies our specification (returning |
778 the null value @{term "None"} iff the string is not in the language of the regular expression, |
778 the null value @{term "None"} iff the string is not in the language of the regular expression, |
779 and returning a unique POSIX value iff the string \emph{is} in the language): |
779 and returning a unique POSIX value iff the string \emph{is} in the language): |
780 |
780 |
781 \begin{theorem}\mbox{}\smallskip\\ |
781 \begin{theorem}\mbox{}\smallskip\\\label{lexercorrect} |
782 \begin{tabular}{ll} |
782 \begin{tabular}{ll} |
783 (1) & @{thm (lhs) lexer_correct_None} if and only if @{thm (rhs) lexer_correct_None}\\ |
783 (1) & @{thm (lhs) lexer_correct_None} if and only if @{thm (rhs) lexer_correct_None}\\ |
784 (2) & @{thm (lhs) lexer_correct_Some} if and only if @{thm (rhs) lexer_correct_Some}\\ |
784 (2) & @{thm (lhs) lexer_correct_Some} if and only if @{thm (rhs) lexer_correct_Some}\\ |
785 \end{tabular} |
785 \end{tabular} |
786 \end{theorem} |
786 \end{theorem} |
926 and then simplify the result. This gives us a simplified derivative |
926 and then simplify the result. This gives us a simplified derivative |
927 @{text "r\<^sub>s"} and a rectification function @{text "f\<^sub>r"}. The lexer |
927 @{text "r\<^sub>s"} and a rectification function @{text "f\<^sub>r"}. The lexer |
928 is then recursively called with the simplified derivative, but before |
928 is then recursively called with the simplified derivative, but before |
929 we inject the character @{term c} into the value @{term v}, we need to rectify |
929 we inject the character @{term c} into the value @{term v}, we need to rectify |
930 @{term v} (that is construct @{term "f\<^sub>r v"}). Before we can establish the correctness |
930 @{term v} (that is construct @{term "f\<^sub>r v"}). Before we can establish the correctness |
931 of @{const "slexer"}, we need to show that simplification preserves the language |
931 of @{term "slexer"}, we need to show that simplification preserves the language |
932 and the relation between simplification and our posix definition: |
932 and simplification preserves our POSIX relation once the value is rectified |
933 |
933 (recall @{const "simp"} generates a regular expression, rectification function pair): |
934 \begin{lemma}\mbox{}\smallskip\\ |
934 |
|
935 \begin{lemma}\mbox{}\smallskip\\\label{slexeraux} |
935 \begin{tabular}{ll} |
936 \begin{tabular}{ll} |
936 (1) & @{thm L_fst_simp[symmetric]}\\ |
937 (1) & @{thm L_fst_simp[symmetric]}\\ |
937 (2) & @{thm[mode=IfThen] Posix_simp} |
938 (2) & @{thm[mode=IfThen] Posix_simp} |
938 \end{tabular} |
939 \end{tabular} |
939 \end{lemma} |
940 \end{lemma} |
940 |
941 |
941 \noindent |
942 \begin{proof} |
942 We can now prove that |
943 Both are by induction on @{text r}. There is no interesting case for the |
|
944 first statement. For the second statement of interest are the @{term "r = SEQ r\<^sub>1 r\<^sub>2"} |
|
945 and @{term "r = ALT r\<^sub>1 r\<^sub>2"} cases. |
|
946 |
|
947 \end{proof} |
|
948 |
|
949 \noindent We can now prove relatively straightforwardly that the |
|
950 optimised lexer produce the expected result: |
943 |
951 |
944 \begin{theorem} |
952 \begin{theorem} |
945 @{thm slexer_correctness} |
953 @{thm slexer_correctness} |
946 \end{theorem} |
954 \end{theorem} |
947 |
955 |
948 \noindent |
956 \begin{proof} By induction on @{term s} generalising over @{term |
949 holds but for lack of space refer the reader to our mechanisation for details. |
957 r}. The case @{term "[]"} is trivial. For the cons-case suppose the |
950 |
958 string is of the form @{term "c # s"}. By induction hypothesis we |
|
959 know @{term "slexer r s = lexer r s"} holds for all @{term r} (in |
|
960 particular for @{term "r"} being the derivative @{term "der c |
|
961 r"}). Let @{term "r\<^sub>s"} be the simplified derivative regular expression, @{term |
|
962 "fst (simp (der c r))"}, and @{term "f\<^sub>r"} be the rectification |
|
963 function, @{term "snd (simp (der c r))"}. We distinguish the cases |
|
964 whether (*) @{term "s \<in> L (der c r)"} or not. In the first case we |
|
965 have by Thm~\ref{lexercorrect}(2) a value @{term "v"} so that @{term |
|
966 "lexer (der c r) s = Some v"} and @{term "s \<in> der c r \<rightarrow> v"} hold. |
|
967 By Lem~\ref{slexeraux}(1) we can also infer from~(*) that @{term "s |
|
968 \<in> L r\<^sub>s"} holds. Hence we know by Thm~\ref{lexercorrect}(2) that |
|
969 there exists a @{term "v'"} with @{term "lexer r\<^sub>s s = Some v'"} and |
|
970 @{term "s \<in> r\<^sub>s \<rightarrow> v'"}. From the latter we know by |
|
971 Lem~\ref{slexeraux}(2) that @{term "s \<in> der c r \<rightarrow> (f\<^sub>r v')"} holds. |
|
972 By the uniqueness of the POSIX relation (Thm~\ref{posixdeterm}) we |
|
973 can infer that @{term v} is equal to @{term "f\<^sub>r v'"}---that is the |
|
974 rectification function applied to @{term "v'"} |
|
975 produces the original @{term "v"}. Now the case follows by the |
|
976 definitions of @{const lexer} and @{const slexer}. |
|
977 |
|
978 In the second case where @{term "s \<notin> L (der c r)"} we have that |
|
979 @{term "lexer (der c r) s = None"} by Thm~\ref{lexercorrect}(1). We |
|
980 also know by Lem~\ref{slexeraux}(1) that @{term "s \<notin> L r\<^sub>s"}. Hence |
|
981 @{term "lexer r\<^sub>s s = None"} by Thm~\ref{lexercorrect}(1) and |
|
982 by IH then also @{term "slexer r\<^sub>s s = None"}. With this we can |
|
983 conclude in this case too.\qed |
|
984 |
|
985 \end{proof} |
951 *} |
986 *} |
952 |
987 |
953 section {* The Correctness Argument by Sulzmann and Lu\label{argu} *} |
988 section {* The Correctness Argument by Sulzmann and Lu\label{argu} *} |
954 |
989 |
955 text {* |
990 text {* |