34 ders_syn ("_\\_" [79, 1000] 76) and |
34 ders_syn ("_\\_" [79, 1000] 76) and |
35 flat ("|_|" [75] 74) and |
35 flat ("|_|" [75] 74) and |
36 Sequ ("_ @ _" [78,77] 63) and |
36 Sequ ("_ @ _" [78,77] 63) and |
37 injval ("inj _ _ _" [79,77,79] 76) and |
37 injval ("inj _ _ _" [79,77,79] 76) and |
38 mkeps ("mkeps _" [79] 76) and |
38 mkeps ("mkeps _" [79] 76) and |
39 projval ("proj _ _ _" [1000,77,1000] 77) and |
39 (*projval ("proj _ _ _" [1000,77,1000] 77) and*) |
40 length ("len _" [78] 73) and |
40 length ("len _" [78] 73) and |
41 matcher ("lexer _ _" [78,78] 77) and |
41 matcher ("lexer _ _" [78,78] 77) and |
42 |
42 |
43 Prf ("_ : _" [75,75] 75) and |
43 Prf ("_ : _" [75,75] 75) and |
44 PMatch ("'(_, _') \<rightarrow> _" [63,75,75] 75) |
44 PMatch ("'(_, _') \<rightarrow> _" [63,75,75] 75) |
642 \begin{proof} |
642 \begin{proof} |
643 By induction on the definition of @{term "s \<in> r \<rightarrow> v\<^sub>1"} and a case |
643 By induction on the definition of @{term "s \<in> r \<rightarrow> v\<^sub>1"} and a case |
644 analysis of @{term "s \<in> r \<rightarrow> v\<^sub>2"}.\qed |
644 analysis of @{term "s \<in> r \<rightarrow> v\<^sub>2"}.\qed |
645 \end{proof} |
645 \end{proof} |
646 |
646 |
647 \begin{lemma} |
647 \begin{lemma}\label{lemmkeps} |
648 @{thm[mode=IfThen] PMatch_mkeps} |
648 @{thm[mode=IfThen] PMatch_mkeps} |
649 \end{lemma} |
649 \end{lemma} |
650 |
650 |
651 \begin{proof} |
651 \begin{proof} |
652 By routine induction on @{term r}.\qed |
652 By routine induction on @{term r}.\qed |
653 \end{proof} |
653 \end{proof} |
654 |
654 |
655 \begin{lemma} |
655 \noindent |
|
656 The central lemma for our POSIX relation is that the @{text inj}-function |
|
657 preserves POSIX values. |
|
658 |
|
659 \begin{lemma}\label{PMatch2} |
656 @{thm[mode=IfThen] PMatch2_roy_version} |
660 @{thm[mode=IfThen] PMatch2_roy_version} |
657 \end{lemma} |
661 \end{lemma} |
|
662 |
|
663 \begin{proof} |
|
664 |
|
665 By induction on @{text r}. Suppose @{term "r = ALT r\<^sub>1 r\<^sub>2"}. There are |
|
666 two subcases, namely @{text "(a)"} \mbox{@{term "v = Left v'"}} and @{term |
|
667 "s \<in> der c r\<^sub>1 \<rightarrow> v'"}; and @{text "(b)"} @{term "v = Right v'"}, @{term |
|
668 "s \<notin> L (der c r\<^sub>1)"} and @{term "s \<in> der c r\<^sub>2 \<rightarrow> v'"}. In @{text "(a)"} we |
|
669 know @{term "s \<in> der c r\<^sub>1 \<rightarrow> v'"}, from which we can infer @{term "(c # s) |
|
670 \<in> r\<^sub>1 \<rightarrow> injval r\<^sub>1 c v'"} by induction hypothesis and hence @{term "(c # |
|
671 s) \<in> ALT r\<^sub>1 r\<^sub>2 \<rightarrow> injval (ALT r\<^sub>1 r\<^sub>2) c (Left v')"} as needed. Similarly |
|
672 in subcase @{text "(b)"} where, however, in addition we have to use |
|
673 Prop~\ref{derprop}(2) in order to infer @{term "c # s \<notin> L r\<^sub>1"} from @{term |
|
674 "s \<notin> L (der c r\<^sub>1)"}. |
|
675 |
|
676 Suppose @{term "r = SEQ r\<^sub>1 r\<^sub>2"}. There are three subcases: |
|
677 |
|
678 \begin{quote} |
|
679 \begin{description} |
|
680 \item[@{text "(a)"}] @{term "v = Left (Seq v\<^sub>1 v\<^sub>2)"} and @{term "nullable r\<^sub>1"} |
|
681 \item[@{text "(b)"}] @{term "v = Right v\<^sub>1"} and @{term "nullable r\<^sub>1"} |
|
682 \item[@{text "(c)"}] @{term "v = Seq v\<^sub>1 v\<^sub>2"} and @{term "\<not> nullable r\<^sub>1"} |
|
683 \end{description} |
|
684 \end{quote} |
|
685 |
|
686 \noindent For @{text "(a)"} we know @{term "s\<^sub>1 \<in> der c r\<^sub>1 \<rightarrow> v\<^sub>1"} and |
|
687 @{term "s\<^sub>2 \<in> r\<^sub>2 \<rightarrow> v\<^sub>2"} as well as |
|
688 |
|
689 \[@{term "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s\<^sub>2 \<and> s\<^sub>1 @ s\<^sub>3 \<in> L (der c r\<^sub>1) \<and> s\<^sub>4 \<in> L r\<^sub>2)"}\] |
|
690 |
|
691 \noindent From the latter we can infer by Prop~\ref{derprop}(2): |
|
692 |
|
693 \[@{term "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s\<^sub>2 \<and> (c # s\<^sub>1) @ s\<^sub>3 \<in> L r\<^sub>1 \<and> s\<^sub>4 \<in> L r\<^sub>2)"}\] |
|
694 |
|
695 \noindent We can use the induction hypothesis for @{text "r\<^sub>1"} to obtain |
|
696 @{term "(c # s\<^sub>1) \<in> r\<^sub>1 \<rightarrow> injval r\<^sub>1 c v\<^sub>1"}. This allows us to infer |
|
697 @{term "((c # s\<^sub>1) @ s\<^sub>2) \<in> SEQ r\<^sub>1 r\<^sub>2 \<rightarrow> Seq (injval r\<^sub>1 c v\<^sub>1) v\<^sub>2"}. The case @{text "(c)"} |
|
698 is similarly. |
|
699 |
|
700 For @{text "(b)"} we know @{term "s \<in> der c r\<^sub>2 \<rightarrow> v\<^sub>1"} and |
|
701 @{term "s\<^sub>1 @ s\<^sub>2 \<notin> L (SEQ (der c r\<^sub>1) r\<^sub>2)"}. From the former |
|
702 we have @{term "(c # s) \<in> r\<^sub>2 \<rightarrow> (injval r\<^sub>2 c v\<^sub>1)"} by induction hypothesis |
|
703 for @{term "r\<^sub>2"}. From the latter we can infer |
|
704 |
|
705 \[@{term "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = c # s \<and> s\<^sub>3 \<in> L r\<^sub>1 \<and> s\<^sub>4 \<in> L r\<^sub>2)"}\] |
|
706 |
|
707 \noindent By Lem.~\ref{lemmkeps} we know @{term "[] \<in> r\<^sub>1 \<rightarrow> (mkeps r\<^sub>1)"} |
|
708 holds. Putting this all together, we can conclude with @{term "(c # |
|
709 s) \<in> SEQ r\<^sub>1 r\<^sub>2 \<rightarrow> Seq (mkeps r\<^sub>1) (injval r\<^sub>2 c v\<^sub>1)"}. |
|
710 |
|
711 Finally suppose @{term "r = STAR r\<^sub>1"}. This case is very similar to the |
|
712 sequence case, except that we need to ensure that @{term "flat (injval r\<^sub>1 |
|
713 c v\<^sub>1) \<noteq> []"}. This follows by Lem.~\ref{posixbasic} from @{term "(c # s\<^sub>1) |
|
714 \<in> r' \<rightarrow> injval r\<^sub>1 c v\<^sub>1"} (which in turn follows from @{term "s\<^sub>1 \<in> der c |
|
715 r\<^sub>1 \<rightarrow> v\<^sub>1"} and the induction hypothesis).\qed |
|
716 \end{proof} |
|
717 |
|
718 \noindent |
|
719 With Lem.~\ref{PMatch2} in place, it is completely routine to establish |
|
720 that the Sulzmann and Lu lexer satisfies its specification (returning |
|
721 an ``error'' iff the string is not in the language of the regular expression, |
|
722 and returning a unique POSIX value iff the string \emph{is} in the language): |
658 |
723 |
659 \begin{theorem}\mbox{}\smallskip\\ |
724 \begin{theorem}\mbox{}\smallskip\\ |
660 \begin{tabular}{ll} |
725 \begin{tabular}{ll} |
661 (1) & @{thm (lhs) lex_correct1a} if and only if @{thm (rhs) lex_correct1a}\\ |
726 (1) & @{thm (lhs) lex_correct1a} if and only if @{thm (rhs) lex_correct1a}\\ |
662 (2) & @{thm (lhs) lex_correct3a} if and only if @{thm (rhs) lex_correct3a}\\ |
727 (2) & @{thm (lhs) lex_correct3b} if and only if @{thm (rhs) lex_correct3b}\\ |
663 \end{tabular} |
728 \end{tabular} |
664 \end{theorem} |
729 \end{theorem} |
|
730 |
|
731 \begin{proof} |
|
732 By induction on @{term s}.\qed |
|
733 \end{proof} |
|
734 |
|
735 This concludes our correctness proof. Note that we have not changed the |
|
736 algorithm by Sulzmann and Lu, but introduced our own specification for |
|
737 what a correct result---a POSIX value---should be. |
665 *} |
738 *} |
666 |
739 |
667 section {* The Argument by Sulzmmann and Lu *} |
740 section {* The Argument by Sulzmmann and Lu *} |
668 |
741 |
669 section {* Conclusion *} |
742 section {* Conclusion *} |
679 |
752 |
680 |
753 |
681 \noindent |
754 \noindent |
682 We have also introduced a slightly restricted version of this relation |
755 We have also introduced a slightly restricted version of this relation |
683 where the last rule is restricted so that @{term "flat v \<noteq> []"}. |
756 where the last rule is restricted so that @{term "flat v \<noteq> []"}. |
684 This relation for \emph{non-problematic} is written @{term "\<Turnstile> v : r"}. |
|
685 \bigskip |
757 \bigskip |
686 |
758 |
687 |
759 |
688 \noindent |
760 \noindent |
689 |
761 |
690 |
762 |
691 \noindent |
763 \noindent |
692 Our version of Sulzmann's ordering relation |
764 Our version of Sulzmann's ordering relation |
693 |
765 |
694 \begin{center} |
766 |
695 \begin{tabular}{c} |
|
696 @{thm[mode=Rule] ValOrd.intros(2)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>1'" "v\<^sub>2" "r\<^sub>2" "v\<^sub>2'"]} \qquad |
|
697 @{thm[mode=Rule] ValOrd.intros(1)[of "v\<^sub>2" "r\<^sub>2" "v\<^sub>2'" "v\<^sub>1" "r\<^sub>1"]}\medskip\\ |
|
698 @{thm[mode=Rule] ValOrd.intros(3)[of "v\<^sub>1" "v\<^sub>2" "r\<^sub>1" "r\<^sub>2"]} \qquad |
|
699 @{thm[mode=Rule] ValOrd.intros(4)[of "v\<^sub>2" "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]}\medskip\\ |
|
700 @{thm[mode=Rule] ValOrd.intros(5)[of "v\<^sub>2" "r\<^sub>2" "v\<^sub>2'" "r\<^sub>1"]} \qquad |
|
701 @{thm[mode=Rule] ValOrd.intros(6)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>1'" "r\<^sub>2"]} \medskip\\ |
|
702 @{thm[mode=Axiom] ValOrd.intros(7)}\qquad |
|
703 @{thm[mode=Axiom] ValOrd.intros(8)[of "c"]}\medskip\\ |
|
704 @{thm[mode=Rule] ValOrd.intros(9)[of "v" "vs" "r"]}\qquad |
|
705 @{thm[mode=Rule] ValOrd.intros(10)[of "v" "vs" "r"]}\medskip\\ |
|
706 @{thm[mode=Rule] ValOrd.intros(11)[of "v\<^sub>1" "r" "v\<^sub>2" "vs\<^sub>1" "vs\<^sub>2"]}\medskip\\ |
|
707 @{thm[mode=Rule] ValOrd.intros(12)[of "vs\<^sub>1" "r" "vs\<^sub>2" "v"]}\qquad |
|
708 @{thm[mode=Axiom] ValOrd.intros(13)[of "r"]}\medskip\\ |
|
709 \end{tabular} |
|
710 \end{center} |
|
711 |
|
712 \noindent |
|
713 A prefix of a string s |
|
714 |
|
715 \begin{center} |
|
716 \begin{tabular}{c} |
|
717 @{thm prefix_def[of "s\<^sub>1" "s\<^sub>2"]} |
|
718 \end{tabular} |
|
719 \end{center} |
|
720 |
|
721 \noindent |
|
722 Values and non-problematic values |
|
723 |
|
724 \begin{center} |
|
725 \begin{tabular}{c} |
|
726 @{thm Values_def}\medskip\\ |
|
727 \end{tabular} |
|
728 \end{center} |
|
729 |
|
730 |
|
731 \noindent |
|
732 The point is that for a given @{text s} and @{text r} there are only finitely many |
|
733 non-problematic values. |
|
734 *} |
767 *} |
735 |
768 |
736 text {* |
769 text {* |
737 \noindent |
770 \noindent |
738 Some lemmas we have proved:\bigskip |
771 Some lemmas we have proved:\bigskip |
739 |
772 |
740 @{thm L_flat_Prf} |
773 @{thm L_flat_Prf} |
741 |
774 |
742 @{thm L_flat_NPrf} |
|
743 |
|
744 @{thm[mode=IfThen] mkeps_nullable} |
775 @{thm[mode=IfThen] mkeps_nullable} |
745 |
776 |
746 @{thm[mode=IfThen] mkeps_flat} |
777 @{thm[mode=IfThen] mkeps_flat} |
747 |
778 |
748 @{thm[mode=IfThen] Prf_injval} |
779 @{thm[mode=IfThen] Prf_injval} |
751 |
782 |
752 @{thm[mode=IfThen] PMatch_mkeps} |
783 @{thm[mode=IfThen] PMatch_mkeps} |
753 |
784 |
754 @{thm[mode=IfThen] PMatch1(2)} |
785 @{thm[mode=IfThen] PMatch1(2)} |
755 |
786 |
756 @{thm[mode=IfThen] PMatch1N} |
|
757 |
|
758 @{thm[mode=IfThen] PMatch_determ(1)[of "s" "r" "v\<^sub>1" "v\<^sub>2"]} |
787 @{thm[mode=IfThen] PMatch_determ(1)[of "s" "r" "v\<^sub>1" "v\<^sub>2"]} |
759 |
788 |
760 \medskip |
|
761 \noindent |
|
762 This is the main theorem that lets us prove that the algorithm is correct according to |
|
763 @{term "s \<in> r \<rightarrow> v"}: |
|
764 |
|
765 @{thm[mode=IfThen] PMatch2} |
|
766 |
|
767 \mbox{}\bigskip |
|
768 |
789 |
769 \noindent {\bf Proof} The proof is by induction on the definition of |
790 \noindent {\bf Proof} The proof is by induction on the definition of |
770 @{const der}. Other inductions would go through as well. The |
791 @{const der}. Other inductions would go through as well. The |
771 interesting case is for @{term "SEQ r\<^sub>1 r\<^sub>2"}. First we analyse the |
792 interesting case is for @{term "SEQ r\<^sub>1 r\<^sub>2"}. First we analyse the |
772 case where @{term "nullable r\<^sub>1"}. We have by induction hypothesis |
793 case where @{term "nullable r\<^sub>1"}. We have by induction hypothesis |