# HG changeset patch # User Christian Urban # Date 1457387690 0 # Node ID 7c6c907660d89e7ceef155745671fffe0b9ee911 # Parent 4c85af262ee792bbdbfc4b2ca358ff64f61d6464 updated diff -r 4c85af262ee7 -r 7c6c907660d8 thys/Paper/Paper.thy --- a/thys/Paper/Paper.thy Mon Mar 07 18:56:41 2016 +0000 +++ b/thys/Paper/Paper.thy Mon Mar 07 21:54:50 2016 +0000 @@ -386,7 +386,7 @@ unique value that satisfies the (informal) POSIX rules from the Introduction. Graphically the POSIX value calculation algorithm by Sulzmann and Lu can be illustrated by the picture in Figure~\ref{Sulz} - where the path from the left to the right involving @{const der}/@{const + where the path from the left to the right involving @{term derivatives}/@{const nullable} is the first phase of the algorithm (calculating successive \Brz's derivatives) and @{const mkeps}/@{text inj}, the path from right to left, the second phase. This picture shows the steps required when a @@ -494,7 +494,7 @@ might be instructive to look first at the three sequence cases (clauses (4)--(6)). In each case we need to construct an ``injected value'' for @{term "SEQ r\<^sub>1 r\<^sub>2"}. This must be a value of the form @{term - "Seq DUMMY DUMMY"}. Recall the clause of the @{const der}-function + "Seq DUMMY DUMMY"}. Recall the clause of the @{text derivative}-function for sequence regular expressions: \begin{center} @@ -529,15 +529,21 @@ The idea of @{term inj} to ``inject back'' a character into a value can be made precise by the first part of the following lemma; the second part shows that the underlying string of an @{const mkeps}-value is - the empty string. + always the empty string. - \begin{lemma}\mbox{}\\\label{Prf_injval_flat} + \begin{lemma}\mbox{}\smallskip\\\label{Prf_injval_flat} \begin{tabular}{ll} (1) & @{thm[mode=IfThen] Prf_injval_flat}\\ (2) & @{thm[mode=IfThen] mkeps_flat} \end{tabular} \end{lemma} + \begin{proof} + Both properties are by routine inductions: the first one, for example, + by an induction over the definition of @{term derivatives}; the second by + induction on @{term r}. There are no interesting cases. + \end{proof} + Having defined the @{const mkeps} and @{text inj} function we can extend \Brz's matcher so that a [lexical] value is constructed (assuming the regular expression matches the string). The clauses of the lexer are @@ -551,7 +557,7 @@ \end{tabular} \end{center} - \noindent If the regular expression does not match, @{const None} is + \noindent If the regular expression does not match the string, @{const None} is returned, indicating an error is raised. If the regular expression does match the string, then @{const Some} value is returned. One important virtue of this algorithm is that it can be implemented with ease in a @@ -562,53 +568,82 @@ match and priority rule; as correctly argued in \cite{Sulzmann2014}, this needs formal specification. Sulzmann and Lu define a \emph{dominance} relation\footnote{Sulzmann and Lu call it an ordering relation, but - without giving evidence that it is transitive.} between values and argue that - there is a maximum value, as given by the derivative-based algorithm. In - contrast, we shall next introduce a simple inductive definition to specify - what a \emph{POSIX value} is, incorporating the POSIX-specific choices - into the side-conditions of our rules. Our definition is inspired by the - matching relation given in \cite{Vansummeren2006}. The relation we define - is ternary and written as \mbox{@{term "s \ r \ v"}}, relating strings, - regular expressions and values. + without giving evidence that it is transitive.} between values and argue + that there is a maximum value, as given by the derivative-based algorithm. + In contrast, we shall introduce a simple inductive definition that + specifies directly what a \emph{POSIX value} is, incorporating the + POSIX-specific choices into the side-conditions of our rules. Our + definition is inspired by the matching relation given in + \cite{Vansummeren2006}. The relation we define is ternary and written as + \mbox{@{term "s \ r \ v"}}, relating strings, regular expressions and + values. \begin{center} \begin{tabular}{c} - @{thm[mode=Axiom] PMatch.intros(1)} \qquad - @{thm[mode=Axiom] PMatch.intros(2)}\bigskip\\ - @{thm[mode=Rule] PMatch.intros(3)[of "s" "r\<^sub>1" "v" "r\<^sub>2"]}\qquad - @{thm[mode=Rule] PMatch.intros(4)[of "s" "r\<^sub>2" "v" "r\<^sub>1"]}\bigskip\\ + @{thm[mode=Axiom] PMatch.intros(1)}@{text "P"}@{term "ONE"} \qquad + @{thm[mode=Axiom] PMatch.intros(2)}@{text "P"}@{term "c"}\bigskip\\ + @{thm[mode=Rule] PMatch.intros(3)[of "s" "r\<^sub>1" "v" "r\<^sub>2"]}@{text "P+L"}\qquad + @{thm[mode=Rule] PMatch.intros(4)[of "s" "r\<^sub>2" "v" "r\<^sub>1"]}@{text "P+R"}\bigskip\\ $\mprset{flushleft} \inferrule {@{thm (prem 1) PMatch.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \qquad @{thm (prem 2) PMatch.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \\\\ @{thm (prem 3) PMatch.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}} - {@{thm (concl) PMatch.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}$\\ - @{thm[mode=Axiom] PMatch.intros(7)}\bigskip\\ + {@{thm (concl) PMatch.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}$@{text "PS"}\\ + @{thm[mode=Axiom] PMatch.intros(7)}@{text "P[]"}\bigskip\\ $\mprset{flushleft} \inferrule {@{thm (prem 1) PMatch.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad @{thm (prem 2) PMatch.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad @{thm (prem 3) PMatch.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \\\\ @{thm (prem 4) PMatch.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}} - {@{thm (concl) PMatch.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}$ + {@{thm (concl) PMatch.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}$@{text "P\"} \end{tabular} \end{center} \noindent We claim that this relation captures the idea behind the two - informal POSIX rules shown in the Introduction: Consider the second line - where the POSIX values for an alternative regular expression is - specified---it is always a @{text "Left"}-value, \emph{except} when the - string to be matched is not in the language of @{term "r\<^sub>1"}; only then it - is a @{text Right}-value (see the side-condition in the rule on the - right). Interesting is also the rule for sequence regular expressions - (third line). The first two premises state that @{term "v\<^sub>1"} and - @{term "v\<^sub>2"} are the POSIX values for @{term "(s\<^sub>1, - r\<^sub>1)"} and @{term "(s\<^sub>2, r\<^sub>2)"} respectively. - + informal POSIX rules shown in the Introduction: Consider for example the + rules @{text "P+L"} and @{text "P+R"} where the POSIX value for an + alternative regular expression is specified---it is always a @{text + "Left"}-value, \emph{except} when the string to be matched is not in the + language of @{term "r\<^sub>1"}; only then it is a @{text Right}-value (see the + side-condition in @{text "P+R"}). Interesting is also the rule for + sequence regular expressions (@{text "PS"}). The first two premises state + that @{term "v\<^sub>1"} and @{term "v\<^sub>2"} are the POSIX values for + @{term "(s\<^sub>1, r\<^sub>1)"} and @{term "(s\<^sub>2, r\<^sub>2)"} + respectively. Consider now the third premise and note that the POSIX value + of this rule should match the string @{term "s\<^sub>1 @ s\<^sub>2"}. According to the + longest match rule, we want that the @{term "s\<^sub>1"} is the longest initial + split of @{term "s\<^sub>1 @ s\<^sub>2"} such that @{term "s\<^sub>2"} is still recognised + by @{term "r\<^sub>2"}. Let us assume, contrary to the third premise, that there + \emph{exists} an @{term "s\<^sub>3"} and @{term "s\<^sub>4"} such that @{term "s\<^sub>2"} + can be split up into a non-empty @{term "s\<^sub>3"} and @{term "s\<^sub>4"}. Moreover + the longer @{term "s\<^sub>1 @ s\<^sub>3"} can be matched by @{text "r\<^sub>1"} and the + shorter @{term "s\<^sub>4"} can still be matched by @{term "r\<^sub>2"}. In this case + @{term "s\<^sub>1"} would not be the longest initial split of @{term "s\<^sub>1 @ + s\<^sub>2"} and therefore @{term "Seq v\<^sub>1 v\<^sub>2"} cannot be a POSIX value + for @{term "(s\<^sub>1 @ s\<^sub>2, SEQ r\<^sub>1 r\<^sub>2)"}. A similar condition is imposed + onto the POSIX value in the @{text "P\"}-rule. Also there we want that + @{term "s\<^sub>1"} is the longest initial split of @{term "s\<^sub>1 @ s\<^sub>2"} and + furthermore the corresponding value @{term v} cannot be flatten to + the empty string. In effect, we require that in each ``iteration'' + of the star, some parts of the string need to be ``nibbled'' away; only + in case of the empty string weBy accept @{term "Stars []"} as the + POSIX value. + + We can prove that given a string @{term s} and regular expression @{term + r}, the POSIX value @{term v} is uniquely determined by @{term "s \ r \ + v"}. + \begin{theorem} @{thm[mode=IfThen] PMatch_determ(1)[of _ _ "v\<^sub>1" "v\<^sub>2"]} \end{theorem} + \begin{proof} + By induction on the definition of @{term "s \ r \ v\<^sub>1"} and a case + analysis of @{term "s \ r \ v\<^sub>2"}. + \end{proof} + \begin{lemma} @{thm[mode=IfThen] PMatch_mkeps} \end{lemma} diff -r 4c85af262ee7 -r 7c6c907660d8 thys/ReStar.thy --- a/thys/ReStar.thy Mon Mar 07 18:56:41 2016 +0000 +++ b/thys/ReStar.thy Mon Mar 07 21:54:50 2016 +0000 @@ -460,6 +460,14 @@ apply(auto intro: Prf.intros) done +lemma PMatch1a: + assumes "s \ r \ v" + shows "s \ L r" +using assms +apply(induct s r v rule: PMatch.induct) +apply(auto simp add: Sequ_def) +done + lemma PMatch_mkeps: assumes "nullable r" shows "[] \ r \ mkeps r" @@ -482,105 +490,60 @@ apply(metis PMatch.intros(7)) done -find_theorems Stars -thm compat_val_list.induct compat_val.induct - - lemma PMatch_determ: - shows "\s \ r \ v1; s \ r \ v2\ \ v1 = v2" - and "\s \ (STAR r) \ Stars vs1; s \ (STAR r) \ Stars vs2\ \ vs1 = vs2" -apply(induct v1 and vs1 arbitrary: s r v2 and s r vs2 rule: compat_val.induct compat_val_list.induct) -apply(erule PMatch.cases) -apply(simp_all)[7] -apply(erule PMatch.cases) -apply(simp_all)[7] -apply(erule PMatch.cases) -apply(simp_all)[7] -apply(erule PMatch.cases) -apply(simp_all)[7] + assumes "s \ r \ v1" "s \ r \ v2" + shows "v1 = v2" +using assms +apply(induct s r v1 arbitrary: v2 rule: PMatch.induct) apply(erule PMatch.cases) apply(simp_all)[7] apply(erule PMatch.cases) apply(simp_all)[7] -apply(clarify) -apply(subgoal_tac "s1 = s1a \ s2 = s2a") -apply metis -apply(rule conjI) -apply(simp add: append_eq_append_conv2) -apply(auto)[1] -apply (metis PMatch1(1) PMatch1(2) Prf_flat_L) -apply (metis PMatch1(1) PMatch1(2) Prf_flat_L) -apply(simp add: append_eq_append_conv2) -apply(auto)[1] -apply (metis PMatch1(1) PMatch1(2) Prf_flat_L) -apply (metis PMatch1(1) PMatch1(2) Prf_flat_L) -apply(erule PMatch.cases) -apply(simp_all)[7] -apply(clarify) -apply(erule PMatch.cases) -apply(simp_all)[7] -apply(clarify) -apply (metis PMatch1(1) PMatch1(2) Prf_flat_L) +apply(rotate_tac 2) apply(erule PMatch.cases) -apply(simp_all)[7] +apply(simp_all (no_asm_use))[7] +apply(clarify) +apply(force) apply(clarify) -apply(erule PMatch.cases) -apply(simp_all)[7] -apply(clarify) -apply (metis PMatch1(1) PMatch1(2) Prf_flat_L) -(* star case *) -defer +apply(drule PMatch1a) +apply(simp) +apply(rotate_tac 3) apply(erule PMatch.cases) -apply(simp_all)[7] -apply(clarify) -apply(erule PMatch.cases) -apply(simp_all)[7] -apply(clarify) -apply (metis PMatch1(2)) -apply(rotate_tac 3) -apply(erule PMatch.cases) -apply(simp_all)[7] -apply(clarify) +apply(simp_all (no_asm_use))[7] +apply(drule PMatch1a)+ +apply(simp) +apply(simp) +apply(rotate_tac 5) apply(erule PMatch.cases) -apply(simp_all)[7] +apply(simp_all (no_asm_use))[7] apply(clarify) -apply(subgoal_tac "s1 = s1a \ s2 = s2a") -apply metis +apply(subgoal_tac "s1 = s1a") +apply(blast) apply(simp add: append_eq_append_conv2) -apply(auto)[1] -apply (metis L.simps(6) PMatch1(1) PMatch1(2) Prf_flat_L) -apply (metis L.simps(6) PMatch1(1) PMatch1(2) Prf_flat_L) -apply (metis L.simps(6) PMatch1(1) PMatch1(2) Prf_flat_L) -apply (metis L.simps(6) PMatch1(1) PMatch1(2) Prf_flat_L) +apply(clarify) +apply (metis PMatch1a append_self_conv) +apply(rotate_tac 6) apply(erule PMatch.cases) -apply(simp_all)[7] -apply(clarify) -apply (metis PMatch1(2)) -apply(erule PMatch.cases) -apply(simp_all)[7] -apply(clarify) -apply(erule PMatch.cases) -apply(simp_all)[7] +apply(simp_all (no_asm_use))[7] apply(clarify) -apply(subgoal_tac "s1 = s1a \ s2 = s2a") -apply(drule_tac x="s1 @ s2" in meta_spec) -apply(drule_tac x="rb" in meta_spec) -apply(drule_tac x="(va#vsa)" in meta_spec) +apply(subgoal_tac "s1 = s1a") apply(simp) -apply(drule meta_mp) -apply (metis L.simps(6) PMatch.intros(6)) -apply (metis L.simps(6) PMatch.intros(6)) -apply(simp add: append_eq_append_conv2) -apply(auto)[1] -apply (metis L.simps(6) PMatch1(1) PMatch1(2) Prf_flat_L) -apply (metis L.simps(6) PMatch1(1) PMatch1(2) Prf_flat_L) -apply (metis L.simps(6) PMatch1(1) PMatch1(2) Prf_flat_L) -apply (metis L.simps(6) PMatch1(1) PMatch1(2) Prf_flat_L) -apply (metis PMatch1(2)) +apply(blast) +apply(simp (no_asm_use) add: append_eq_append_conv2) +apply(clarify) +apply (metis L.simps(6) PMatch1a append_self_conv) +apply(clarify) +apply(rotate_tac 2) apply(erule PMatch.cases) -apply(simp_all)[7] +apply(simp_all (no_asm_use))[7] +using PMatch1(2) apply auto[1] +using PMatch1(2) apply blast +apply(erule PMatch.cases) +apply(simp_all (no_asm_use))[7] apply(clarify) -by (metis PMatch1(2)) +apply (simp add: PMatch1(2)) +apply(simp) +done (* a proof that does not need proj *) lemma PMatch2_roy_version: diff -r 4c85af262ee7 -r 7c6c907660d8 thys/paper.pdf Binary file thys/paper.pdf has changed