# HG changeset patch # User Christian Urban # Date 1503064289 -3600 # Node ID 6746f5e1f1f84403ad555779728a547a1cd4bb17 # Parent 32b222d77fa0303012e2f41a42a81de6f207fb5e updated diff -r 32b222d77fa0 -r 6746f5e1f1f8 thys/Exercises.thy --- a/thys/Exercises.thy Fri Aug 11 20:29:01 2017 +0100 +++ b/thys/Exercises.thy Fri Aug 18 14:51:29 2017 +0100 @@ -49,7 +49,8 @@ using Nil_is_append_conv apply blast apply blast apply(auto) -using Star_string by fastforce +using Star_cstring +by (metis concat_eq_Nil_conv) lemma atmostempty_correctness_aux: @@ -78,7 +79,7 @@ assumes "A \ {[]}" shows "A\ \ {[]}" using assms -using Star_string concat_eq_Nil_conv empty_iff insert_iff subsetI subset_singletonD by fastforce +using Star_cstring concat_eq_Nil_conv empty_iff insert_iff subsetI subset_singletonD by fastforce lemma Star_empty_string_finite: shows "finite ({[]}\)" diff -r 32b222d77fa0 -r 6746f5e1f1f8 thys/Journal/Paper.thy --- a/thys/Journal/Paper.thy Fri Aug 11 20:29:01 2017 +0100 +++ b/thys/Journal/Paper.thy Fri Aug 18 14:51:29 2017 +0100 @@ -63,7 +63,6 @@ set ("_" [73] 73) and Prf ("_ : _" [75,75] 75) and - CPrf ("_ \<^raw:\mbox{\textbf{\textlengthmark}}> _" [75,75] 75) and Posix ("'(_, _') \ _" [63,75,75] 75) and lexer ("lexer _ _" [78,78] 77) and @@ -83,20 +82,18 @@ PosOrd_ex ("_ \ _" [77,77] 77) and PosOrd_ex_eq ("_ \<^raw:\mbox{$\preccurlyeq$}> _" [77,77] 77) and pflat_len ("\_\\<^bsub>_\<^esub>") and - nprec ("_ \<^raw:\mbox{$\not\prec$}> _" [77,77] 77) + nprec ("_ \<^raw:\mbox{$\not\prec$}> _" [77,77] 77) and - (* - ValOrd ("_ >\<^bsub>_\<^esub> _" [77,77,77] 77) and - ValOrdEq ("_ \\<^bsub>_\<^esub> _" [77,77,77] 77) - *) + DUMMY ("\<^raw:\underline{\hspace{2mm}}>") + definition "match r s \ nullable (ders s r)" -lemma CV_STAR_ONE_empty: - shows "CV (STAR ONE) [] = {Stars []}" -by(auto simp add: CV_def elim: CPrf.cases intro: CPrf.intros) +lemma LV_STAR_ONE_empty: + shows "LV (STAR ONE) [] = {Stars []}" +by(auto simp add: LV_def elim: Prf.cases intro: Prf.intros) @@ -139,7 +136,7 @@ one way of how the string is matched. There are two commonly used disambiguation strategies to generate a unique answer: one is called GREEDY matching \cite{Frisch2004} and the other is POSIX -matching~\cite{POSIX,Kuklewicz,OkuiSuzuki2013,Sulzmann2014,Vansummeren2006}. For example consider +matching~\cite{POSIX,Kuklewicz,OkuiSuzuki2010,Sulzmann2014,Vansummeren2006}. For example consider the string @{term xy} and the regular expression \mbox{@{term "STAR (ALT (ALT x y) xy)"}}. Either the string can be matched in two `iterations' by the single letter-regular expressions @{term x} and @{term y}, or directly @@ -149,14 +146,15 @@ instant gratification to delayed repletion). The second case is POSIX matching, which prefers the longest match. -In the context of lexing, where an input string needs to be split up into a -sequence of tokens, POSIX is the more natural disambiguation strategy for -what programmers consider basic syntactic building blocks in their programs. -These building blocks are often specified by some regular expressions, say -@{text "r\<^bsub>key\<^esub>"} and @{text "r\<^bsub>id\<^esub>"} for recognising keywords and -identifiers, respectively. There are a few underlying (informal) rules behind -tokenising a string in a POSIX \cite{POSIX} fashion according to a collection of regular -expressions: +In the context of lexing, where an input string needs to be split up +into a sequence of tokens, POSIX is the more natural disambiguation +strategy for what programmers consider basic syntactic building blocks +in their programs. These building blocks are often specified by some +regular expressions, say @{text "r\<^bsub>key\<^esub>"} and @{text +"r\<^bsub>id\<^esub>"} for recognising keywords and identifiers, +respectively. There are a few underlying (informal) rules behind +tokenising a string in a POSIX \cite{POSIX} fashion according to a +collection of regular expressions: \begin{itemize} \item[$\bullet$] \emph{The Longest Match Rule} (or \emph{``{M}aximal {M}unch {R}ule''}): @@ -171,20 +169,28 @@ not match an empty string unless this is the only match for the repetition.\smallskip \item[$\bullet$] \emph{Empty String Rule:} An empty string shall be considered to -be longer than no match at all.\marginpar{Explain its purpose} +be longer than no match at all. \end{itemize} -\noindent Consider for example a regular expression @{text "r\<^bsub>key\<^esub>"} for recognising keywords -such as @{text "if"}, @{text "then"} and so on; and @{text "r\<^bsub>id\<^esub>"} +\noindent Consider for example a regular expression @{text +"r\<^bsub>key\<^esub>"} for recognising keywords such as @{text "if"}, +@{text "then"} and so on; and @{text "r\<^bsub>id\<^esub>"} recognising identifiers (say, a single character followed by characters or numbers). Then we can form the regular expression -@{text "(r\<^bsub>key\<^esub> + r\<^bsub>id\<^esub>)\<^sup>\"} and use POSIX matching to tokenise strings, -say @{text "iffoo"} and @{text "if"}. For @{text "iffoo"} we obtain -by the Longest Match Rule a single identifier token, not a keyword -followed by an identifier. For @{text "if"} we obtain by the Priority -Rule a keyword token, not an identifier token---even if @{text "r\<^bsub>id\<^esub>"} -matches also. By the Star Rule we know @{text "(r\<^bsub>key\<^esub> + r\<^bsub>id\<^esub>)\<^sup>\"} matches @{text "iffoo"}, respectively @{text "if"}, in exactly one -`iteration' of the star. +@{text "(r\<^bsub>key\<^esub> + r\<^bsub>id\<^esub>)\<^sup>\"} +and use POSIX matching to tokenise strings, say @{text "iffoo"} and +@{text "if"}. For @{text "iffoo"} we obtain by the Longest Match Rule +a single identifier token, not a keyword followed by an +identifier. For @{text "if"} we obtain by the Priority Rule a keyword +token, not an identifier token---even if @{text "r\<^bsub>id\<^esub>"} +matches also. By the Star Rule we know @{text "(r\<^bsub>key\<^esub> + +r\<^bsub>id\<^esub>)\<^sup>\"} matches @{text "iffoo"}, +respectively @{text "if"}, in exactly one `iteration' of the star. The +Empty String Rule is for cases where @{text +"(a\<^sup>\)\<^sup>\"}, for example, matches against the +string @{text "bc"}. Then the longest initial matched substring is the +empty string, which is matched by both the whole regular expression +and the parenthesised sub-expression. One limitation of Brzozowski's matcher is that it only generates a @@ -205,11 +211,10 @@ algorithm. This proof idea is inspired by work of Frisch and Cardelli \cite{Frisch2004} on a GREEDY regular expression matching algorithm. However, we were not able to establish transitivity and -totality for the ``order relation'' by Sulzmann and Lu. \marginpar{We probably drop this section} -??In Section -\ref{argu} we identify some inherent problems with their approach (of +totality for the ``order relation'' by Sulzmann and Lu. +There are some inherent problems with their approach (of which some of the proofs are not published in \cite{Sulzmann2014}); -perhaps more importantly, we give a simple inductive (and +perhaps more importantly, we give in this paper a simple inductive (and algorithm-independent) definition of what we call being a {\em POSIX value} for a regular expression @{term r} and a string @{term s}; we show that the algorithm computes such a value and that such a value is @@ -252,7 +257,7 @@ Our specification of a POSIX value consists of a simple inductive definition that given a string and a regular expression uniquely determines this value. We also show that our definition is equivalent to an ordering -of values based on positions by Okui and Suzuki \cite{OkuiSuzuki2013}. +of values based on positions by Okui and Suzuki \cite{OkuiSuzuki2010}. Derivatives as calculated by Brzozowski's method are usually more complex regular expressions than the initial one; various optimisations are possible. We prove the correctness when simplifications of @{term "ALT ZERO @@ -402,9 +407,10 @@ text {* - The clever idea by Sulzmann and Lu \cite{Sulzmann2014} is to use - values for encoding \emph{how} a regular expression matches a string - and then define a function on values that mirrors (but inverts) the + There have been many previous works that use values for encoding + \emph{how} a regular expression matches a string. + The clever idea by Sulzmann and Lu \cite{Sulzmann2014} is to + define a function on values that mirrors (but inverts) the construction of the derivative on regular expressions. \emph{Values} are defined as the inductive datatype @@ -440,98 +446,89 @@ \end{center} \noindent Sulzmann and Lu also define inductively an inhabitation relation - that associates values to regular expressions + that associates values to regular expressions. We define this relation as + follows:\footnote{Note that the rule for @{term Stars} differs from our + erlier paper \cite{AusafDyckhoffUrban2016}. There we used the original + definition by Sulzmann and Lu which does not require that the values @{term "v \ set vs"} + flatten to a non-empty string. The reason for introducing the + more restricted version of lexical values is convenience later + on when reasoning about + an ordering relation for values.} \begin{center} - \begin{tabular}{c} + \begin{tabular}{c@ {\hspace{12mm}}c} \\[-8mm] - @{thm[mode=Axiom] Prf.intros(4)} \qquad + @{thm[mode=Axiom] Prf.intros(4)} & @{thm[mode=Axiom] Prf.intros(5)[of "c"]}\\[4mm] - @{thm[mode=Rule] Prf.intros(2)[of "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]} \qquad + @{thm[mode=Rule] Prf.intros(2)[of "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]} & @{thm[mode=Rule] Prf.intros(3)[of "v\<^sub>2" "r\<^sub>1" "r\<^sub>2"]}\\[4mm] - @{thm[mode=Rule] Prf.intros(1)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]} \qquad + @{thm[mode=Rule] Prf.intros(1)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]} & @{thm[mode=Rule] Prf.intros(6)[of "vs"]} \end{tabular} \end{center} - \noindent - where in the clause for @{const "Stars"} we use the notation @{term "v \ set vs"} - for indicating that @{text v} is a member in the list @{text vs}. - Note that no values are associated with the regular expression - @{term ZERO}, and that the only value associated with the regular - expression @{term ONE} is @{term Void}, pronounced (if one must) as @{text - "Void"}. It is routine to establish how values ``inhabiting'' a regular - expression correspond to the language of a regular expression, namely + \noindent where in the clause for @{const "Stars"} we use the + notation @{term "v \ set vs"} for indicating that @{text v} is a + member in the list @{text vs}. We require in this rule that every + value in @{term vs} flattens to a non-empty string. The idea is that + @{term "Stars"}-values satisfy the informal Star Rule (see Introduction) + where the $^\star$ does not match the empty string unless this is + the only match for the repetition. Note also that no values are + associated with the regular expression @{term ZERO}, and that the + only value associated with the regular expression @{term ONE} is + @{term Void}. It is routine to establish how values ``inhabiting'' + a regular expression correspond to the language of a regular + expression, namely \begin{proposition} @{thm L_flat_Prf} \end{proposition} \noindent - Given a regular expression @{text r} and a string @{text s}, we can define the + Given a regular expression @{text r} and a string @{text s}, we define the set of all \emph{Lexical Values} inhabited by @{text r} with the underlying string - being @{text s} by + being @{text s}:\footnote{Okui and Suzuki refer to our lexical values + as \emph{canonical values} in \cite{OkuiSuzuki2010}. The notion of \emph{non-problematic + values} by Cardelli and Frisch \cite{Frisch2004} is similar, but not identical + to our lexical values.} \begin{center} @{thm LV_def} \end{center} - \noindent However, later on it will sometimes be necessary to - restrict the set of lexical values to a subset called - \emph{Canonical Values}. The idea of canonical values is that they - satisfy the Star Rule (see Introduction) where the $^\star$ does not - match the empty string unless this is the only match for the - repetition. One way to define canonical values formally is to use a - stronger inhabitation relation, written @{term "\ DUMMY : DUMMY"}, which has the same rules as @{term - "\ DUMMY : DUMMY"} shown above, except that the rule for - @{term Stars} has - the additional side-condition of flattened values not being the - empty string, namely + \noindent The main property of @{term "LV r s"} is that it is alway finite. + + \begin{proposition} + @{thm LV_finite} + \end{proposition} - \begin{center} - @{thm [mode=Rule] CPrf.intros(6)} - \end{center} - - \noindent - With this we can define - - \begin{center} - @{thm CV_def} - \end{center} - - \noindent - Clearly we have @{thm LV_CV_subset}. - The main point of canonical values is that for every regular expression @{text r} and every - string @{text s}, the set @{term "CV r s"} is finite. + \noindent This finiteness property does not hold in general if we + remove the side-condition about @{term "flat v \ []"} in the + @{term Stars}-rule above. For example using Sulzmann and Lu's + less restrictive definition, @{term "LV (STAR ONE) []"} would contain + infinitely many values, but according to our more restricted + definition @{thm LV_STAR_ONE_empty}. - \begin{lemma} - @{thm CV_finite} - \end{lemma} - - \noindent This finiteness property does not generally hold for lexical values where - for example @{term "LV (STAR ONE) []"} contains infinitely many - values, but @{thm CV_STAR_ONE_empty}. However, if a regular - expression @{text r} matches a string @{text s}, then in general the - set @{term "CV r s"} is not just a - singleton set. In case of POSIX matching the problem is to - calculate the unique value that satisfies the (informal) POSIX rules - from the Introduction. It will turn out that this POSIX value is in fact a - canonical value. - - Graphically the POSIX value calculation algorithm by - Sulzmann and Lu can be illustrated by the picture in Figure~\ref{Sulz} - where the path from the left to the right involving @{term derivatives}/@{const - nullable} is the first phase of the algorithm (calculating successive - \Brz's derivatives) and @{const mkeps}/@{text inj}, the path from right to - left, the second phase. This picture shows the steps required when a - regular expression, say @{text "r\<^sub>1"}, matches the string @{term - "[a,b,c]"}. We first build the three derivatives (according to @{term a}, - @{term b} and @{term c}). We then use @{const nullable} to find out - whether the resulting derivative regular expression @{term "r\<^sub>4"} - can match the empty string. If yes, we call the function @{const mkeps} - that produces a value @{term "v\<^sub>4"} for how @{term "r\<^sub>4"} can - match the empty string (taking into account the POSIX constraints in case - there are several ways). This function is defined by the clauses: + If a regular expression @{text r} matches a string @{text s}, then + generally the set @{term "LV r s"} is not just a singleton set. In + case of POSIX matching the problem is to calculate the unique lexical value + that satisfies the (informal) POSIX rules from the Introduction. + Graphically the POSIX value calculation algorithm by Sulzmann and Lu + can be illustrated by the picture in Figure~\ref{Sulz} where the + path from the left to the right involving @{term + derivatives}/@{const nullable} is the first phase of the algorithm + (calculating successive \Brz's derivatives) and @{const + mkeps}/@{text inj}, the path from right to left, the second + phase. This picture shows the steps required when a regular + expression, say @{text "r\<^sub>1"}, matches the string @{term + "[a,b,c]"}. We first build the three derivatives (according to + @{term a}, @{term b} and @{term c}). We then use @{const nullable} + to find out whether the resulting derivative regular expression + @{term "r\<^sub>4"} can match the empty string. If yes, we call the + function @{const mkeps} that produces a value @{term "v\<^sub>4"} + for how @{term "r\<^sub>4"} can match the empty string (taking into + account the POSIX constraints in case there are several ways). This + function is defined by the clauses: \begin{figure}[t] \begin{center} @@ -794,15 +791,15 @@ that in each ``iteration'' of the star, some non-empty substring needs to be ``chipped'' away; only in case of the empty string we accept @{term "Stars []"} as the POSIX value. Indeed we can show that our POSIX value - is a canonical value which excludes those @{text Stars} containing values + is a lexical value which excludes those @{text Stars} containing values that flatten to the empty string. \begin{lemma} - @{thm [mode=IfThen] Posix_CV} + @{thm [mode=IfThen] Posix_LV} \end{lemma} \begin{proof} - By routine induction on @{thm (prem 1) Posix_CV}.\qed + By routine induction on @{thm (prem 1) Posix_LV}.\qed \end{proof} \noindent @@ -922,6 +919,270 @@ *} +section {* Ordering of Values according to Okui and Suzuki*} + +text {* + + While in the previous section we have defined POSIX values directly + in terms of a ternary relation (see inference rules in Figure~\ref{POSIXrules}), + Sulzmann and Lu took a different approach in \cite{Sulzmann2014}: + they introduced an ordering for values and identified POSIX values + as the maximal elements. An extended version of \cite{Sulzmann2014} + is available at the website of its first author; this includes more + details of their proofs, but which are evidently not in final form + yet. Unfortunately, we were not able to verify claims that their + ordering has properties such as being transitive or having maximal + elements. + + Okui and Suzuki \cite{OkuiSuzuki2010,OkuiSuzukiTech} described + another ordering of values, which they use to establish the correctness of + their automata-based algorithm for POSIX matching. Their ordering + resembles some aspects of the one given by Sulzmann and Lu, but + is quite different. To begin with, Okui and Suzuki identify POSIX + values as least elements in their ordering. A more substantial + difference is that the ordering by Okui + and Suzuki uses \emph{positions} in order to identify and + compare subvalues, whereby positions are lists of natural + numbers. This allows them to quite naturally formalise the Longest + Match and Priority rules of the informal POSIX standard. Consider + for example the value @{term v} of the form @{term "Stars [Seq + (Char x) (Char y), Char z]"}, say. At position @{text "[0,1]"} of + this value is the subvalue @{text "Char y"} and at position @{text + "[1]"} the subvalue @{term "Char z"}. At the `root' position, or + empty list @{term "[]"}, is the whole value @{term v}. The + positions @{text "[0,1,0]"} and @{text "[2]"}, for example, are + outside of @{text v}. If it exists, the subvalue of @{term v} at a + position @{text p}, written @{term "at v p"}, can be recursively + defined by + + \begin{center} + \begin{tabular}{r@ {\hspace{0mm}}lcl} + @{term v} & @{text "\\<^bsub>[]\<^esub>"} & @{text "\"}& @{thm (rhs) at.simps(1)}\\ + @{term "Left v"} & @{text "\\<^bsub>0::ps\<^esub>"} & @{text "\"}& @{thm (rhs) at.simps(2)}\\ + @{term "Right v"} & @{text "\\<^bsub>1::ps\<^esub>"} & @{text "\"} & + @{thm (rhs) at.simps(3)[simplified Suc_0_fold]}\\ + @{term "Seq v\<^sub>1 v\<^sub>2"} & @{text "\\<^bsub>0::ps\<^esub>"} & @{text "\"} & + @{thm (rhs) at.simps(4)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} \\ + @{term "Seq v\<^sub>1 v\<^sub>2"} & @{text "\\<^bsub>1::ps\<^esub>"} + & @{text "\"} & + @{thm (rhs) at.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2", simplified Suc_0_fold]} \\ + @{term "Stars vs"} & @{text "\\<^bsub>n::ps\<^esub>"} & @{text "\"}& @{thm (rhs) at.simps(6)}\\ + \end{tabular} + \end{center} + + \noindent We use Isabelle's notation @{term "vs ! n"} for the + @{text n}th element in a list. The set of positions inside a value @{text v}, + written @{term "Pos v"}, is given by the clauses + + \begin{center} + \begin{tabular}{lcl} + @{thm (lhs) Pos.simps(1)} & @{text "\"} & @{thm (rhs) Pos.simps(1)}\\ + @{thm (lhs) Pos.simps(2)} & @{text "\"} & @{thm (rhs) Pos.simps(2)}\\ + @{thm (lhs) Pos.simps(3)} & @{text "\"} & @{thm (rhs) Pos.simps(3)}\\ + @{thm (lhs) Pos.simps(4)} & @{text "\"} & @{thm (rhs) Pos.simps(4)}\\ + @{thm (lhs) Pos.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} + & @{text "\"} + & @{thm (rhs) Pos.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}\\ + @{thm (lhs) Pos_stars} & @{text "\"} & @{thm (rhs) Pos_stars}\\ + \end{tabular} + \end{center} + + \noindent + In the last clause @{text len} stands for the length of a list. Clearly + for every position inside a value there exists a subvalue at that position. + + + To help understanding the ordering of Okui and Suzuki, consider again + the earlier value + @{text v} and compare it with the following @{text w}: + + \begin{center} + \begin{tabular}{l} + @{term "v == Stars [Seq (Char x) (Char y), Char z]"}\\ + @{term "w == Stars [Char x, Char y, Char z]"} + \end{tabular} + \end{center} + + \noindent Both values match the string @{text "xyz"}, that means if + we flatten the values at their respective root position, we obtain + @{text "xyz"}. However, at position @{text "[0]"}, @{text v} matches + @{text xy} whereas @{text w} matches only the shorter @{text x}. So + according to the Longest Match Rule, we should prefer @{text v}, + rather than @{text w} as POSIX value for string @{text xyz} (and + corresponding regular expression). In order to + formalise this idea, Okui and Suzuki introduce a measure for + subvalues at position @{text p}, called the \emph{norm} of @{text v} + at position @{text p}. We can define this measure in Isabelle as an + integer as follows + + \begin{center} + @{thm pflat_len_def} + \end{center} + + \noindent where we take the length of the flattened value at + position @{text p}, provided the position is inside @{text v}; if + not, then the norm is @{text "-1"}. The default is crucial + for the POSIX requirement of preferring a @{text Left}-value + over a @{text Right}-value (if they can match the same string---see + the Priority Rule from the Introduction). For this consider + + \begin{center} + @{term "v == Left (Char x)"} \qquad and \qquad @{term "w == Right (Char x)"} + \end{center} + + \noindent Both values match @{text x}, but at position @{text "[0]"} + the norm of @{term v} is @{text 1} (the subvalue matches @{text x}), but the + norm of @{text w} is @{text "-1"} (the position is outside @{text w} + according to how we defined the `inside' positions of @{text Left}- + and @{text Right}-values). Of course at position @{text "[1]"}, the + norms @{term "pflat_len v [1]"} and @{term "pflat_len w [1]"} are + reversed, but the point is that subvalues will be analysed according to + lexicographically orderd positions. This order, written @{term + "DUMMY \lex DUMMY"}, can be conveniently formalised by + three inference rules + + \begin{center} + \begin{tabular}{ccc} + @{thm [mode=Axiom] lex_list.intros(1)}\hspace{1cm} & + @{thm [mode=Rule] lex_list.intros(3)[where ?p1.0="p\<^sub>1" and ?p2.0="p\<^sub>2" and + ?ps1.0="ps\<^sub>1" and ?ps2.0="ps\<^sub>2"]}\hspace{1cm} & + @{thm [mode=Rule] lex_list.intros(2)[where ?ps1.0="ps\<^sub>1" and ?ps2.0="ps\<^sub>2"]} + \end{tabular} + \end{center} + + With the norm and lexicographic order of positions in place, + we can state the key definition of Okui and Suzuki + \cite{OkuiSuzuki2010}: a value @{term "v\<^sub>1"} is \emph{smaller} than + @{term "v\<^sub>2"} if and only if $(i)$ the norm at position @{text p} is + greater in @{term "v\<^sub>1"} (that is the string @{term "flat (at v\<^sub>1 p)"} is longer + than @{term "flat (at v\<^sub>2 p)"}) and $(ii)$ all subvalues at + positions that are inside @{term "v\<^sub>1"} or @{term "v\<^sub>2"} and that are + lexicographically smaller than @{text p}, we have the same norm, namely + + \begin{center} + \begin{tabular}{c} + @{thm (lhs) PosOrd_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} + @{text "\"} + $\begin{cases} + (i) & @{term "pflat_len v\<^sub>1 p > pflat_len v\<^sub>2 p"} \quad\text{and}\smallskip \\ + (ii) & @{term "(\q \ Pos v\<^sub>1 \ Pos v\<^sub>2. q \lex p --> pflat_len v\<^sub>1 q = pflat_len v\<^sub>2 q)"} + \end{cases}$ + \end{tabular} + \end{center} + + \noindent The position @{text p} in this definition acts as the + \emph{first distinct position} of @{text "v\<^sub>1"} and @{text + "v\<^sub>2"}, where both values match strings of different length + \cite{OkuiSuzuki2010}. Since at @{text p} the values @{text + "v\<^sub>1"} and @{text "v\<^sub>2"} macth different strings, the + ordering is irreflexive. Derived from the definition above + are the following two orderings: + + \begin{center} + \begin{tabular}{l} + @{thm PosOrd_ex_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}\\ + @{thm PosOrd_ex_eq_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} + \end{tabular} + \end{center} + + While we encountred a number of obstacles for establishing properties like + transitivity for the ordering of Sulzmann and Lu (and which we failed + to overcome), it is relatively straightforward to establish this + property for the ordering by Okui and Suzuki. + + \begin{lemma}[Transitivity]\label{transitivity} + @{thm [mode=IfThen] PosOrd_trans[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2" and ?v3.0="v\<^sub>3"]} + \end{lemma} + + \begin{proof} From the assumption we obtain two positions @{text p} + and @{text q}, where the values @{text "v\<^sub>1"} and @{text + "v\<^sub>2"} (respectively @{text "v\<^sub>2"} and @{text + "v\<^sub>3"}) are `distinct'. Since @{text + "\\<^bsub>lex\<^esub>"} is trichotomous, we need to consider + three cases, namely @{term "p = q"}, @{term "p \lex q"} and + @{term "q \lex p"}. Let us look at the first case. + Clearly @{term "pflat_len v\<^sub>2 p < pflat_len v\<^sub>1 p"} + and @{term "pflat_len v\<^sub>3 p < pflat_len v\<^sub>2 p"} + imply @{term "pflat_len v\<^sub>3 p < pflat_len v\<^sub>1 p"}. + It remains to show for a @{term "p' \ Pos v\<^sub>1 \ Pos v\<^sub>3"} + with @{term "p' \lex p"} that + @{term "pflat_len v\<^sub>1 p' = pflat_len v\<^sub>3 p'"} holds. + Suppose @{term "p' \ Pos v\<^sub>1"}, then we can infer from the + first assumption that @{term "pflat_len v\<^sub>1 p' = pflat_len v\<^sub>2 p'"}. + But this means that @{term "p'"} must be in @{term "Pos v\<^sub>2"} too. + Hence we can use the second assumption and infer @{term "pflat_len v\<^sub>2 p' = pflat_len v\<^sub>3 p'"}, which concludes + this case with @{term "v\<^sub>1 :\val v\<^sub>2"}. + The reasoning in the other cases is similar.\qed + \end{proof} + + \noindent We can show that @{term "DUMMY :\val DUMMY"} is + a partial order. Okui and Suzuki also show that it is a linear order + for lexical values \cite{OkuiSuzuki2010}, but we have not done + this. What we are going to show below is that for a given @{text r} + and @{text s}, the ordering has a unique minimal element on the set + @{term "LV r s"} , which is the POSIX value we defined in the + previous section. + + + Lemma 1 + + @{thm [mode=IfThen] PosOrd_shorterE[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} + + but in the other direction only + + @{thm [mode=IfThen] PosOrd_shorterI[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} + + + + Next we establish how Okui and Suzuki's ordering relates to our + definition of POSIX values. Given a POSIX value @{text "v\<^sub>1"} + for @{text r} and @{text s}, then any other lexical value @{text + "v\<^sub>2"} in @{term "LV r s"} is greater or equal than @{text + "v\<^sub>1"}: + + + \begin{theorem} + @{thm [mode=IfThen] Posix_PosOrd[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} + \end{theorem} + + \begin{proof} + By induction on our POSIX rules. The two base cases are straightforward: for example + for @{term "v\<^sub>1 = Void"}, we have that @{term "v\<^sub>2 \ LV ONE []"} must also + be of the form \mbox{@{term "v\<^sub>2 = Void"}}. Therfore we have @{term "v\<^sub>1 :\val v\<^sub>2"}. + The inductive cases are as follows: + + \begin{itemize} + \item[$\bullet$] Case @{term "s \ (ALT r\<^sub>1 r\<^sub>2) \ (Left w\<^sub>1)"}: + In this case @{term "v\<^sub>1 = Left w\<^sub>1"} and the value @{term "v\<^sub>2"} is either + of the form @{term "Left w\<^sub>2"} or @{term "Right w\<^sub>2"}. In the latter case we + can immediately conclude with @{term "v\<^sub>1 :\val v\<^sub>2"} since a @{text Left}-value + with the same underlying string @{text s} is always smaller or equal than a @{text Right}-value. + In the former case we have @{term "w\<^sub>2 \ LV r\<^sub>1 s"} and can use the induction + hypothesis to infer @{term "w\<^sub>1 :\val w\<^sub>2"}. Because @{term "w\<^sub>1"} + and @{term "w\<^sub>2"} have the same underlying string @{text s}, we can conclude with + @{term "Left w\<^sub>1 :\val Left w\<^sub>2"}. + + \item[$\bullet$] Case @{term "s \ (ALT r\<^sub>1 r\<^sub>2) \ (Right w\<^sub>1)"}: + Similarly for the case where + @{term "v\<^sub>1 = Right w\<^sub>1"}. + + \item[$\bullet$] + \end{itemize} + \end{proof} + + Given a lexical value @{text "v\<^sub>1"}, say, in @{term "LV r s"} for which there does + not exists any smaller value in @{term "LV r s"}, then this value is our POSIX value: + + \begin{theorem} + @{thm [mode=IfThen] PosOrd_Posix[where ?v1.0="v\<^sub>1"]} + \end{theorem} + + \begin{proof} + By induction on @{text r}. + \end{proof} +*} + + section {* Extensions and Optimisations*} text {* @@ -1121,96 +1382,6 @@ \end{proof} *} -section {* Ordering of Values according to Okui and Suzuki*} - -text {* - - Positions are lists of natural numbers. - - The subvalue at position @{text p}, written @{term "at v p"}, is defined - - - \begin{center} - \begin{tabular}{r@ {\hspace{0mm}}lcl} - @{term v} & @{text "\\<^bsub>[]\<^esub>"} & @{text "\"}& @{thm (rhs) at.simps(1)}\\ - @{term "Left v"} & @{text "\\<^bsub>0::ps\<^esub>"} & @{text "\"}& @{thm (rhs) at.simps(2)}\\ - @{term "Right v"} & @{text "\\<^bsub>1::ps\<^esub>"} & @{text "\"} & - @{thm (rhs) at.simps(3)[simplified Suc_0_fold]}\\ - @{term "Seq v\<^sub>1 v\<^sub>2"} & @{text "\\<^bsub>0::ps\<^esub>"} & @{text "\"} & - @{thm (rhs) at.simps(4)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} \\ - @{term "Seq v\<^sub>1 v\<^sub>2"} & @{text "\\<^bsub>1::ps\<^esub>"} - & @{text "\"} & - @{thm (rhs) at.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2", simplified Suc_0_fold]} \\ - @{term "Stars vs"} & @{text "\\<^bsub>n::ps\<^esub>"} & @{text "\"}& @{thm (rhs) at.simps(6)}\\ - \end{tabular} - \end{center} - - \begin{center} - \begin{tabular}{lcl} - @{thm (lhs) Pos.simps(1)} & @{text "\"} & @{thm (rhs) Pos.simps(1)}\\ - @{thm (lhs) Pos.simps(2)} & @{text "\"} & @{thm (rhs) Pos.simps(2)}\\ - @{thm (lhs) Pos.simps(3)} & @{text "\"} & @{thm (rhs) Pos.simps(3)}\\ - @{thm (lhs) Pos.simps(4)} & @{text "\"} & @{thm (rhs) Pos.simps(4)}\\ - @{thm (lhs) Pos.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} - & @{text "\"} - & @{thm (rhs) Pos.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}\\ - @{thm (lhs) Pos_stars} & @{text "\"} & @{thm (rhs) Pos_stars}\\ - \end{tabular} - \end{center} - - @{thm pflat_len_def} - - - \begin{center} - \begin{tabular}{ccc} - @{thm [mode=Axiom] lex_list.intros(1)}\hspace{1cm} & - @{thm [mode=Rule] lex_list.intros(2)[where ?ps1.0="ps\<^sub>1" and ?ps2.0="ps\<^sub>2"]}\hspace{1cm} & - @{thm [mode=Rule] lex_list.intros(3)[where ?p1.0="p\<^sub>1" and ?p2.0="p\<^sub>2" and - ?ps1.0="ps\<^sub>1" and ?ps2.0="ps\<^sub>2"]} - \end{tabular} - \end{center} - - - Main definition by Okui and Suzuki. - - \begin{center} - \begin{tabular}{ccl} - @{thm (lhs) PosOrd_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} & - @{text "\"} & - @{term "pflat_len v\<^sub>1 p > pflat_len v\<^sub>2 p"} and\\ - & & @{term "(\q \ Pos v\<^sub>1 \ Pos v\<^sub>2. q \lex p --> pflat_len v\<^sub>1 q = pflat_len v\<^sub>2 q)"} - \end{tabular} - \end{center} - - @{thm PosOrd_ex_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} - - @{thm PosOrd_ex_eq_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} - - Lemma 1 - - @{thm [mode=IfThen] PosOrd_shorterE[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} - - but in the other direction only - - @{thm [mode=IfThen] PosOrd_shorterI[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} - - Lemma Transitivity: - - @{thm [mode=IfThen] PosOrd_trans[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2" and ?v3.0="v\<^sub>3"]} - - -*} - -text {* - - Theorem 1: - - @{thm [mode=IfThen] Posix_PosOrd[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} - - Theorem 2: - - @{thm [mode=IfThen] PosOrd_Posix[where ?v1.0="v\<^sub>1"]} -*} section {* The Correctness Argument by Sulzmann and Lu\label{argu} *} diff -r 32b222d77fa0 -r 6746f5e1f1f8 thys/Journal/document/root.bib --- a/thys/Journal/document/root.bib Fri Aug 11 20:29:01 2017 +0100 +++ b/thys/Journal/document/root.bib Fri Aug 18 14:51:29 2017 +0100 @@ -308,7 +308,7 @@ -@InProceedings{OkuiSuzuki2013, +@InProceedings{OkuiSuzuki2010, author = {S.~Okui and T.~Suzuki}, title = {{D}isambiguation in {R}egular {E}xpression {M}atching via {P}osition {A}utomata with {A}ugmented {T}ransitions}, @@ -320,3 +320,13 @@ pages = {231--240} } + + +@TechReport{OkuiSuzukiTech, + author = {S.~Okui and T.~Suzuki}, + title = {{D}isambiguation in {R}egular {E}xpression {M}atching via + {P}osition {A}utomata with {A}ugmented {T}ransitions}, + institution = {University of Aizu}, + year = {2013} +} + diff -r 32b222d77fa0 -r 6746f5e1f1f8 thys/Journal/document/root.tex --- a/thys/Journal/document/root.tex Fri Aug 11 20:29:01 2017 +0100 +++ b/thys/Journal/document/root.tex Fri Aug 18 14:51:29 2017 +0100 @@ -41,11 +41,16 @@ \renewcommand{\thefootnote}{$\star$} \footnotetext[1]{This paper is a revised and expanded version of \cite{AusafDyckhoffUrban2016}. Compared with that paper we give a second definition for POSIX - values introduced by Okui Suzuki \cite{OkuiSuzuki2013} and prove that it is + values introduced by Okui Suzuki \cite{OkuiSuzuki2010,OkuiSuzukiTech} + and prove that it is equivalent to our original one. This second definition is based on an ordering of values and very similar to, but not equivalent with, the - definition given by Sulzmann and Lu~\cite{Sulzmann2014}. We also + definition given by Sulzmann and Lu~\cite{Sulzmann2014}. + The advantage of the definition based on the + ordering is that it implements more directly the informal rules from the + POSIX standard. + We also extend our results to additional constructors of regular expressions.} \renewcommand{\thefootnote}{\arabic{footnote}} @@ -75,9 +80,7 @@ second part we show that $(iii)$ our inductive definition of a POSIX value is equivalent to an alternative definition by Okui and Suzuki which identifies POSIX values as least elements according to an -ordering of values. The advantage of the definition based on the -ordering is that it implements more directly the informal rules from the -POSIX standard.\smallskip +ordering of values. \smallskip {\bf Keywords:} POSIX matching, Derivatives of Regular Expressions, Isabelle/HOL diff -r 32b222d77fa0 -r 6746f5e1f1f8 thys/Lexer.thy --- a/thys/Lexer.thy Fri Aug 11 20:29:01 2017 +0100 +++ b/thys/Lexer.thy Fri Aug 18 14:51:29 2017 +0100 @@ -38,7 +38,7 @@ lemma mkeps_nullable: assumes "nullable(r)" - shows "\ mkeps r : r" + shows "\ mkeps r : r" using assms by (induct rule: nullable.induct) (auto intro: Prf.intros) @@ -49,22 +49,25 @@ using assms by (induct rule: nullable.induct) (auto) -lemma Prf_injval: - assumes "\ v : der c r" - shows "\ (injval r c v) : r" -using assms -apply(induct r arbitrary: c v rule: rexp.induct) -apply(auto intro!: Prf.intros mkeps_nullable elim!: Prf_elims split: if_splits) -done - lemma Prf_injval_flat: - assumes "\ v : der c r" + assumes "\ v : der c r" shows "flat (injval r c v) = c # (flat v)" using assms apply(induct arbitrary: v rule: der.induct) apply(auto elim!: Prf_elims intro: mkeps_flat split: if_splits) done +lemma Prf_injval: + assumes "\ v : der c r" + shows "\ (injval r c v) : r" +using assms +apply(induct r arbitrary: c v rule: rexp.induct) +apply(auto intro!: Prf.intros mkeps_nullable elim!: Prf_elims split: if_splits) +apply(simp add: Prf_injval_flat) +done + + + text {* Mkeps and injval produce, or preserve, Posix values. *} diff -r 32b222d77fa0 -r 6746f5e1f1f8 thys/Positions.thy --- a/thys/Positions.thy Fri Aug 11 20:29:01 2017 +0100 +++ b/thys/Positions.thy Fri Aug 18 14:51:29 2017 +0100 @@ -1,6 +1,6 @@ theory Positions - imports "Spec" + imports "Spec" "Lexer" begin section {* Positions in Values *} @@ -38,30 +38,9 @@ shows "[] \ Pos v" by (induct v rule: Pos.induct)(auto) -fun intlen :: "'a list \ int" -where - "intlen [] = 0" -| "intlen (x # xs) = 1 + intlen xs" - -lemma intlen_int: - shows "intlen xs = int (length xs)" -by (induct xs)(simp_all) +abbreviation + "intlen vs \ int (length vs)" -lemma intlen_bigger: - shows "0 \ intlen xs" -by (induct xs)(auto) - -lemma intlen_append: - shows "intlen (xs @ ys) = intlen xs + intlen ys" -by (simp add: intlen_int) - -lemma intlen_length: - shows "intlen xs < intlen ys \ length xs < length ys" -by (simp add: intlen_int) - -lemma intlen_length_eq: - shows "intlen xs = intlen ys \ length xs = length ys" -by (simp add: intlen_int) definition pflat_len :: "val \ nat list => int" where @@ -165,27 +144,118 @@ "v1 :\val v2 \ v1 :\val v2 \ v1 = v2" +lemma PosOrd_trans: + assumes "v1 :\val v2" "v2 :\val v3" + shows "v1 :\val v3" +proof - + from assms obtain p p' + where as: "v1 \val p v2" "v2 \val p' v3" unfolding PosOrd_ex_def by blast + then have pos: "p \ Pos v1" "p' \ Pos v2" unfolding PosOrd_def pflat_len_def + by (smt not_int_zless_negative)+ + have "p = p' \ p \lex p' \ p' \lex p" + by (rule lex_trichotomous) + moreover + { assume "p = p'" + with as have "v1 \val p v3" unfolding PosOrd_def pflat_len_def + by (smt Un_iff) + then have " v1 :\val v3" unfolding PosOrd_ex_def by blast + } + moreover + { assume "p \lex p'" + with as have "v1 \val p v3" unfolding PosOrd_def pflat_len_def + by (smt Un_iff lex_trans) + then have " v1 :\val v3" unfolding PosOrd_ex_def by blast + } + moreover + { assume "p' \lex p" + with as have "v1 \val p' v3" unfolding PosOrd_def + by (smt Un_iff lex_trans pflat_len_def) + then have "v1 :\val v3" unfolding PosOrd_ex_def by blast + } + ultimately show "v1 :\val v3" by blast +qed + +lemma PosOrd_irrefl: + assumes "v :\val v" + shows "False" +using assms unfolding PosOrd_ex_def PosOrd_def +by auto + +lemma PosOrd_assym: + assumes "v1 :\val v2" + shows "\(v2 :\val v1)" +using assms +using PosOrd_irrefl PosOrd_trans by blast + +text {* + :\val and :\val are partial orders. +*} + +lemma PosOrd_ordering: + shows "ordering (\v1 v2. v1 :\val v2) (\ v1 v2. v1 :\val v2)" +unfolding ordering_def PosOrd_ex_eq_def +apply(auto) +using PosOrd_irrefl apply blast +using PosOrd_assym apply blast +using PosOrd_trans by blast + +lemma PosOrd_order: + shows "class.order (\v1 v2. v1 :\val v2) (\ v1 v2. v1 :\val v2)" +using PosOrd_ordering +apply(simp add: class.order_def class.preorder_def class.order_axioms_def) +unfolding ordering_def +by blast + + +lemma PosOrd_ex_eq2: + shows "v1 :\val v2 \ (v1 :\val v2 \ v1 \ v2)" +using PosOrd_ordering +unfolding ordering_def +by auto + +lemma PosOrdeq_trans: + assumes "v1 :\val v2" "v2 :\val v3" + shows "v1 :\val v3" +using assms PosOrd_ordering +unfolding ordering_def +by blast + +lemma PosOrdeq_antisym: + assumes "v1 :\val v2" "v2 :\val v1" + shows "v1 = v2" +using assms PosOrd_ordering +unfolding ordering_def +by blast + +lemma PosOrdeq_refl: + shows "v :\val v" +unfolding PosOrd_ex_eq_def +by auto + + + + lemma PosOrd_shorterE: assumes "v1 :\val v2" shows "length (flat v2) \ length (flat v1)" using assms unfolding PosOrd_ex_def PosOrd_def -apply(auto simp add: pflat_len_def intlen_int split: if_splits) +apply(auto simp add: pflat_len_def split: if_splits) apply (metis Pos_empty Un_iff at.simps(1) eq_iff lex_simps(1) nat_less_le) by (metis Pos_empty UnI2 at.simps(1) lex_simps(2) lex_trichotomous linear) lemma PosOrd_shorterI: assumes "length (flat v2) < length (flat v1)" - shows "v1 :\val v2" -using assms -unfolding PosOrd_ex_def -by (metis intlen_length lex_simps(2) pflat_len_simps(9) PosOrd_def) + shows "v1 :\val v2" +unfolding PosOrd_ex_def PosOrd_def pflat_len_def +using assms Pos_empty by force lemma PosOrd_spreI: assumes "flat v' \spre flat v" shows "v :\val v'" using assms apply(rule_tac PosOrd_shorterI) -by (metis append_eq_conv_conj le_less_linear prefix_list_def sprefix_list_def take_all) +unfolding prefix_list_def sprefix_list_def +by (metis append_Nil2 append_eq_conv_conj drop_all le_less_linear) lemma PosOrd_Left_Right: @@ -194,7 +264,7 @@ unfolding PosOrd_ex_def apply(rule_tac x="[0]" in exI) using assms -apply(auto simp add: PosOrd_def pflat_len_simps intlen_int) +apply(auto simp add: PosOrd_def pflat_len_simps) done lemma PosOrd_Left_eq: @@ -219,7 +289,7 @@ lemma PosOrd_RightI: assumes "v :\val v'" "flat v = flat v'" - shows "(Right v) :\val (Right v')" + shows "Right v :\val Right v'" using assms(1) unfolding PosOrd_ex_def apply(auto) @@ -229,7 +299,7 @@ done lemma PosOrd_RightE: - assumes "(Right v1) :\val (Right v2)" + assumes "Right v1 :\val Right v2" shows "v1 :\val v2" using assms apply(simp add: PosOrd_ex_def) @@ -258,7 +328,7 @@ lemma PosOrd_SeqI1: assumes "v1 :\val v1'" "flat (Seq v1 v2) = flat (Seq v1' v2')" - shows "(Seq v1 v2) :\val (Seq v1' v2')" + shows "Seq v1 v2 :\val Seq v1' v2'" using assms(1) apply(subst (asm) PosOrd_ex_def) apply(subst (asm) PosOrd_def) @@ -275,12 +345,12 @@ apply(simp add: pflat_len_simps) using assms(2) apply(simp) -apply(auto simp add: pflat_len_simps)[2] -done +apply(auto simp add: pflat_len_simps) +by (metis length_append of_nat_add) lemma PosOrd_SeqI2: assumes "v2 :\val v2'" "flat v2 = flat v2'" - shows "(Seq v v2) :\val (Seq v v2')" + shows "Seq v v2 :\val Seq v v2'" using assms(1) apply(subst (asm) PosOrd_ex_def) apply(subst (asm) PosOrd_def) @@ -301,21 +371,21 @@ done lemma PosOrd_SeqE: - assumes "(Seq v1 v2) :\val (Seq v1' v2')" + assumes "Seq v1 v2 :\val Seq v1' v2'" shows "v1 :\val v1' \ v2 :\val v2'" using assms apply(simp add: PosOrd_ex_def) apply(erule exE) apply(case_tac p) apply(simp add: PosOrd_def) -apply(auto simp add: pflat_len_simps intlen_append)[1] +apply(auto simp add: pflat_len_simps)[1] apply(rule_tac x="[]" in exI) apply(drule_tac x="[]" in spec) apply(simp add: Pos_empty pflat_len_simps) apply(case_tac a) apply(rule disjI1) apply(simp add: PosOrd_def) -apply(auto simp add: pflat_len_simps intlen_append)[1] +apply(auto simp add: pflat_len_simps)[1] apply(rule_tac x="list" in exI) apply(simp) apply(rule ballI) @@ -326,7 +396,7 @@ apply(case_tac nat) apply(rule disjI2) apply(simp add: PosOrd_def) -apply(auto simp add: pflat_len_simps intlen_append) +apply(auto simp add: pflat_len_simps) apply(rule_tac x="list" in exI) apply(simp add: Pos_empty) apply(rule ballI) @@ -342,8 +412,8 @@ done lemma PosOrd_StarsI: - assumes "v1 :\val v2" "flat (Stars (v1#vs1)) = flat (Stars (v2#vs2))" - shows "(Stars (v1#vs1)) :\val (Stars (v2#vs2))" + assumes "v1 :\val v2" "flats (v1#vs1) = flats (v2#vs2)" + shows "Stars (v1#vs1) :\val Stars (v2#vs2)" using assms(1) apply(subst (asm) PosOrd_ex_def) apply(subst (asm) PosOrd_def) @@ -353,13 +423,13 @@ apply(rule_tac x="0#p" in exI) apply(simp add: pflat_len_Stars_simps pflat_len_simps) using assms(2) -apply(simp add: pflat_len_simps intlen_append) +apply(simp add: pflat_len_simps) apply(auto simp add: pflat_len_Stars_simps pflat_len_simps) -done +by (metis length_append of_nat_add) lemma PosOrd_StarsI2: - assumes "(Stars vs1) :\val (Stars vs2)" "flat (Stars vs1) = flat (Stars vs2)" - shows "(Stars (v#vs1)) :\val (Stars (v#vs2))" + assumes "Stars vs1 :\val Stars vs2" "flats vs1 = flats vs2" + shows "Stars (v#vs1) :\val Stars (v#vs2)" using assms(1) apply(subst (asm) PosOrd_ex_def) apply(subst (asm) PosOrd_def) @@ -368,13 +438,8 @@ apply(subst PosOrd_def) apply(case_tac p) apply(simp add: pflat_len_simps) -apply(rule_tac x="[]" in exI) -apply(simp add: pflat_len_Stars_simps pflat_len_simps intlen_append) apply(rule_tac x="Suc a#list" in exI) -apply(simp add: pflat_len_Stars_simps pflat_len_simps) -using assms(2) -apply(simp add: pflat_len_simps intlen_append) -apply(auto simp add: pflat_len_Stars_simps pflat_len_simps) +apply(auto simp add: pflat_len_Stars_simps pflat_len_simps assms(2)) done lemma PosOrd_Stars_appendI: @@ -394,7 +459,7 @@ apply(erule exE) apply(case_tac p) apply(simp) -apply(simp add: PosOrd_def pflat_len_simps intlen_append) +apply(simp add: PosOrd_def pflat_len_simps) apply(subst PosOrd_ex_def) apply(rule_tac x="[]" in exI) apply(simp add: PosOrd_def pflat_len_simps Pos_empty) @@ -405,19 +470,19 @@ apply(clarify) apply(simp add: PosOrd_ex_def) apply(rule_tac x="nat#list" in exI) -apply(auto simp add: PosOrd_def pflat_len_simps intlen_append)[1] +apply(auto simp add: PosOrd_def pflat_len_simps)[1] apply(case_tac q) -apply(simp add: PosOrd_def pflat_len_simps intlen_append) +apply(simp add: PosOrd_def pflat_len_simps) apply(clarify) apply(drule_tac x="Suc a # lista" in bspec) apply(simp) -apply(auto simp add: PosOrd_def pflat_len_simps intlen_append)[1] +apply(auto simp add: PosOrd_def pflat_len_simps)[1] apply(case_tac q) -apply(simp add: PosOrd_def pflat_len_simps intlen_append) +apply(simp add: PosOrd_def pflat_len_simps) apply(clarify) apply(drule_tac x="Suc a # lista" in bspec) apply(simp) -apply(auto simp add: PosOrd_def pflat_len_simps intlen_append)[1] +apply(auto simp add: PosOrd_def pflat_len_simps)[1] done lemma PosOrd_Stars_appendE: @@ -439,42 +504,6 @@ apply(auto) done -lemma PosOrd_trans: - assumes "v1 :\val v2" "v2 :\val v3" - shows "v1 :\val v3" -proof - - from assms obtain p p' - where as: "v1 \val p v2" "v2 \val p' v3" unfolding PosOrd_ex_def by blast - have "p = p' \ p \lex p' \ p' \lex p" - by (rule lex_trichotomous) - moreover - { assume "p = p'" - with as have "v1 \val p v3" unfolding PosOrd_def pflat_len_def - by fastforce - then have " v1 :\val v3" unfolding PosOrd_ex_def by blast - } - moreover - { assume "p \lex p'" - with as have "v1 \val p v3" unfolding PosOrd_def pflat_len_def - by (smt Un_iff lex_trans) - then have " v1 :\val v3" unfolding PosOrd_ex_def by blast - } - moreover - { assume "p' \lex p" - with as have "v1 \val p' v3" unfolding PosOrd_def - by (smt Un_iff intlen_bigger lex_trans pflat_len_def) - then have "v1 :\val v3" unfolding PosOrd_ex_def by blast - } - ultimately show "v1 :\val v3" by blast -qed - - -lemma PosOrd_irrefl: - assumes "v :\val v" - shows "False" -using assms unfolding PosOrd_ex_def PosOrd_def -by auto - lemma PosOrd_almost_trichotomous: shows "v1 :\val v2 \ v2 :\val v1 \ (intlen (flat v1) = intlen (flat v2))" apply(auto simp add: PosOrd_ex_def) @@ -485,12 +514,6 @@ apply(auto simp add: Pos_empty pflat_len_simps) done -lemma WW1: - assumes "v1 :\val v2" "v2 :\val v1" - shows "False" -using assms -apply(auto simp add: PosOrd_ex_def PosOrd_def) -using assms PosOrd_irrefl PosOrd_trans by blast lemma PosOrd_SeqE2: assumes "(Seq v1 v2) :\val (Seq v1' v2')" "flat (Seq v1 v2) = flat (Seq v1' v2')" @@ -512,8 +535,8 @@ apply(auto simp add: PosOrd_def pflat_len_simps) apply(case_tac a) apply(auto simp add: PosOrd_def pflat_len_simps) -apply (metis PosOrd_SeqI1 PosOrd_almost_trichotomous PosOrd_def PosOrd_ex_def WW1 assms(1) assms(2)) -by (metis PosOrd_SeqI1 PosOrd_almost_trichotomous PosOrd_def PosOrd_ex_def WW1 assms(1) assms(2)) +apply (metis PosOrd_SeqI1 PosOrd_def PosOrd_ex_def PosOrd_shorterI PosOrd_assym assms less_linear) +by (metis PosOrd_SeqI1 PosOrd_almost_trichotomous PosOrd_def PosOrd_ex_def PosOrd_assym assms of_nat_eq_iff) lemma PosOrd_SeqE4: assumes "(Seq v1 v2) :\val (Seq v1' v2')" "flat (Seq v1 v2) = flat (Seq v1' v2')" @@ -531,7 +554,7 @@ apply(auto) apply(case_tac "length (flat v1') < length (flat v1)") using PosOrd_shorterI apply blast -by (metis PosOrd_SeqI1 PosOrd_shorterI WW1 antisym_conv3 append_eq_append_conv assms(2)) +by (metis PosOrd_SeqI1 PosOrd_shorterI PosOrd_assym antisym_conv3 append_eq_append_conv assms(2)) @@ -539,39 +562,39 @@ lemma Posix_PosOrd: - assumes "s \ r \ v1" "v2 \ CV r s" + assumes "s \ r \ v1" "v2 \ LV r s" shows "v1 :\val v2" using assms proof (induct arbitrary: v2 rule: Posix.induct) case (Posix_ONE v) - have "v \ CV ONE []" by fact + have "v \ LV ONE []" by fact then have "v = Void" - by (simp add: CV_simps) + by (simp add: LV_simps) then show "Void :\val v" by (simp add: PosOrd_ex_eq_def) next case (Posix_CHAR c v) - have "v \ CV (CHAR c) [c]" by fact + have "v \ LV (CHAR c) [c]" by fact then have "v = Char c" - by (simp add: CV_simps) + by (simp add: LV_simps) then show "Char c :\val v" by (simp add: PosOrd_ex_eq_def) next case (Posix_ALT1 s r1 v r2 v2) have as1: "s \ r1 \ v" by fact - have IH: "\v2. v2 \ CV r1 s \ v :\val v2" by fact - have "v2 \ CV (ALT r1 r2) s" by fact + have IH: "\v2. v2 \ LV r1 s \ v :\val v2" by fact + have "v2 \ LV (ALT r1 r2) s" by fact then have "\ v2 : ALT r1 r2" "flat v2 = s" - by(auto simp add: CV_def prefix_list_def) + by(auto simp add: LV_def prefix_list_def) then consider (Left) v3 where "v2 = Left v3" "\ v3 : r1" "flat v3 = s" | (Right) v3 where "v2 = Right v3" "\ v3 : r2" "flat v3 = s" - by (auto elim: CPrf.cases) + by (auto elim: Prf.cases) then show "Left v :\val v2" proof(cases) case (Left v3) - have "v3 \ CV r1 s" using Left(2,3) - by (auto simp add: CV_def prefix_list_def) + have "v3 \ LV r1 s" using Left(2,3) + by (auto simp add: LV_def prefix_list_def) with IH have "v :\val v3" by simp moreover have "flat v3 = flat v" using as1 Left(3) @@ -583,27 +606,28 @@ case (Right v3) have "flat v3 = flat v" using as1 Right(3) by (simp add: Posix1(2)) - then have "Left v :\val Right v3" using Right(3) as1 - by (auto simp add: PosOrd_ex_eq_def PosOrd_Left_Right) + then have "Left v :\val Right v3" + unfolding PosOrd_ex_eq_def + by (simp add: PosOrd_Left_Right) then show "Left v :\val v2" unfolding Right . qed next case (Posix_ALT2 s r2 v r1 v2) have as1: "s \ r2 \ v" by fact have as2: "s \ L r1" by fact - have IH: "\v2. v2 \ CV r2 s \ v :\val v2" by fact - have "v2 \ CV (ALT r1 r2) s" by fact + have IH: "\v2. v2 \ LV r2 s \ v :\val v2" by fact + have "v2 \ LV (ALT r1 r2) s" by fact then have "\ v2 : ALT r1 r2" "flat v2 = s" - by(auto simp add: CV_def prefix_list_def) + by(auto simp add: LV_def prefix_list_def) then consider (Left) v3 where "v2 = Left v3" "\ v3 : r1" "flat v3 = s" | (Right) v3 where "v2 = Right v3" "\ v3 : r2" "flat v3 = s" - by (auto elim: CPrf.cases) + by (auto elim: Prf.cases) then show "Right v :\val v2" proof (cases) case (Right v3) - have "v3 \ CV r2 s" using Right(2,3) - by (auto simp add: CV_def prefix_list_def) + have "v3 \ LV r2 s" using Right(2,3) + by (auto simp add: LV_def prefix_list_def) with IH have "v :\val v3" by simp moreover have "flat v3 = flat v" using as1 Right(3) @@ -613,34 +637,34 @@ then show "Right v :\val v2" unfolding Right . next case (Left v3) - have "v3 \ CV r1 s" using Left(2,3) as2 - by (auto simp add: CV_def prefix_list_def) + have "v3 \ LV r1 s" using Left(2,3) as2 + by (auto simp add: LV_def prefix_list_def) then have "flat v3 = flat v \ \ v3 : r1" using as1 Left(3) - by (simp add: Posix1(2) CV_def) + by (simp add: Posix1(2) LV_def) then have "False" using as1 as2 Left - by (auto simp add: Posix1(2) L_flat_Prf1 Prf_CPrf) + by (auto simp add: Posix1(2) L_flat_Prf1) then show "Right v :\val v2" by simp qed next case (Posix_SEQ s1 r1 v1 s2 r2 v2 v3) have "s1 \ r1 \ v1" "s2 \ r2 \ v2" by fact+ then have as1: "s1 = flat v1" "s2 = flat v2" by (simp_all add: Posix1(2)) - have IH1: "\v3. v3 \ CV r1 s1 \ v1 :\val v3" by fact - have IH2: "\v3. v3 \ CV r2 s2 \ v2 :\val v3" by fact + have IH1: "\v3. v3 \ LV r1 s1 \ v1 :\val v3" by fact + have IH2: "\v3. v3 \ LV r2 s2 \ v2 :\val v3" by fact have cond: "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L r1 \ s\<^sub>4 \ L r2)" by fact - have "v3 \ CV (SEQ r1 r2) (s1 @ s2)" by fact + have "v3 \ LV (SEQ r1 r2) (s1 @ s2)" by fact then obtain v3a v3b where eqs: "v3 = Seq v3a v3b" "\ v3a : r1" "\ v3b : r2" "flat v3a @ flat v3b = s1 @ s2" - by (force simp add: prefix_list_def CV_def elim: CPrf.cases) + by (force simp add: prefix_list_def LV_def elim: Prf.cases) with cond have "flat v3a \pre s1" unfolding prefix_list_def - by (smt L_flat_Prf1 Prf_CPrf append_eq_append_conv2 append_self_conv) + by (smt L_flat_Prf1 append_eq_append_conv2 append_self_conv) then have "flat v3a \spre s1 \ (flat v3a = s1 \ flat v3b = s2)" using eqs by (simp add: sprefix_list_def append_eq_conv_conj) then have q2: "v1 :\val v3a \ (flat v3a = s1 \ flat v3b = s2)" using PosOrd_spreI as1(1) eqs by blast - then have "v1 :\val v3a \ (v3a \ CV r1 s1 \ v3b \ CV r2 s2)" using eqs(2,3) - by (auto simp add: CV_def) + then have "v1 :\val v3a \ (v3a \ LV r1 s1 \ v3b \ LV r2 s2)" using eqs(2,3) + by (auto simp add: LV_def) then have "v1 :\val v3a \ (v1 :\val v3a \ v2 :\val v3b)" using IH1 IH2 by blast then have "Seq v1 v2 :\val Seq v3a v3b" using eqs q2 as1 unfolding PosOrd_ex_eq_def by (auto simp add: PosOrd_SeqI1 PosOrd_SeqI2) @@ -649,43 +673,43 @@ case (Posix_STAR1 s1 r v s2 vs v3) have "s1 \ r \ v" "s2 \ STAR r \ Stars vs" by fact+ then have as1: "s1 = flat v" "s2 = flat (Stars vs)" by (auto dest: Posix1(2)) - have IH1: "\v3. v3 \ CV r s1 \ v :\val v3" by fact - have IH2: "\v3. v3 \ CV (STAR r) s2 \ Stars vs :\val v3" by fact + have IH1: "\v3. v3 \ LV r s1 \ v :\val v3" by fact + have IH2: "\v3. v3 \ LV (STAR r) s2 \ Stars vs :\val v3" by fact have cond: "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L r \ s\<^sub>4 \ L (STAR r))" by fact have cond2: "flat v \ []" by fact - have "v3 \ CV (STAR r) (s1 @ s2)" by fact + have "v3 \ LV (STAR r) (s1 @ s2)" by fact then consider (NonEmpty) v3a vs3 where "v3 = Stars (v3a # vs3)" "\ v3a : r" "\ Stars vs3 : STAR r" "flat (Stars (v3a # vs3)) = s1 @ s2" | (Empty) "v3 = Stars []" - unfolding CV_def + unfolding LV_def apply(auto) - apply(erule CPrf.cases) + apply(erule Prf.cases) apply(simp_all) apply(auto)[1] apply(case_tac vs) apply(auto) - using CPrf.intros(6) by blast - then show "Stars (v # vs) :\val v3" (* HERE *) + using Prf.intros(6) by blast + then show "Stars (v # vs) :\val v3" proof (cases) case (NonEmpty v3a vs3) have "flat (Stars (v3a # vs3)) = s1 @ s2" using NonEmpty(4) . with cond have "flat v3a \pre s1" using NonEmpty(2,3) unfolding prefix_list_def - by (smt L_flat_Prf1 Prf_CPrf append_Nil2 append_eq_append_conv2 flat.simps(7)) + by (smt L_flat_Prf1 append_Nil2 append_eq_append_conv2 flat.simps(7)) then have "flat v3a \spre s1 \ (flat v3a = s1 \ flat (Stars vs3) = s2)" using NonEmpty(4) by (simp add: sprefix_list_def append_eq_conv_conj) then have q2: "v :\val v3a \ (flat v3a = s1 \ flat (Stars vs3) = s2)" using PosOrd_spreI as1(1) NonEmpty(4) by blast - then have "v :\val v3a \ (v3a \ CV r s1 \ Stars vs3 \ CV (STAR r) s2)" - using NonEmpty(2,3) by (auto simp add: CV_def) + then have "v :\val v3a \ (v3a \ LV r s1 \ Stars vs3 \ LV (STAR r) s2)" + using NonEmpty(2,3) by (auto simp add: LV_def) then have "v :\val v3a \ (v :\val v3a \ Stars vs :\val Stars vs3)" using IH1 IH2 by blast then have "v :\val v3a \ (v = v3a \ Stars vs :\val Stars vs3)" unfolding PosOrd_ex_eq_def by auto then have "Stars (v # vs) :\val Stars (v3a # vs3)" using NonEmpty(4) q2 as1 unfolding PosOrd_ex_eq_def - by (metis PosOrd_StarsI PosOrd_StarsI2 flat.simps(7) val.inject(5)) + using PosOrd_StarsI PosOrd_StarsI2 by auto then show "Stars (v # vs) :\val v3" unfolding NonEmpty by blast next case Empty @@ -696,9 +720,9 @@ qed next case (Posix_STAR2 r v2) - have "v2 \ CV (STAR r) []" by fact + have "v2 \ LV (STAR r) []" by fact then have "v2 = Stars []" - unfolding CV_def by (auto elim: CPrf.cases) + unfolding LV_def by (auto elim: Prf.cases) then show "Stars [] :\val v2" by (simp add: PosOrd_ex_eq_def) qed @@ -706,7 +730,7 @@ lemma Posix_PosOrd_reverse: assumes "s \ r \ v1" - shows "\(\v2 \ CV r s. v2 :\val v1)" + shows "\(\v2 \ LV r s. v2 :\val v1)" using assms by (metis Posix_PosOrd less_irrefl PosOrd_def PosOrd_ex_eq_def PosOrd_ex_def PosOrd_trans) @@ -729,7 +753,7 @@ \ flat (Stars vs) \ STAR r \ Stars vs" by fact have as2: "\v\set (v # vs). flat v \ r \ v \ flat v \ []" by fact have as3: "\ (\vs2\LV (STAR r) (flat (Stars (v # vs))). vs2 :\val Stars (v # vs))" by fact - have "flat v \ r \ v" using as2 by simp + have "flat v \ r \ v" "flat v \ []" using as2 by auto moreover have "flat (Stars vs) \ STAR r \ Stars vs" proof (rule IH) @@ -742,11 +766,14 @@ apply(erule Prf.cases) apply(simp_all) apply(drule_tac x="Stars (v # vs)" in bspec) - apply(simp add: LV_def CV_def) - using Posix_Prf Prf.intros(6) calculation + apply(simp add: LV_def) + using Posix_LV Prf.intros(6) calculation apply(rule_tac Prf.intros) apply(simp add:) + prefer 2 apply (simp add: PosOrd_StarsI2) + apply(drule Posix_LV) + apply(simp add: LV_def) done qed moreover @@ -778,48 +805,44 @@ section {* The Smallest Value is indeed the Posix Value *} -text {* - The next lemma seems to require LV instead of CV in the Star-case. -*} - lemma PosOrd_Posix: - assumes "v1 \ CV r s" "\v\<^sub>2 \ LV r s. \ v\<^sub>2 :\val v1" + assumes "v1 \ LV r s" "\v\<^sub>2 \ LV r s. \ v\<^sub>2 :\val v1" shows "s \ r \ v1" using assms proof(induct r arbitrary: s v1) case (ZERO s v1) - have "v1 \ CV ZERO s" by fact - then show "s \ ZERO \ v1" unfolding CV_def - by (auto elim: CPrf.cases) + have "v1 \ LV ZERO s" by fact + then show "s \ ZERO \ v1" unfolding LV_def + by (auto elim: Prf.cases) next case (ONE s v1) - have "v1 \ CV ONE s" by fact - then show "s \ ONE \ v1" unfolding CV_def - by(auto elim!: CPrf.cases intro: Posix.intros) + have "v1 \ LV ONE s" by fact + then show "s \ ONE \ v1" unfolding LV_def + by(auto elim!: Prf.cases intro: Posix.intros) next case (CHAR c s v1) - have "v1 \ CV (CHAR c) s" by fact - then show "s \ CHAR c \ v1" unfolding CV_def - by (auto elim!: CPrf.cases intro: Posix.intros) + have "v1 \ LV (CHAR c) s" by fact + then show "s \ CHAR c \ v1" unfolding LV_def + by (auto elim!: Prf.cases intro: Posix.intros) next case (ALT r1 r2 s v1) - have IH1: "\s v1. \v1 \ CV r1 s; \v2 \ LV r1 s. \ v2 :\val v1\ \ s \ r1 \ v1" by fact - have IH2: "\s v1. \v1 \ CV r2 s; \v2 \ LV r2 s. \ v2 :\val v1\ \ s \ r2 \ v1" by fact + have IH1: "\s v1. \v1 \ LV r1 s; \v2 \ LV r1 s. \ v2 :\val v1\ \ s \ r1 \ v1" by fact + have IH2: "\s v1. \v1 \ LV r2 s; \v2 \ LV r2 s. \ v2 :\val v1\ \ s \ r2 \ v1" by fact have as1: "\v2\LV (ALT r1 r2) s. \ v2 :\val v1" by fact - have as2: "v1 \ CV (ALT r1 r2) s" by fact + have as2: "v1 \ LV (ALT r1 r2) s" by fact then consider (Left) v1' where "v1 = Left v1'" "s = flat v1'" - "v1' \ CV r1 s" + "v1' \ LV r1 s" | (Right) v1' where "v1 = Right v1'" "s = flat v1'" - "v1' \ CV r2 s" - unfolding CV_def by (auto elim: CPrf.cases) + "v1' \ LV r2 s" + unfolding LV_def by (auto elim: Prf.cases) then show "s \ ALT r1 r2 \ v1" proof (cases) case (Left v1') - have "v1' \ CV r1 s" using as2 - unfolding CV_def Left by (auto elim: CPrf.cases) + have "v1' \ LV r1 s" using as2 + unfolding LV_def Left by (auto elim: Prf.cases) moreover have "\v2 \ LV r1 s. \ v2 :\val v1'" using as1 unfolding LV_def Left using Prf.intros(2) PosOrd_Left_eq by force @@ -828,8 +851,8 @@ then show "s \ ALT r1 r2 \ v1" using Left by simp next case (Right v1') - have "v1' \ CV r2 s" using as2 - unfolding CV_def Right by (auto elim: CPrf.cases) + have "v1' \ LV r2 s" using as2 + unfolding LV_def Right by (auto elim: Prf.cases) moreover have "\v2 \ LV r2 s. \ v2 :\val v1'" using as1 unfolding LV_def Right using Prf.intros(3) PosOrd_RightI by force @@ -841,7 +864,8 @@ then have "Left v' \ LV (ALT r1 r2) s" unfolding LV_def by (auto intro: Prf.intros) with as1 have "\ (Left v' :\val Right v1') \ (flat v' = s)" - unfolding LV_def Right by (auto) + unfolding LV_def Right + by (auto) then have False using PosOrd_Left_Right Right by blast } then have "s \ L r1" by rule @@ -850,21 +874,21 @@ qed next case (SEQ r1 r2 s v1) - have IH1: "\s v1. \v1 \ CV r1 s; \v2 \ LV r1 s. \ v2 :\val v1\ \ s \ r1 \ v1" by fact - have IH2: "\s v1. \v1 \ CV r2 s; \v2 \ LV r2 s. \ v2 :\val v1\ \ s \ r2 \ v1" by fact + have IH1: "\s v1. \v1 \ LV r1 s; \v2 \ LV r1 s. \ v2 :\val v1\ \ s \ r1 \ v1" by fact + have IH2: "\s v1. \v1 \ LV r2 s; \v2 \ LV r2 s. \ v2 :\val v1\ \ s \ r2 \ v1" by fact have as1: "\v2\LV (SEQ r1 r2) s. \ v2 :\val v1" by fact - have as2: "v1 \ CV (SEQ r1 r2) s" by fact + have as2: "v1 \ LV (SEQ r1 r2) s" by fact then obtain v1a v1b where eqs: "v1 = Seq v1a v1b" "s = flat v1a @ flat v1b" - "v1a \ CV r1 (flat v1a)" "v1b \ CV r2 (flat v1b)" - unfolding CV_def by(auto elim: CPrf.cases) + "v1a \ LV r1 (flat v1a)" "v1b \ LV r2 (flat v1b)" + unfolding LV_def by(auto elim: Prf.cases) have "\v2 \ LV r1 (flat v1a). \ v2 :\val v1a" proof fix v2 assume "v2 \ LV r1 (flat v1a)" with eqs(2,4) have "Seq v2 v1b \ LV (SEQ r1 r2) s" - by (simp add: CV_def LV_def Prf.intros(1) Prf_CPrf) + by (simp add: LV_def Prf.intros(1)) with as1 have "\ Seq v2 v1b :\val Seq v1a v1b \ flat (Seq v2 v1b) = flat (Seq v1a v1b)" using eqs by (simp add: LV_def) then show "\ v2 :\val v1a" @@ -877,7 +901,7 @@ fix v2 assume "v2 \ LV r2 (flat v1b)" with eqs(2,3,4) have "Seq v1a v2 \ LV (SEQ r1 r2) s" - by (simp add: CV_def LV_def Prf.intros Prf_CPrf) + by (simp add: LV_def Prf.intros) with as1 have "\ Seq v1a v2 :\val Seq v1a v1b \ flat v2 = flat v1b" using eqs by (simp add: LV_def) then show "\ v2 :\val v1b" @@ -889,10 +913,10 @@ proof assume "\s3 s4. s3 \ [] \ s3 @ s4 = flat v1b \ flat v1a @ s3 \ L r1 \ s4 \ L r2" then obtain s3 s4 where q1: "s3 \ [] \ s3 @ s4 = flat v1b \ flat v1a @ s3 \ L r1 \ s4 \ L r2" by blast - then obtain vA vB where q2: "flat vA = flat v1a @ s3" "\ vA : r1" "flat vB = s4" "\ vB : r2" + then obtain vA vB where q2: "flat vA = flat v1a @ s3" "\ vA : r1" "flat vB = s4" "\ vB : r2" using L_flat_Prf2 by blast then have "Seq vA vB \ LV (SEQ r1 r2) s" unfolding eqs using q1 - by (auto simp add: LV_def intro: Prf.intros) + by (auto simp add: LV_def intro!: Prf.intros) with as1 have "\ Seq vA vB :\val Seq v1a v1b" unfolding eqs by auto then have "\ vA :\val v1a \ length (flat vA) > length (flat v1a)" using q1 q2 PosOrd_SeqI1 by auto then show "False" @@ -903,14 +927,14 @@ by (rule Posix.intros) next case (STAR r s v1) - have IH: "\s v1. \v1 \ CV r s; \v2\LV r s. \ v2 :\val v1\ \ s \ r \ v1" by fact + have IH: "\s v1. \v1 \ LV r s; \v2\LV r s. \ v2 :\val v1\ \ s \ r \ v1" by fact have as1: "\v2\LV (STAR r) s. \ v2 :\val v1" by fact - have as2: "v1 \ CV (STAR r) s" by fact + have as2: "v1 \ LV (STAR r) s" by fact then obtain vs where eqs: "v1 = Stars vs" "s = flat (Stars vs)" - "\v \ set vs. v \ CV r (flat v)" - unfolding CV_def by (auto elim: CPrf.cases) + "\v \ set vs. v \ LV r (flat v)" + unfolding LV_def by (auto elim: Prf.cases) have "\v\set vs. flat v \ r \ v \ flat v \ []" proof fix v @@ -926,9 +950,9 @@ assume "v2 \ LV r (flat v)" then have "Stars (pre @ [v2] @ post) \ LV (STAR r) s" using as2 unfolding e eqs - apply(auto simp add: CV_def LV_def intro!: Prf.intros)[1] - using CPrf_Stars_appendE Prf_CPrf Prf_elims(6) list.set_intros apply blast - by (metis CPrf_Stars_appendE Prf_CPrf Prf_elims(6) list.set_intros(2) val.inject(5)) + apply(auto simp add: LV_def intro!: Prf.intros elim: Prf_elims dest: Prf_Stars_appendE) + apply(auto dest!: Prf_Stars_appendE elim: Prf.cases) + done then have "\ Stars (pre @ [v2] @ post) :\val Stars (pre @ [v] @ post)" using q by simp with w show "False" @@ -936,25 +960,18 @@ PosOrd_StarsI PosOrd_Stars_appendI by auto qed with IH - show "flat v \ r \ v \ flat v \ []" using a as2 unfolding eqs CV_def - by (auto elim: CPrf.cases) + show "flat v \ r \ v \ flat v \ []" using a as2 unfolding eqs LV_def + by (auto elim: Prf.cases) qed moreover have "\ (\vs2\LV (STAR r) (flat (Stars vs)). vs2 :\val Stars vs)" proof assume "\vs2 \ LV (STAR r) (flat (Stars vs)). vs2 :\val Stars vs" - then obtain vs2 where "\ Stars vs2 : STAR r" "flat (Stars vs2) = flat (Stars vs)" + then obtain vs2 where "\ Stars vs2 : STAR r" "flat (Stars vs2) = flat (Stars vs)" "Stars vs2 :\val Stars vs" - unfolding LV_def - apply(auto) - apply(erule Prf.cases) - apply(auto intro: Prf.intros) - done + unfolding LV_def by (force elim: Prf_elims intro: Prf.intros) then show "False" using as1 unfolding eqs - apply - - apply(drule_tac x="Stars vs2" in bspec) - apply(auto simp add: LV_def) - done + by (auto simp add: LV_def) qed ultimately have "flat (Stars vs) \ STAR r \ Stars vs" thm PosOrd_Posix_Stars @@ -962,6 +979,55 @@ then show "s \ STAR r \ v1" unfolding eqs . qed +lemma Least_existence: + assumes "LV r s \ {}" + shows " \vmin \ LV r s. \v \ LV r s. vmin :\val v" +proof - + from assms + obtain vposix where "s \ r \ vposix" + unfolding LV_def + using L_flat_Prf1 lexer_correct_Some by blast + then have "\v \ LV r s. vposix :\val v" + by (simp add: Posix_PosOrd) + then show "\vmin \ LV r s. \v \ LV r s. vmin :\val v" + using Posix_LV \s \ r \ vposix\ by blast +qed + +lemma Least_existence1: + assumes "LV r s \ {}" + shows " \!vmin \ LV r s. \v \ (LV r s \ {v'. flat v' \spre s}). vmin :\val v" +using Least_existence[OF assms] assms +apply - +apply(erule bexE) +apply(rule_tac a="vmin" in ex1I) +apply(auto)[1] +apply (metis PosOrd_Posix PosOrd_ex_eq2 PosOrd_spreI PosOrdeq_antisym Posix1(2)) +apply(auto)[1] +apply(simp add: PosOrdeq_antisym) +done + +lemma + shows "partial_order_on UNIV {(v1, v2). v1 :\val v2}" +apply(simp add: partial_order_on_def) +apply(simp add: preorder_on_def refl_on_def) +apply(simp add: PosOrdeq_refl) +apply(auto) +apply(rule transI) +apply(auto intro: PosOrdeq_trans)[1] +apply(rule antisymI) +apply(simp add: PosOrdeq_antisym) +done + +lemma + "wf {(v1, v2). v1 :\val v2 \ v1 \ LV r s \ v2 \ LV r s}" +apply(rule finite_acyclic_wf) +prefer 2 +apply(simp add: acyclic_def) +apply(induct_tac rule: trancl.induct) +apply(auto)[1] +oops + + unused_thms end \ No newline at end of file diff -r 32b222d77fa0 -r 6746f5e1f1f8 thys/Spec.thy --- a/thys/Spec.thy Fri Aug 11 20:29:01 2017 +0100 +++ b/thys/Spec.thy Fri Aug 18 14:51:29 2017 +0100 @@ -176,35 +176,6 @@ -section {* Lemmas about ders *} - -(* not really needed *) - -lemma ders_ZERO: - shows "ders s (ZERO) = ZERO" -apply(induct s) -apply(simp_all) -done - -lemma ders_ONE: - shows "ders s (ONE) = (if s = [] then ONE else ZERO)" -apply(induct s) -apply(simp_all add: ders_ZERO) -done - -lemma ders_CHAR: - shows "ders s (CHAR c) = - (if s = [c] then ONE else - (if s = [] then (CHAR c) else ZERO))" -apply(induct s) -apply(simp_all add: ders_ZERO ders_ONE) -done - -lemma ders_ALT: - shows "ders s (ALT r1 r2) = ALT (ders s r1) (ders s r2)" -by (induct s arbitrary: r1 r2)(simp_all) - - section {* Values *} datatype val = @@ -236,109 +207,33 @@ "flat (Stars vs) = flats vs" by (induct vs) (auto) - -section {* Relation between values and regular expressions *} - -inductive - Prf :: "val \ rexp \ bool" ("\ _ : _" [100, 100] 100) -where - "\\ v1 : r1; \ v2 : r2\ \ \ Seq v1 v2 : SEQ r1 r2" -| "\ v1 : r1 \ \ Left v1 : ALT r1 r2" -| "\ v2 : r2 \ \ Right v2 : ALT r1 r2" -| "\ Void : ONE" -| "\ Char c : CHAR c" -| "\v \ set vs. \ v : r \ \ Stars vs : STAR r" - -inductive_cases Prf_elims: - "\ v : ZERO" - "\ v : SEQ r1 r2" - "\ v : ALT r1 r2" - "\ v : ONE" - "\ v : CHAR c" - "\ vs : STAR r" - lemma Star_concat: assumes "\s \ set ss. s \ A" shows "concat ss \ A\" using assms by (induct ss) (auto) -lemma Star_string: +lemma Star_cstring: assumes "s \ A\" - shows "\ss. concat ss = s \ (\s \ set ss. s \ A)" + shows "\ss. concat ss = s \ (\s \ set ss. s \ A \ s \ [])" using assms apply(induct rule: Star.induct) -apply(auto) +apply(auto)[1] apply(rule_tac x="[]" in exI) apply(simp) +apply(erule exE) +apply(clarify) +apply(case_tac "s1 = []") +apply(rule_tac x="ss" in exI) +apply(simp) apply(rule_tac x="s1#ss" in exI) apply(simp) done -lemma Star_val: - assumes "\s\set ss. \v. s = flat v \ \ v : r" - shows "\vs. flats vs = concat ss \ (\v\set vs. \ v : r)" -using assms -apply(induct ss) -apply(auto) -apply(rule_tac x="[]" in exI) -apply(simp) -apply(rule_tac x="v#vs" in exI) -apply(simp) -done -lemma Prf_Stars_append: - assumes "\ Stars vs1 : STAR r" "\ Stars vs2 : STAR r" - shows "\ Stars (vs1 @ vs2) : STAR r" -using assms -by (auto intro!: Prf.intros elim!: Prf_elims) - -lemma Prf_flat_L: - assumes "\ v : r" - shows "flat v \ L r" -using assms -by (induct v r rule: Prf.induct) - (auto simp add: Sequ_def Star_concat) - - -lemma L_flat_Prf1: - assumes "\ v : r" - shows "flat v \ L r" -using assms -by (induct) (auto simp add: Sequ_def Star_concat) - -lemma L_flat_Prf2: - assumes "s \ L r" - shows "\v. \ v : r \ flat v = s" -using assms -proof(induct r arbitrary: s) - case (STAR r s) - have IH: "\s. s \ L r \ \v. \ v : r \ flat v = s" by fact - have "s \ L (STAR r)" by fact - then obtain ss where "concat ss = s" "\s \ set ss. s \ L r" - using Star_string by auto - then obtain vs where "flats vs = s" "\v\set vs. \ v : r" - using IH Star_val by blast - then show "\v. \ v : STAR r \ flat v = s" - using Prf.intros(6) flat_Stars by blast -next - case (SEQ r1 r2 s) - then show "\v. \ v : SEQ r1 r2 \ flat v = s" - unfolding Sequ_def L.simps by (fastforce intro: Prf.intros) -next - case (ALT r1 r2 s) - then show "\v. \ v : ALT r1 r2 \ flat v = s" - unfolding L.simps by (fastforce intro: Prf.intros) -qed (auto intro: Prf.intros) - -lemma L_flat_Prf: - shows "L(r) = {flat v | v. \ v : r}" -using L_flat_Prf1 L_flat_Prf2 by blast - - -section {* Canonical Values *} +section {* Lexical Values *} inductive - CPrf :: "val \ rexp \ bool" ("\ _ : _" [100, 100] 100) + Prf :: "val \ rexp \ bool" ("\ _ : _" [100, 100] 100) where "\\ v1 : r1; \ v2 : r2\ \ \ Seq v1 v2 : SEQ r1 r2" | "\ v1 : r1 \ \ Left v1 : ALT r1 r2" @@ -347,34 +242,92 @@ | "\ Char c : CHAR c" | "\v \ set vs. \ v : r \ flat v \ [] \ \ Stars vs : STAR r" -lemma Prf_CPrf: - assumes "\ v : r" - shows "\ v : r" -using assms -by (induct)(auto intro: Prf.intros) +inductive_cases Prf_elims: + "\ v : ZERO" + "\ v : SEQ r1 r2" + "\ v : ALT r1 r2" + "\ v : ONE" + "\ v : CHAR c" + "\ vs : STAR r" -lemma CPrf_Stars_appendE: +lemma Prf_Stars_appendE: assumes "\ Stars (vs1 @ vs2) : STAR r" shows "\ Stars vs1 : STAR r \ \ Stars vs2 : STAR r" using assms -apply(erule_tac CPrf.cases) -apply(auto intro: CPrf.intros) +by (auto intro: Prf.intros elim!: Prf_elims) + + +lemma Star_cval: + assumes "\s\set ss. \v. s = flat v \ \ v : r" + shows "\vs. flats vs = concat ss \ (\v\set vs. \ v : r \ flat v \ [])" +using assms +apply(induct ss) +apply(auto) +apply(rule_tac x="[]" in exI) +apply(simp) +apply(case_tac "flat v = []") +apply(rule_tac x="vs" in exI) +apply(simp) +apply(rule_tac x="v#vs" in exI) +apply(simp) done -section {* Sets of Lexical and Canonical Values *} +lemma L_flat_Prf1: + assumes "\ v : r" + shows "flat v \ L r" +using assms +by (induct) (auto simp add: Sequ_def Star_concat) -definition - LV :: "rexp \ string \ val set" -where "LV r s \ {v. \ v : r \ flat v = s}" +lemma L_flat_Prf2: + assumes "s \ L r" + shows "\v. \ v : r \ flat v = s" +using assms +proof(induct r arbitrary: s) + case (STAR r s) + have IH: "\s. s \ L r \ \v. \ v : r \ flat v = s" by fact + have "s \ L (STAR r)" by fact + then obtain ss where "concat ss = s" "\s \ set ss. s \ L r \ s \ []" + using Star_cstring by auto + then obtain vs where "flats vs = s" "\v\set vs. \ v : r \ flat v \ []" + using IH Star_cval by metis + then show "\v. \ v : STAR r \ flat v = s" + using Prf.intros(6) flat_Stars by blast +next + case (SEQ r1 r2 s) + then show "\v. \ v : SEQ r1 r2 \ flat v = s" + unfolding Sequ_def L.simps by (fastforce intro: Prf.intros) +next + case (ALT r1 r2 s) + then show "\v. \ v : ALT r1 r2 \ flat v = s" + unfolding L.simps by (fastforce intro: Prf.intros) +qed (auto intro: Prf.intros) + + +lemma L_flat_Prf: + shows "L(r) = {flat v | v. \ v : r}" +using L_flat_Prf1 L_flat_Prf2 by blast + + + +section {* Sets of Lexical Values *} + +text {* + Shows that lexical values are finite for a given regex and string. +*} definition - CV :: "rexp \ string \ val set" -where "CV r s \ {v. \ v : r \ flat v = s}" + LV :: "rexp \ string \ val set" +where "LV r s \ {v. \ v : r \ flat v = s}" -lemma LV_CV_subset: - shows "CV r s \ LV r s" -unfolding CV_def LV_def by(auto simp add: Prf_CPrf) +lemma LV_simps: + shows "LV ZERO s = {}" + and "LV ONE s = (if s = [] then {Void} else {})" + and "LV (CHAR c) s = (if s = [c] then {Char c} else {})" + and "LV (ALT r1 r2) s = Left ` LV r1 s \ Right ` LV r2 s" +unfolding LV_def +by (auto intro: Prf.intros elim: Prf.cases) + abbreviation "Prefixes s \ {s'. prefixeq s' s}" @@ -389,13 +342,6 @@ shows "Suffixes (c # s) = Suffixes s \ {c # s}" by (auto simp add: suffixeq_def Cons_eq_append_conv) -lemma CV_simps: - shows "CV ZERO s = {}" - and "CV ONE s = (if s = [] then {Void} else {})" - and "CV (CHAR c) s = (if s = [c] then {Char c} else {})" - and "CV (ALT r1 r2) s = Left ` CV r1 s \ Right ` CV r2 s" -unfolding CV_def -by (auto intro: CPrf.intros elim: CPrf.cases) lemma finite_Suffixes: shows "finite (Suffixes s)" @@ -423,34 +369,17 @@ ultimately show "finite (Prefixes s)" by simp qed -lemma CV_SEQ_subset: - "CV (SEQ r1 r2) s \ (\(v1,v2). Seq v1 v2) ` ((\s' \ Prefixes s. CV r1 s') \ (\s' \ Suffixes s. CV r2 s'))" -unfolding image_def CV_def prefixeq_def suffixeq_def -by (auto elim: CPrf.cases) - -lemma CV_STAR_subset: - "CV (STAR r) s \ {Stars []} \ - (\(v,vs). Stars (v#vs)) ` ((\s' \ Prefixes s. CV r s') \ (\s2 \ SSuffixes s. Stars -` (CV (STAR r) s2)))" -unfolding image_def CV_def prefixeq_def suffix_def -apply(auto) -apply(erule CPrf.cases) -apply(auto) -apply(case_tac vs) -apply(auto intro: CPrf.intros) -done - - -lemma CV_STAR_finite: - assumes "\s. finite (CV r s)" - shows "finite (CV (STAR r) s)" +lemma LV_STAR_finite: + assumes "\s. finite (LV r s)" + shows "finite (LV (STAR r) s)" proof(induct s rule: length_induct) fix s::"char list" - assume "\s'. length s' < length s \ finite (CV (STAR r) s')" - then have IH: "\s' \ SSuffixes s. finite (CV (STAR r) s')" + assume "\s'. length s' < length s \ finite (LV (STAR r) s')" + then have IH: "\s' \ SSuffixes s. finite (LV (STAR r) s')" by (auto simp add: suffix_def) def f \ "\(v, vs). Stars (v # vs)" - def S1 \ "\s' \ Prefixes s. CV r s'" - def S2 \ "\s2 \ SSuffixes s. Stars -` (CV (STAR r) s2)" + def S1 \ "\s' \ Prefixes s. LV r s'" + def S2 \ "\s2 \ SSuffixes s. Stars -` (LV (STAR r) s2)" have "finite S1" using assms unfolding S1_def by (simp_all add: finite_Prefixes) moreover @@ -459,44 +388,53 @@ ultimately have "finite ({Stars []} \ f ` (S1 \ S2))" by simp moreover - have "CV (STAR r) s \ {Stars []} \ f ` (S1 \ S2)" unfolding S1_def S2_def f_def - by (rule CV_STAR_subset) + have "LV (STAR r) s \ {Stars []} \ f ` (S1 \ S2)" + unfolding S1_def S2_def f_def + unfolding LV_def image_def prefixeq_def suffix_def + apply(auto elim: Prf_elims) + apply(erule Prf_elims) + apply(auto) + apply(case_tac vs) + apply(auto intro: Prf.intros) + done ultimately - show "finite (CV (STAR r) s)" by (simp add: finite_subset) + show "finite (LV (STAR r) s)" by (simp add: finite_subset) qed -lemma CV_finite: - shows "finite (CV r s)" +lemma LV_finite: + shows "finite (LV r s)" proof(induct r arbitrary: s) case (ZERO s) - show "finite (CV ZERO s)" by (simp add: CV_simps) + show "finite (LV ZERO s)" by (simp add: LV_simps) next case (ONE s) - show "finite (CV ONE s)" by (simp add: CV_simps) + show "finite (LV ONE s)" by (simp add: LV_simps) next case (CHAR c s) - show "finite (CV (CHAR c) s)" by (simp add: CV_simps) + show "finite (LV (CHAR c) s)" by (simp add: LV_simps) next case (ALT r1 r2 s) - then show "finite (CV (ALT r1 r2) s)" by (simp add: CV_simps) + then show "finite (LV (ALT r1 r2) s)" by (simp add: LV_simps) next case (SEQ r1 r2 s) def f \ "\(v1, v2). Seq v1 v2" - def S1 \ "\s' \ Prefixes s. CV r1 s'" - def S2 \ "\s' \ Suffixes s. CV r2 s'" - have IHs: "\s. finite (CV r1 s)" "\s. finite (CV r2 s)" by fact+ + def S1 \ "\s' \ Prefixes s. LV r1 s'" + def S2 \ "\s' \ Suffixes s. LV r2 s'" + have IHs: "\s. finite (LV r1 s)" "\s. finite (LV r2 s)" by fact+ then have "finite S1" "finite S2" unfolding S1_def S2_def by (simp_all add: finite_Prefixes finite_Suffixes) moreover - have "CV (SEQ r1 r2) s \ f ` (S1 \ S2)" - unfolding f_def S1_def S2_def by (auto simp add: CV_SEQ_subset) + have "LV (SEQ r1 r2) s \ f ` (S1 \ S2)" + unfolding f_def S1_def S2_def + unfolding LV_def image_def prefixeq_def suffixeq_def + by (auto elim: Prf.cases) ultimately - show "finite (CV (SEQ r1 r2) s)" + show "finite (LV (SEQ r1 r2) s)" by (simp add: finite_subset) next case (STAR r s) - then show "finite (CV (STAR r) s)" by (simp add: CV_STAR_finite) + then show "finite (LV (STAR r) s)" by (simp add: LV_STAR_finite) qed @@ -533,14 +471,6 @@ by (induct s r v rule: Posix.induct) (auto simp add: Sequ_def) -lemma Posix_Prf: - assumes "s \ r \ v" - shows "\ v : r" -using assms -apply(induct s r v rule: Posix.induct) -apply(auto intro!: Prf.intros elim!: Prf_elims) -done - text {* Our Posix definition determines a unique value. *} @@ -616,30 +546,31 @@ Our POSIX value is a canonical value. *} -lemma Posix_CV: +lemma Posix_LV: assumes "s \ r \ v" - shows "v \ CV r s" + shows "v \ LV r s" using assms apply(induct rule: Posix.induct) -apply(auto simp add: CV_def intro: CPrf.intros elim: CPrf.cases) +apply(auto simp add: LV_def intro: Prf.intros elim: Prf.cases) apply(rotate_tac 5) -apply(erule CPrf.cases) +apply(erule Prf.cases) apply(simp_all) -apply(rule CPrf.intros) +apply(rule Prf.intros) apply(simp_all) done +(* lemma test2: assumes "\v \ set vs. flat v \ r \ v \ flat v \ []" - shows "(Stars vs) \ CV (STAR r) (flat (Stars vs))" + shows "(Stars vs) \ LV (STAR r) (flat (Stars vs))" using assms apply(induct vs) -apply(auto simp add: CV_def) -apply(rule CPrf.intros) +apply(auto simp add: LV_def) +apply(rule Prf.intros) apply(simp) -apply(rule CPrf.intros) +apply(rule Prf.intros) apply(simp_all) -by (metis (no_types, lifting) CV_def Posix_CV mem_Collect_eq) - +by (metis (no_types, lifting) LV_def Posix_LV mem_Collect_eq) +*) end \ No newline at end of file