# HG changeset patch # User Christian Urban # Date 1457321008 0 # Node ID d74bfa11802c97beca8ec3b1a6817eb1853447bc # Parent 71e26f43c896ab39db031cd6f7ccef6ddf4daaef updated diff -r 71e26f43c896 -r d74bfa11802c thys/Paper/Paper.thy --- a/thys/Paper/Paper.thy Sun Mar 06 20:00:47 2016 +0000 +++ b/thys/Paper/Paper.thy Mon Mar 07 03:23:28 2016 +0000 @@ -13,7 +13,7 @@ notation (latex output) If ("(\<^raw:\textrm{>if\<^raw:}> (_)/ \<^raw:\textrm{>then\<^raw:}> (_)/ \<^raw:\textrm{>else\<^raw:}> (_))" 10) and - Cons ("_\<^raw:\mbox{$\,$}>::\<^raw:\mbox{$\,$}>_" [75,75] 73) and + Cons ("_\<^raw:\mbox{$\,$}>::\<^raw:\mbox{$\,$}>_" [75,73] 73) and ZERO ("\<^bold>0" 78) and ONE ("\<^bold>1" 78) and @@ -32,7 +32,7 @@ L ("L'(_')" [10] 78) and der_syn ("_\\_" [79, 1000] 76) and ders_syn ("_\\_" [79, 1000] 76) and - flat ("|_|" [75] 73) and + flat ("|_|" [75] 74) and Sequ ("_ @ _" [78,77] 63) and injval ("inj _ _ _" [79,77,79] 76) and mkeps ("mkeps _" [79] 76) and @@ -40,7 +40,7 @@ length ("len _" [78] 73) and matcher ("lexer _ _" [78,78] 77) and - Prf ("\ _ : _" [75,75] 75) and + Prf ("_ : _" [75,75] 75) and PMatch ("'(_, _') \ _" [63,75,75] 75) (* and ValOrd ("_ \\<^bsub>_\<^esub> _" [78,77,77] 73) *) @@ -59,8 +59,8 @@ character~@{text c}, and showed that it gave a simple solution to the problem of matching a string @{term s} with a regular expression @{term r}: if the derivative of @{term r} w.r.t.\ (in succession) all the characters of -the string matches the empty string, then @{term r} matches @{term s} -(and {\em vice versa}). The derivative has the property (which may be +the string matches the empty string, then @{term r} matches @{term s} (and +{\em vice versa}). The derivative has the property (which may almost be regarded as its specification) that, for every string @{term s} and regular expression @{term r} and character @{term c}, one has @{term "cs \ L(r)"} if and only if \mbox{@{term "s \ L(der c r)"}}. The beauty of Brzozowski's @@ -68,8 +68,8 @@ and easily definable and reasoned about in theorem provers---the definitions just consist of inductive datatypes and simple recursive functions. A completely formalised correctness proof of this matcher in for example HOL4 -has been mentioned in~\cite{Owens2008}. Another one in Isabelle/HOL is -in \cite{Krauss2011}. +has been mentioned in~\cite{Owens2008}. Another one in Isabelle/HOL is part +of the work in \cite{Krauss2011}. One limitation of Brzozowski's matcher is that it only generates a YES/NO answer for whether a string is being matched by a regular expression. @@ -78,7 +78,7 @@ [lexical] {\em value}. They give a simple algorithm to calculate a value that appears to be the value associated with POSIX matching \cite{Kuklewicz,Vansummeren2006}. The challenge then is to specify that -value, in an algorithm-independent fashion, and to show that Sulzamann and +value, in an algorithm-independent fashion, and to show that Sulzmann and Lu's derivative-based algorithm does indeed calculate a value that is correct according to the specification. @@ -147,28 +147,33 @@ \noindent Consider for example @{text "r\<^bsub>key\<^esub>"} recognising keywords such as @{text "if"}, @{text "then"} and so on; and @{text "r\<^bsub>id\<^esub>"} recognising -identifiers (say, a single character followed by characters or numbers). Then we -can form the regular expression @{text "(r\<^bsub>key\<^esub> + r\<^bsub>id\<^esub>)\<^sup>\"} and use POSIX -matching to tokenise strings, say @{text "iffoo"} and @{text "if"}. In the -first case we obtain by the longest match rule a single identifier token, -not a keyword followed by an identifier. In the second case we obtain by rule -priority a keyword token, not an identifier token---even if @{text "r\<^bsub>id\<^esub>"} -matches also.\bigskip +identifiers (say, a single character followed by characters or numbers). +Then we can form the regular expression @{text "(r\<^bsub>key\<^esub> + r\<^bsub>id\<^esub>)\<^sup>\"} and use +POSIX matching to tokenise strings, say @{text "iffoo"} and @{text "if"}. +For @{text "iffoo"} we obtain by the longest match rule a single identifier +token, not a keyword followed by an identifier. For @{text "if"} we obtain by +rule priority a keyword token, not an identifier token---even if @{text +"r\<^bsub>id\<^esub>"} matches also.\bigskip -\noindent {\bf Contributions:} (NOT DONE YET) We have implemented in Isabelle/HOL the -derivative-based regular expression matching algorithm as described by -Sulzmann and Lu \cite{Sulzmann2014}. We have proved the correctness of this -algorithm according to our specification of what a POSIX value is. The +\noindent {\bf Contributions:} (NOT DONE YET) We have implemented in +Isabelle/HOL the derivative-based regular expression matching algorithm as +described by Sulzmann and Lu \cite{Sulzmann2014}. We have proved the +correctness of this algorithm according to our specification of what a POSIX +value is. Sulzmann and Lu sketch in \cite{Sulzmann2014} an informal +correctness proof: but to us it contains unfillable gaps. + informal correctness proof given in \cite{Sulzmann2014} is in final -form\footnote{} and to us contains unfillable gaps. Our specification of a -POSIX value consists of a simple inductive definition that given a string -and a regular expression uniquely determines this value. Derivatives as -calculated by Brzozowski's method are usually more complex regular -expressions than the initial one; various optimisations are possible, such -as the simplifications of @{term "ALT ZERO r"}, @{term "ALT r ZERO"}, @{term -"SEQ ONE r"} and @{term "SEQ r ONE"} to @{term r}. One of the advantages of -having a simple specification and correctness proof is that the latter can -be refined to allow for such optimisations and simple correctness proof. +form\footnote{} and to us contains unfillable gaps. + +Our specification of a POSIX value consists of a simple inductive definition +that given a string and a regular expression uniquely determines this value. +Derivatives as calculated by Brzozowski's method are usually more complex +regular expressions than the initial one; various optimisations are +possible, such as the simplifications of @{term "ALT ZERO r"}, @{term "ALT r +ZERO"}, @{term "SEQ ONE r"} and @{term "SEQ r ONE"} to @{term r}. One of the +advantages of having a simple specification and correctness proof is that +the latter can be refined to allow for such optimisations and simple +correctness proof. An extended version of \cite{Sulzmann2014} is available at the website of its first author; this includes some ``proofs'', claimed in @@ -220,14 +225,14 @@ \noindent In the fourth clause we use the operation @{term "DUMMY ;; DUMMY"} for the concatenation of two languages (it is also list-append for - strings). We use the star-notation for regular expressions and languages - (in the last clause above). The star on languages is defined inductively - by two clauses: @{text "(i)"} for the empty string being in the star of a - language and @{text "(ii)"} if @{term "s\<^sub>1"} is in a language and @{term - "s\<^sub>2"} in the star of this language, then also @{term "s\<^sub>1 @ s\<^sub>2"} is in - the star of this language. It will also be convenient to use the following - notion of a \emph{semantic derivative} (or \emph{left quotient}) of a - language, say @{text A}, defined as: + strings). We use the star-notation for regular expressions and for + languages (in the last clause above). The star for languages is defined + inductively by two clauses: @{text "(i)"} the empty string being in + the star of a language and @{text "(ii)"} if @{term "s\<^sub>1"} is in a + language and @{term "s\<^sub>2"} in the star of this language, then also @{term + "s\<^sub>1 @ s\<^sub>2"} is in the star of this language. It will also be convenient + to use the following notion of a \emph{semantic derivative} (or \emph{left + quotient}) of a language defined as: \begin{center} \begin{tabular}{lcl} @@ -306,10 +311,10 @@ \noindent gives a positive answer if and only if @{term "s \ L r"}. Consequently, this regular expression matching algorithm satisfies the usual specification. While the matcher above calculates a provably correct - a YES/NO answer for whether a regular expression matches a string, the - novel idea of Sulzmann and Lu \cite{Sulzmann2014} is to append another - phase to this algorithm in order to calculate a [lexical] value. We will - explain the details next. + YES/NO answer for whether a regular expression matches a string, the novel + idea of Sulzmann and Lu \cite{Sulzmann2014} is to append another phase to + this algorithm in order to calculate a [lexical] value. We will explain + the details next. *} @@ -317,10 +322,11 @@ text {* -The clever idea in \cite{Sulzmann2014} is to introduce values for encoding -\emph{how} a regular expression matches a string and then define a function -on values that mirrors (but inverts) the construction of the derivative on -regular expressions. \emph{Values} are defined as the inductive datatype + The clever idea in \cite{Sulzmann2014} is to introduce values for encoding + \emph{how} a regular expression matches a string and then define a + function on values that mirrors (but inverts) the construction of the + derivative on regular expressions. \emph{Values} are defined as the + inductive datatype \begin{center} @{text "v :="} @@ -334,7 +340,7 @@ \noindent where we use @{term vs} standing for a list of values. (This is similar to the approach taken by Frisch and Cardelli for GREEDY matching - \cite{Frisch2014}, and Sulzmann and Lu \cite{2014} for POSIX matching). + \cite{Frisch2004}, and Sulzmann and Lu \cite{Sulzmann2014} for POSIX matching). The string underlying a value can be calculated by the @{const flat} function, written @{term "flat DUMMY"} and defined as: @@ -367,18 +373,18 @@ \noindent Note that no values are associated with the regular expression @{term ZERO}, and that the only value associated with the regular - expression @{term ONE} is @{term Void}, pronounced (if one must) as {\em - ``Void''}. It is routine to establish how values ``inhabiting'' a regular + expression @{term ONE} is @{term Void}, pronounced (if one must) as @{text + "Void"}. It is routine to establish how values ``inhabiting'' a regular expression correspond to the language of a regular expression, namely \begin{proposition} @{thm L_flat_Prf} \end{proposition} - In general there are more than one value associated with a regular + In general there is more than one value associated with a regular expression. In case of POSIX matching the problem is to calculate the - unique value that satisfies the (informal) POSIX constraints from the - Introduction. Graphically the regular expression matching algorithm by + unique value that satisfies the (informal) POSIX rules from the + Introduction. Graphically the POSIX value calculation algorithm by Sulzmann and Lu can be illustrated by the picture in Figure~\ref{Sulz} where the path from the left to the right involving @{const der}/@{const nullable} is the first phase of the algorithm (calculating successive @@ -390,7 +396,7 @@ whether the resulting derivative regular expression @{term "r\<^sub>4"} can match the empty string. If yes, we call the function @{const mkeps} that produces a value @{term "v\<^sub>4"} for how @{term "r\<^sub>4"} can - match the empty string (taking into account the POSIX constraints in case + match the empty string (taking into account the POSIX rules in case there are several ways). This functions is defined by the clauses: \begin{figure}[t] @@ -439,32 +445,33 @@ \noindent Note that this function needs only to be partially defined, namely only for regular expressions that are nullable. In case @{const nullable} fails, the string @{term "[a,b,c]"} cannot be matched by @{term - "r\<^sub>1"} and an error is raised. Note also how this function makes - some subtle choices leading to a POSIX value: for example if the - alternative, say @{term "ALT r\<^sub>1 r\<^sub>2"}, can match the empty - string and furthermore @{term "r\<^sub>1"} can match the empty string, - then we return a @{const Left}-value. The @{const Right}-value will only - be returned if @{term "r\<^sub>1"} is not nullable. + "r\<^sub>1"} and an error is raised instead. Note also how this function + makes some subtle choices leading to a POSIX value: for example if an + alternative regular expression, say @{term "ALT r\<^sub>1 r\<^sub>2"}, can + match the empty string and furthermore @{term "r\<^sub>1"} can match the + empty string, then we return a @{text Left}-value. The @{text + Right}-value will only be returned if @{term "r\<^sub>1"} is not nullable. The most interesting novelty from Sulzmann and Lu \cite{Sulzmann2014} is - the construction value for how @{term "r\<^sub>1"} can match the string - @{term "[a,b,c]"} from the value how the last derivative, @{term + the construction of a value for how @{term "r\<^sub>1"} can match the + string @{term "[a,b,c]"} from the value how the last derivative, @{term "r\<^sub>4"} in Fig~\ref{Sulz}, can match the empty string. Sulzmann and - Lu acchieve this by stepwise ``injecting back'' the characters into the + Lu achieve this by stepwise ``injecting back'' the characters into the values thus inverting the operation of building derivatives on the level of values. The corresponding function, called @{term inj}, takes three arguments, a regular expression, a character and a value. For example in - the first @{term inj}-step in Fig~\ref{Sulz} the regular expression @{term - "r\<^sub>3"}, the character @{term c} from the last derivative step and - @{term "v\<^sub>4"}, which is the value corresponding to the derivative - regular expression @{term "r\<^sub>4"}. The result is the new value @{term - "v\<^sub>3"}. The final result of the algorithm is the value @{term - "v\<^sub>1"} corresponding to the input regular expression. The @{term - inj} function is by recursion on the regular expression and by analysing - the shape of values. + the first (or right-most) @{term inj}-step in Fig~\ref{Sulz} the regular + expression @{term "r\<^sub>3"}, the character @{term c} from the last + derivative step and @{term "v\<^sub>4"}, which is the value corresponding + to the derivative regular expression @{term "r\<^sub>4"}. The result is + the new value @{term "v\<^sub>3"}. The final result of the algorithm is + the value @{term "v\<^sub>1"} corresponding to the input regular + expression. The @{term inj} function is by recursion on the regular + expressions and by analysing the shape of values (corresponding to + the derivative regular expressions). \begin{center} - \begin{tabular}{llcl} + \begin{tabular}{l@ {\hspace{5mm}}lcl} (1) & @{thm (lhs) injval.simps(1)} & $\dn$ & @{thm (rhs) injval.simps(1)}\\ (2) & @{thm (lhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]} & $\dn$ & @{thm (rhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]}\\ @@ -483,8 +490,9 @@ \noindent To better understand what is going on in this definition it might be instructive to look first at the three sequence cases (clauses - (4)--(6)). In each case we need to construct an ``injected value'' for @{term - "SEQ r\<^sub>1 r\<^sub>2"}. Recall the clause of the @{const der}-function + (4)--(6)). In each case we need to construct an ``injected value'' for + @{term "SEQ r\<^sub>1 r\<^sub>2"}. This must be a value of the form @{term + "Seq DUMMY DUMMY"}. Recall the clause of the @{const der}-function for sequence regular expressions: \begin{center} @@ -493,25 +501,28 @@ \noindent Consider first the else-branch where the derivative is @{term "SEQ (der c r\<^sub>1) r\<^sub>2"}. The corresponding value must therefore - be the form @{term "Seq v\<^sub>1 v\<^sub>2"}, which matches clause (4) of - @{term inj}. In the if-branch the derivative is an alternative, namely - @{term "ALT (SEQ (der c r\<^sub>1) r\<^sub>2) (der c r\<^sub>2)"}. This - means we either have to consider a @{text Left}- or @{text Right}-value. - In case of the @{text Left}-value we know further it must be a value for a - sequence regular expression. Therefore the pattern we match in the clause - (5) is @{term "Left (Seq v\<^sub>1 v\<^sub>2)"}, while in (6) it is just - @{term "Right v\<^sub>2"}. One more interesting point is in the right-hand - side of clause (6): since in this case the regular expression @{text - "r\<^sub>1"} does not ``contribute'' in matching the string, that is only - matches the empty string, we need to call @{const mkeps} in order to - construct a value how @{term "r\<^sub>1"} can match this empty string. A - similar argument applies for why we can expect in clause (7) that the - value is of the form @{term "Seq v (Stars vs)"} (the derivative of a star - is @{term "SEQ r (STAR r)"}). Finally, the reason for why we can ignore - the second argument in clause (1) of @{term inj} is that it will only ever - be called in cases where @{term "c=d"}, but the usual linearity - restrictions in pattern-matches do not allow is to build this constraint - explicitly into the pattern. + be the form @{term "Seq v\<^sub>1 v\<^sub>2"}, which matches the left-hand + side in clause (4) of @{term inj}. In the if-branch the derivative is an + alternative, namely @{term "ALT (SEQ (der c r\<^sub>1) r\<^sub>2) (der c + r\<^sub>2)"}. This means we either have to consider a @{text Left}- or + @{text Right}-value. In case of the @{text Left}-value we know further it + must be a value for a sequence regular expression. Therefore the pattern + we match in the clause (5) is @{term "Left (Seq v\<^sub>1 v\<^sub>2)"}, + while in (6) it is just @{term "Right v\<^sub>2"}. One more interesting + point is in the right-hand side of clause (6): since in this case the + regular expression @{text "r\<^sub>1"} does not ``contribute'' for + matching the string, that is only matches the empty string, we need to + call @{const mkeps} in order to construct a value how @{term "r\<^sub>1"} + can match this empty string. A similar argument applies for why we can + expect in the left-hand side of clause (7) that the value is of the form + @{term "Seq v (Stars vs)"}---the derivative of a star is @{term "SEQ r + (STAR r)"}. Finally, the reason for why we can ignore the second argument + in clause (1) of @{term inj} is that it will only ever be called in cases + where @{term "c=d"}, but the usual linearity restrictions in patterns do + not allow is to build this constraint explicitly into our function + definition.\footnote{Sulzmann and Lu state this clause as @{thm (lhs) + injval.simps(1)[of "c" "c"]} $\dn$ @{thm (rhs) injval.simps(1)[of "c"]}, + but our deviation is harmless.} Having defined the @{const mkeps} and @{text inj} function we can extend \Brz's matcher so that a [lexical] value is constructed (assuming the @@ -527,38 +538,81 @@ \end{center} \noindent If the regular expression does not match, @{const None} is - returned. If the regular expression does match the string, then @{const - Some} value is returned. Again the virtues of this algorithm is that it - can be implemented with ease in a functional programming language and also - in Isabelle/HOL. In the remaining part of this section we prove that - this algorithm is correct. + returned, indicating an error is raised. If the regular expression does + match the string, then @{const Some} value is returned. One important + virtue of this algorithm is that it can be implemented with ease in a + functional programming language and also in Isabelle/HOL. In the remaining + part of this section we prove that this algorithm is correct. The well-known idea of POSIX matching is informally defined by the longest match and priority rule; as correctly argued in \cite{Sulzmann2014}, this - needs formal specification. - - We use a simple inductive definition to specify this notion, incorporating - the POSIX-specific choices into the side-conditions for the rules $R tl - +_2$, $R tl\circ$ and $R tl*$ (as they are now called). By contrast, - \cite{Sulzmann2014} defines a relation between values and argues that there is a - maximum value, as given by the derivative-based algorithm yet to be spelt - out. The relation we define is ternary, relating strings, values and regular - expressions. - -Our Posix relation @{term "s \ r \ v"} + needs formal specification. Sulzmann and Lu define a \emph{dominance} + relation\footnote{Sulzmann and Lu call it an ordering relation, but + without giving evidence that it is transitive.} between values and argue that + there is a maximum value, as given by the derivative-based algorithm. In + contrast, we shall next introduce a simple inductive definition to specify + what a \emph{POSIX value} is, incorporating the POSIX-specific choices + into the side-conditions of our rules. Our definition is inspired by the + matching relation given in \cite{Vansummeren2006}. The relation we define + is ternary and written as \mbox{@{term "s \ r \ v"}}, relating strings, + regular expressions and values. \begin{center} \begin{tabular}{c} @{thm[mode=Axiom] PMatch.intros(1)} \qquad - @{thm[mode=Axiom] PMatch.intros(2)}\medskip\\ + @{thm[mode=Axiom] PMatch.intros(2)}\bigskip\\ @{thm[mode=Rule] PMatch.intros(3)[of "s" "r\<^sub>1" "v" "r\<^sub>2"]}\qquad - @{thm[mode=Rule] PMatch.intros(4)[of "s" "r\<^sub>2" "v" "r\<^sub>1"]}\medskip\\ - \multicolumn{1}{p{5cm}}{@{thm[mode=Rule] PMatch.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}\medskip\\ - @{thm[mode=Rule] PMatch.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}\medskip\\ - @{thm[mode=Axiom] PMatch.intros(7)}\medskip\\ + @{thm[mode=Rule] PMatch.intros(4)[of "s" "r\<^sub>2" "v" "r\<^sub>1"]}\bigskip\\ + $\mprset{flushleft} + \inferrule + {@{thm (prem 1) PMatch.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \qquad + @{thm (prem 2) PMatch.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \\\\ + @{thm (prem 3) PMatch.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}} + {@{thm (concl) PMatch.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}$\\ + @{thm[mode=Axiom] PMatch.intros(7)}\bigskip\\ + $\mprset{flushleft} + \inferrule + {@{thm (prem 1) PMatch.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad + @{thm (prem 2) PMatch.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad + @{thm (prem 3) PMatch.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \\\\ + @{thm (prem 4) PMatch.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}} + {@{thm (concl) PMatch.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}$ \end{tabular} \end{center} + \noindent We claim that this relation captures the idea behind the two + informal POSIX rules shown in the Introduction: Consider the second line + where the POSIX values for an alternative regular expression is + specified---it is always a @{text "Left"}-value, \emph{except} when the + string to be matched is not in the language of @{term "r\<^sub>1"}; only then it + is a @{text Right}-value (see the side-condition in the rule on the + right). Interesting is also the rule for sequence regular expressions + (third line). The first two premises state that @{term "v\<^sub>1"} and + @{term "v\<^sub>2"} are the POSIX values for @{term "(s\<^sub>1, + r\<^sub>1)"} and @{term "(s\<^sub>2, r\<^sub>2)"} respectively. + + \begin{theorem} + @{thm[mode=IfThen] PMatch_determ(1)[of _ _ "v\<^sub>1" "v\<^sub>2"]} + \end{theorem} + + \begin{lemma} + @{thm[mode=IfThen] PMatch_mkeps} + \end{lemma} + + \begin{lemma} + @{thm[mode=IfThen] Prf_injval_flat} + \end{lemma} + + \begin{lemma} + @{thm[mode=IfThen] PMatch2_roy_version} + \end{lemma} + + \begin{theorem}\mbox{}\smallskip\\ + \begin{tabular}{ll} + (1) & @{thm (lhs) lex_correct1a} if and only if @{thm (rhs) lex_correct1a}\\ + (2) & @{thm (lhs) lex_correct3a} if and only if @{thm (rhs) lex_correct3a}\\ + \end{tabular} + \end{theorem} *} section {* The Argument by Sulzmmann and Lu *} @@ -575,42 +629,6 @@ text {* - - - \noindent - Values - - - - - - - - \noindent - The @{const mkeps} function - - - - \noindent - The @{text inj} function - - - - \noindent - The inhabitation relation: - - \begin{center} - \begin{tabular}{c} - @{thm[mode=Rule] Prf.intros(1)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]}\medskip\\ - @{thm[mode=Rule] Prf.intros(2)[of "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]} \qquad - @{thm[mode=Rule] Prf.intros(3)[of "v\<^sub>2" "r\<^sub>1" "r\<^sub>2"]}\medskip\\ - @{thm[mode=Axiom] Prf.intros(4)} \qquad - @{thm[mode=Axiom] Prf.intros(5)[of "c"]}\medskip\\ - @{thm[mode=Axiom] Prf.intros(6)[of "r"]} \qquad - @{thm[mode=Rule] Prf.intros(7)[of "v" "r" "vs"]}\medskip\\ - \end{tabular} - \end{center} - \noindent We have also introduced a slightly restricted version of this relation where the last rule is restricted so that @{term "flat v \ []"}. diff -r 71e26f43c896 -r d74bfa11802c thys/Paper/document/root.bib --- a/thys/Paper/document/root.bib Sun Mar 06 20:00:47 2016 +0000 +++ b/thys/Paper/document/root.bib Mon Mar 07 03:23:28 2016 +0000 @@ -1,3 +1,4 @@ + @inproceedings{Sulzmann2014, author = {M.~Sulzmann and K.~Lu}, title = {{POSIX} {R}egular {E}xpression {P}arsing with {D}erivatives}, diff -r 71e26f43c896 -r d74bfa11802c thys/ReStar.thy --- a/thys/ReStar.thy Sun Mar 06 20:00:47 2016 +0000 +++ b/thys/ReStar.thy Mon Mar 07 03:23:28 2016 +0000 @@ -722,6 +722,20 @@ apply(simp add: der_correctness Der_def) done +lemma lex_correct1a: + shows "s \ L r \ matcher r s = None" +using assms +apply(induct s arbitrary: r) +apply(simp) +apply (metis nullable_correctness) +apply(auto) +apply(drule_tac x="der a r" in meta_spec) +apply(auto) +apply(simp add: der_correctness Der_def) +apply(drule_tac x="der a r" in meta_spec) +apply(auto) +apply(simp add: der_correctness Der_def) +done lemma lex_correct2: assumes "s \ L r" @@ -751,6 +765,21 @@ apply(auto) by (metis PMatch2_roy_version) +lemma lex_correct3a: + shows "s \ L r \ (\v. matcher r s = Some(v) \ s \ r \ v)" +using assms +apply(induct s arbitrary: r) +apply(simp) +apply (metis PMatch_mkeps nullable_correctness) +apply(drule_tac x="der a r" in meta_spec) +apply(auto) +apply(metis PMatch2_roy_version) +apply(simp add: der_correctness Der_def) +using lex_correct1a +apply fastforce +apply(simp add: der_correctness Der_def) +by (simp add: lex_correct1a) + lemma lex_correct5: assumes "s \ L r" shows "s \ r \ (matcher2 r s)" diff -r 71e26f43c896 -r d74bfa11802c thys/paper.pdf Binary file thys/paper.pdf has changed