--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/thys/Journal/PaperExt.thy Tue Aug 21 14:14:19 2018 +0100
@@ -0,0 +1,323 @@
+(*<*)
+theory PaperExt
+imports
+ (*"../LexerExt"*)
+ (*"../PositionsExt"*)
+ "~~/src/HOL/Library/LaTeXsugar"
+begin
+(*>*)
+
+(*
+declare [[show_question_marks = false]]
+declare [[eta_contract = false]]
+
+abbreviation
+ "der_syn r c \<equiv> der c r"
+
+abbreviation
+ "ders_syn r s \<equiv> ders s r"
+
+
+notation (latex output)
+ If ("(\<^latex>\<open>\\textrm{\<close>if\<^latex>\<open>}\<close> (_)/ \<^latex>\<open>\\textrm{\<close>then\<^latex>\<open>}\<close> (_)/ \<^latex>\<open>\\textrm{\<close>else\<^latex>\<open>}\<close> (_))" 10) and
+ Cons ("_\<^latex>\<open>\\mbox{$\\,$}\<close>::\<^latex>\<open>\\mbox{$\\,$}\<close>_" [75,73] 73) and
+
+ ZERO ("\<^bold>0" 81) and
+ ONE ("\<^bold>1" 81) and
+ CHAR ("_" [1000] 80) and
+ ALT ("_ + _" [77,77] 78) and
+ SEQ ("_ \<cdot> _" [77,77] 78) and
+ STAR ("_\<^sup>\<star>" [78] 78) and
+ NTIMES ("_\<^bsup>'{_'}\<^esup>" [78, 50] 80) and
+ FROMNTIMES ("_\<^bsup>'{_..'}\<^esup>" [78, 50] 80) and
+ UPNTIMES ("_\<^bsup>'{.._'}\<^esup>" [78, 50] 80) and
+ NMTIMES ("_\<^bsup>'{_.._'}\<^esup>" [78, 50,50] 80) and
+
+ val.Void ("Empty" 78) and
+ val.Char ("Char _" [1000] 78) and
+ val.Left ("Left _" [79] 78) and
+ val.Right ("Right _" [1000] 78) and
+ val.Seq ("Seq _ _" [79,79] 78) and
+ val.Stars ("Stars _" [1000] 78) and
+
+ L ("L'(_')" [10] 78) and
+ LV ("LV _ _" [80,73] 78) and
+ der_syn ("_\\_" [79, 1000] 76) and
+ ders_syn ("_\\_" [79, 1000] 76) and
+ flat ("|_|" [75] 74) and
+ flats ("|_|" [72] 74) and
+ Sequ ("_ @ _" [78,77] 63) and
+ injval ("inj _ _ _" [81,77,79] 76) and
+ mkeps ("mkeps _" [79] 76) and
+ length ("len _" [73] 73) and
+ intlen ("len _" [73] 73) and
+ set ("_" [73] 73) and
+
+ Prf ("_ : _" [75,75] 75) and
+ Posix ("'(_, _') \<rightarrow> _" [63,75,75] 75) and
+
+ lexer ("lexer _ _" [78,78] 77) and
+ DUMMY ("\<^latex>\<open>\\underline{\\hspace{2mm}}\<close>")
+*)
+(*>*)
+
+(*
+section {* Extensions*}
+
+text {*
+
+ A strong point in favour of
+ Sulzmann and Lu's algorithm is that it can be extended in various
+ ways. If we are interested in tokenising a string, then we need not just to
+ split up the string into tokens, but also to ``classify'' the tokens (for
+ example as keywords or identifiers). This can be done with
+ only minor modifications to the algorithm by introducing \emph{record
+ regular expressions} and \emph{record values} (for example
+ \cite{Sulzmann2014b}):
+
+ \begin{center}
+ @{text "r :="}
+ @{text "..."} $\mid$
+ @{text "(l : r)"} \qquad\qquad
+ @{text "v :="}
+ @{text "..."} $\mid$
+ @{text "(l : v)"}
+ \end{center}
+
+ \noindent where @{text l} is a label, say a string, @{text r} a regular
+ expression and @{text v} a value. All functions can be smoothly extended
+ to these regular expressions and values. For example \mbox{@{text "(l :
+ r)"}} is nullable iff @{term r} is, and so on. The purpose of the record
+ regular expression is to mark certain parts of a regular expression and
+ then record in the calculated value which parts of the string were matched
+ by these marked parts. The labels can then serve as classifications for the tokens.
+ For this recall the regular expression @{text "(r\<^bsub>key\<^esub> + r\<^bsub>id\<^esub>)\<^sup>\<star>"} for
+ keywords and identifiers from the Introduction. With the record regular
+ expression we can form \mbox{@{text "((key : r\<^bsub>key\<^esub>) + (id : r\<^bsub>id\<^esub>))\<^sup>\<star>"}}
+ and then traverse the calculated value and collect only the underlying
+ strings of record values. With this we obtain finite sequences of pairs of
+ labels and strings, for example
+
+ \[@{text "(l\<^sub>1 : s\<^sub>1), ..., (l\<^sub>n : s\<^sub>n)"}\]
+
+ \noindent from which tokens with classifications (keyword-token,
+ identifier-token and so on) can be extracted.
+
+ In the context of POSIX matching, it is also interesting to study additional
+ constructors for bounded repetitions of regular expressions. For this, let
+ us extend the results from the previous section to the following four
+ additional regular expression constructors:
+
+ \begin{center}
+ \begin{tabular}{lcrl@ {\hspace{12mm}}l}
+ @{text r} & @{text ":="} & $\ldots\mid$ & @{term "NTIMES r n"} & exactly-@{text n}-times\\
+ & & $\mid$ & @{term "UPNTIMES r n"} & upto-@{text n}-times\\
+ & & $\mid$ & @{term "FROMNTIMES r n"} & from-@{text n}-times\\
+ & & $\mid$ & @{term "NMTIMES r n m"} & between-@{text n}-and-@{text m}-times\\
+ \end{tabular}
+ \end{center}
+
+ \noindent
+ We will call them \emph{bounded regular expressions}.
+ With the help of the power operator (definition omitted) for sets of strings, the languages
+ recognised by these regular expressions can be defined in Isabelle as follows:
+
+ \begin{center}
+ \begin{tabular}{lcl}
+ @{thm (lhs) L.simps(8)} & $\dn$ & @{thm (rhs) L.simps(8)}\\
+ @{thm (lhs) L.simps(7)} & $\dn$ & @{thm (rhs) L.simps(7)}\\
+ @{thm (lhs) L.simps(9)} & $\dn$ & @{thm (rhs) L.simps(9)}\\
+ @{thm (lhs) L.simps(10)} & $\dn$ & @{thm (rhs) L.simps(10)}\\
+ \end{tabular}
+ \end{center}
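+
+ \noindent For instance, for a single character @{text a} these clauses yield
+ \mbox{@{text "L(a\<^bsup>{2..3}\<^esup>) = {aa, aaa}"}} and
+ \mbox{@{text "L(a\<^bsup>{..2}\<^esup>) = {[], a, aa}"}}.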
+
+ \noindent This definition implies that in the last clause @{term
+ "NMTIMES r n m"} matches no string in case @{term "m < n"}, because
+ then the interval @{term "{n..m}"} is empty. While the languages
+ recognised by these regular expressions are straightforward to define, some
+ care is needed in how to define the corresponding lexical
+ values. First, with a slight abuse of language, we will (re)use
+ values of the form @{term "Stars vs"} for the values inhabiting
+ bounded regular expressions. Second, we need to introduce inductive
+ rules extending our inhabitation relation shown on
+ Page~\ref{prfintros}, from which we then derived our notion of
+ lexical values. Given the rule for @{term "STAR r"}, the rule for
+ @{term "UPNTIMES r n"} additionally requires that the length of
+ the list of values is at most @{term n}:
+
+ \begin{center}
+ @{thm[mode=Rule] Prf.intros(7)[of "vs"]}
+ \end{center}
+
+ \noindent As in the @{term "STAR r"}-rule, the left premise requires
+ that some non-empty part of the string is `chipped'
+ away by \emph{every} value in @{text vs}, that is, none of the corresponding
+ values flattens to the empty string. In the rule for @{term
+ "NTIMES r n"} (that is exactly-@{term n}-times @{text r}) we clearly need to require
+ that the length of the list of values equals @{text n}. But enforcing
+ that every one of these @{term n} values `chips' away some part of the string
+ would be too strong.
+
+
+ \begin{center}
+ @{thm[mode=Rule] Prf.intros(8)[of "vs\<^sub>1" r "vs\<^sub>2"]}
+ \end{center}
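+
+ \noindent For instance, the regular expression @{text "a + 1"} repeated
+ exactly twice matches the single-character string @{text a}: a corresponding
+ value is @{text "Stars [Left (Char a), Right Empty]"}, whose second component
+ flattens to the empty string, and insisting that both component values
+ `chip' away a non-empty part of the string would wrongly exclude it.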
+
+ \begin{center}
+ \begin{tabular}{lcl}
+ @{thm (lhs) der.simps(8)} & $\dn$ & @{thm (rhs) der.simps(8)}\\
+ @{thm (lhs) der.simps(7)} & $\dn$ & @{thm (rhs) der.simps(7)}\\
+ @{thm (lhs) der.simps(9)} & $\dn$ & @{thm (rhs) der.simps(9)}\\
+ @{thm (lhs) der.simps(10)} & $\dn$ & @{thm (rhs) der.simps(10)}\\
+ \end{tabular}
+ \end{center}
+
+
+ \begin{center}
+ \begin{tabular}{lcl}
+ @{thm (lhs) mkeps.simps(5)} & $\dn$ & @{thm (rhs) mkeps.simps(5)}\\
+ @{thm (lhs) mkeps.simps(6)} & $\dn$ & @{thm (rhs) mkeps.simps(6)}\\
+ @{thm (lhs) mkeps.simps(7)} & $\dn$ & @{thm (rhs) mkeps.simps(7)}\\
+ @{thm (lhs) mkeps.simps(8)} & $\dn$ & @{thm (rhs) mkeps.simps(8)}\\
+ \end{tabular}
+ \end{center}
+
+ \begin{center}
+ \begin{tabular}{lcl}
+ @{thm (lhs) injval.simps(8)} & $\dn$ & @{thm (rhs) injval.simps(8)}\\
+ @{thm (lhs) injval.simps(9)} & $\dn$ & @{thm (rhs) injval.simps(9)}\\
+ @{thm (lhs) injval.simps(10)} & $\dn$ & @{thm (rhs) injval.simps(10)}\\
+ @{thm (lhs) injval.simps(11)} & $\dn$ & @{thm (rhs) injval.simps(11)}\\
+ \end{tabular}
+ \end{center}
+
+
+ @{thm [mode=Rule] Posix_NTIMES1[of "s\<^sub>1" r v "s\<^sub>2"]}
+
+ @{thm [mode=Rule] Posix_NTIMES2}
+
+ @{thm [mode=Rule] Posix_UPNTIMES1[of "s\<^sub>1" r v "s\<^sub>2"]}
+
+ @{thm [mode=Rule] Posix_UPNTIMES2}
+
+ @{thm [mode=Rule] Posix_FROMNTIMES1[of "s\<^sub>1" r v "s\<^sub>2"]}
+
+ @{thm [mode=Rule] Posix_FROMNTIMES3[of "s\<^sub>1" r v "s\<^sub>2"]}
+
+ @{thm [mode=Rule] Posix_FROMNTIMES2}
+
+ @{thm [mode=Rule] Posix_NMTIMES1[of "s\<^sub>1" r v "s\<^sub>2"]}
+
+ @{thm [mode=Rule] Posix_NMTIMES3[of "s\<^sub>1" r v "s\<^sub>2"]}
+
+ @{thm [mode=Rule] Posix_NMTIMES2}
+
+
+
+ @{term "\<Sum> i \<in> {m..n} . P i"} @{term "\<Sum> i \<in> {..n} . P i"}
+ @{term "\<Union> i \<in> {m..n} . P i"} @{term "\<Union> i \<in> {..n} . P i"}
+ @{term "\<Union> i \<in> {0::nat..n} . P i"}
+
+
+
+*}
+
+
+
+section {* The Correctness Argument by Sulzmann and Lu\label{argu} *}
+
+text {*
+% \newcommand{\greedy}{\succcurlyeq_{gr}}
+ \newcommand{\posix}{>}
+
+ An extended version of \cite{Sulzmann2014} is available at the website of
+ its first author; this includes some ``proofs'', claimed in
+ \cite{Sulzmann2014} to be ``rigorous''. Since these are evidently not in
+ final form, we make no comment thereon, preferring to give general reasons
+ for our belief that the approach of \cite{Sulzmann2014} is problematic.
+ Their central definition is an ``ordering relation'' defined by the
+ rules (slightly adapted to fit our notation):
+
+ ??
+
+ \noindent The idea behind the rules (A1) and (A2), for example, is that a
+ @{text Left}-value is bigger than a @{text Right}-value if the underlying
+ string of the @{text Left}-value is longer than, or of equal length to, the
+ underlying string of the @{text Right}-value. The order is reversed,
+ however, if the @{text Right}-value can match a longer string than a
+ @{text Left}-value. In this way the POSIX value is supposed to be the
+ biggest value for a given string and regular expression.
+
+ Sulzmann and Lu explicitly refer to the paper \cite{Frisch2004} by Frisch
+ and Cardelli from where they have taken the idea for their correctness
+ proof. Frisch and Cardelli introduced a similar ordering for GREEDY
+ matching and they showed that their GREEDY matching algorithm always
+ produces a maximal element according to this ordering (from all possible
+ solutions). The only difference between their GREEDY ordering and the
+ ``ordering'' by Sulzmann and Lu is that GREEDY always prefers a @{text
+ Left}-value over a @{text Right}-value, no matter what the underlying
+ string is. This seems to be only a very minor difference, but it has
+ drastic consequences in terms of what properties both orderings enjoy.
+ What is interesting for our purposes is that the properties reflexivity,
+ totality and transitivity for this GREEDY ordering can be proved
+ relatively easily by induction.
+*}
+*)
+
+section {* Conclusion *}
+
+text {*
+
+ We have implemented the POSIX value calculation algorithm introduced by
+ Sulzmann and Lu
+ \cite{Sulzmann2014}. Our implementation is nearly identical to the
+ original and all modifications we introduced are harmless (like our char-clause for
+ @{text inj}). We have proved this algorithm to be correct, but correct
+ according to our own specification of what POSIX values are. Our
+ specification (inspired by the work of Vansummeren \cite{Vansummeren2006}) appears to be
+ much simpler than the one in \cite{Sulzmann2014} and our proofs are nearly always
+ straightforward. We have attempted to formalise the original proof
+ by Sulzmann and Lu \cite{Sulzmann2014}, but we believe it contains
+ unfillable gaps. In the online version of \cite{Sulzmann2014}, the authors
+ already acknowledge some small problems, but our experience suggests
+ that there are more serious problems.
+
+ Having proved the correctness of the POSIX lexing algorithm in
+ \cite{Sulzmann2014}, what lessons have we learned? Well, this is a
+ perfect example of the importance of the \emph{right} definitions. We
+ have (on and off) explored mechanisations ever since the first versions
+ of \cite{Sulzmann2014} appeared, but made little progress with
+ turning the relatively detailed proof sketch in \cite{Sulzmann2014} into a
+ formalisable proof. Having seen \cite{Vansummeren2006} and adapted the
+ POSIX definition given there for the algorithm by Sulzmann and Lu made all
+ the difference: the proofs, as said, are nearly straightforward. The
+ question remains whether the original proof idea of \cite{Sulzmann2014},
+ potentially using our result as a stepping stone, can be made to work.
+ Alas, despite considerable effort, we really do not know.
+
+
+ Closely related to our work is an automata-based lexer formalised by
+ Nipkow \cite{Nipkow98}. This lexer also splits up strings into longest
+ initial substrings, but Nipkow's algorithm is not completely
+ computational. The algorithm by Sulzmann and Lu, in contrast, can be
+ implemented with ease in any functional language. A bespoke lexer for the
+ Imp-language is formalised in Coq as part of the Software Foundations book
+ by Pierce et al.\ \cite{Pierce2015}. The disadvantage of such bespoke lexers is that they
+ do not generalise easily to more advanced features.
+ Our formalisation is available from the Archive of Formal Proofs \cite{aduAFP16}
+ under \url{http://www.isa-afp.org/entries/Posix-Lexing.shtml}.\medskip
+
+ \noindent
+ {\bf Acknowledgements:}
+ We are very grateful to Martin Sulzmann for his comments on our work and
+ moreover for patiently explaining to us the details in \cite{Sulzmann2014}. We
+ also received very helpful comments from James Cheney and anonymous referees.
+ % \small
+ \bibliographystyle{plain}
+ \bibliography{root}
+
+*}
+
+(*<*)
+end
+(*>*)
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/thys/SpecAlts.thy Tue Aug 21 14:14:19 2018 +0100
@@ -0,0 +1,762 @@
+
+theory SpecAlts
+ imports Main "~~/src/HOL/Library/Sublist"
+begin
+
+section {* Sequential Composition of Languages *}
+
+definition
+ Sequ :: "string set \<Rightarrow> string set \<Rightarrow> string set" ("_ ;; _" [100,100] 100)
+where
+ "A ;; B = {s1 @ s2 | s1 s2. s1 \<in> A \<and> s2 \<in> B}"
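+
+(* For example {''ab''} ;; {''c'', []} = {''abc'', ''ab''}: every string of
+   the first language is concatenated with every string of the second. *)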
+
+text {* Two Simple Properties about Sequential Composition *}
+
+lemma Sequ_empty_string [simp]:
+ shows "A ;; {[]} = A"
+ and "{[]} ;; A = A"
+by (simp_all add: Sequ_def)
+
+lemma Sequ_empty [simp]:
+ shows "A ;; {} = {}"
+ and "{} ;; A = {}"
+by (simp_all add: Sequ_def)
+
+
+section {* Semantic Derivative (Left Quotient) of Languages *}
+
+definition
+ Der :: "char \<Rightarrow> string set \<Rightarrow> string set"
+where
+ "Der c A \<equiv> {s. c # s \<in> A}"
+
+definition
+ Ders :: "string \<Rightarrow> string set \<Rightarrow> string set"
+where
+ "Ders s A \<equiv> {s'. s @ s' \<in> A}"
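+
+(* Der removes a leading character from every string in a language that
+   starts with it, e.g. Der a {''abc'', ''b''} = {''bc''}; similarly
+   Ders ''ab'' {''abc''} = {''c''}. *)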
+
+lemma Der_null [simp]:
+ shows "Der c {} = {}"
+unfolding Der_def
+by auto
+
+lemma Der_empty [simp]:
+ shows "Der c {[]} = {}"
+unfolding Der_def
+by auto
+
+lemma Der_char [simp]:
+ shows "Der c {[d]} = (if c = d then {[]} else {})"
+unfolding Der_def
+by auto
+
+lemma Der_union [simp]:
+ shows "Der c (A \<union> B) = Der c A \<union> Der c B"
+unfolding Der_def
+ by auto
+
+lemma Der_Union [simp]:
+ shows "Der c (\<Union>B. A) = (\<Union>B. Der c A)"
+unfolding Der_def
+by auto
+
+lemma Der_Sequ [simp]:
+ shows "Der c (A ;; B) = (Der c A) ;; B \<union> (if [] \<in> A then Der c B else {})"
+unfolding Der_def Sequ_def
+by (auto simp add: Cons_eq_append_conv)
+
+
+section {* Kleene Star for Languages *}
+
+inductive_set
+ Star :: "string set \<Rightarrow> string set" ("_\<star>" [101] 102)
+ for A :: "string set"
+where
+ start[intro]: "[] \<in> A\<star>"
+| step[intro]: "\<lbrakk>s1 \<in> A; s2 \<in> A\<star>\<rbrakk> \<Longrightarrow> s1 @ s2 \<in> A\<star>"
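+
+(* A\<star> contains all finite concatenations of strings from A, including the
+   empty string; e.g. for A = {''a'', ''bb''} it contains [], ''a'', ''bb'',
+   ''aa'', ''abb'', ''bba'' and so on. *)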
+
+(* Arden's lemma *)
+
+lemma Star_cases:
+ shows "A\<star> = {[]} \<union> A ;; A\<star>"
+unfolding Sequ_def
+by (auto) (metis Star.simps)
+
+lemma Star_decomp:
+ assumes "c # x \<in> A\<star>"
+ shows "\<exists>s1 s2. x = s1 @ s2 \<and> c # s1 \<in> A \<and> s2 \<in> A\<star>"
+using assms
+by (induct x\<equiv>"c # x" rule: Star.induct)
+ (auto simp add: append_eq_Cons_conv)
+
+lemma Star_Der_Sequ:
+ shows "Der c (A\<star>) \<subseteq> (Der c A) ;; A\<star>"
+unfolding Der_def Sequ_def
+by(auto simp add: Star_decomp)
+
+
+lemma Der_star [simp]:
+ shows "Der c (A\<star>) = (Der c A) ;; A\<star>"
+proof -
+ have "Der c (A\<star>) = Der c ({[]} \<union> A ;; A\<star>)"
+ by (simp only: Star_cases[symmetric])
+ also have "... = Der c (A ;; A\<star>)"
+ by (simp only: Der_union Der_empty) (simp)
+ also have "... = (Der c A) ;; A\<star> \<union> (if [] \<in> A then Der c (A\<star>) else {})"
+ by simp
+ also have "... = (Der c A) ;; A\<star>"
+ using Star_Der_Sequ by auto
+ finally show "Der c (A\<star>) = (Der c A) ;; A\<star>" .
+qed
+
+
+section {* Regular Expressions *}
+
+datatype rexp =
+ ZERO
+| ONE
+| CHAR char
+| SEQ rexp rexp
+| ALTS "rexp list"
+| STAR rexp
+
+section {* Semantics of Regular Expressions *}
+
+fun
+ L :: "rexp \<Rightarrow> string set"
+where
+ "L (ZERO) = {}"
+| "L (ONE) = {[]}"
+| "L (CHAR c) = {[c]}"
+| "L (SEQ r1 r2) = (L r1) ;; (L r2)"
+| "L (ALTS rs) = (\<Union>r \<in> set rs. L r)"
+| "L (STAR r) = (L r)\<star>"
+
+
+section {* Nullable, Derivatives *}
+
+fun
+ nullable :: "rexp \<Rightarrow> bool"
+where
+ "nullable (ZERO) = False"
+| "nullable (ONE) = True"
+| "nullable (CHAR c) = False"
+| "nullable (ALTS rs) = (\<exists>r \<in> set rs. nullable r)"
+| "nullable (SEQ r1 r2) = (nullable r1 \<and> nullable r2)"
+| "nullable (STAR r) = True"
+
+
+fun
+ der :: "char \<Rightarrow> rexp \<Rightarrow> rexp"
+where
+ "der c (ZERO) = ZERO"
+| "der c (ONE) = ZERO"
+| "der c (CHAR d) = (if c = d then ONE else ZERO)"
+| "der c (ALTS rs) = ALTS (map (der c) rs)"
+| "der c (SEQ r1 r2) =
+ (if nullable r1
+ then ALTS [SEQ (der c r1) r2, der c r2]
+ else SEQ (der c r1) r2)"
+| "der c (STAR r) = SEQ (der c r) (STAR r)"
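+
+(* Worked example: der a (SEQ (CHAR a) (CHAR b)) = SEQ ONE (CHAR b), since
+   CHAR a is not nullable; then der b (SEQ ONE (CHAR b)) =
+   ALTS [SEQ ZERO (CHAR b), ONE], which is nullable, reflecting that the
+   string ab is in the language of SEQ (CHAR a) (CHAR b). *)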
+
+fun
+ ders :: "string \<Rightarrow> rexp \<Rightarrow> rexp"
+where
+ "ders [] r = r"
+| "ders (c # s) r = ders s (der c r)"
+
+
+lemma nullable_correctness:
+ shows "nullable r \<longleftrightarrow> [] \<in> (L r)"
+by (induct r) (auto simp add: Sequ_def)
+
+lemma der_correctness:
+ shows "L (der c r) = Der c (L r)"
+ apply(induct r)
+ apply(simp_all add: nullable_correctness)
+ apply(auto simp add: Der_def)
+ done
+
+lemma ders_correctness:
+ shows "L (ders s r) = Ders s (L r)"
+by (induct s arbitrary: r)
+ (simp_all add: Ders_def der_correctness Der_def)
+
+(* flatten nested ALTS and drop ZEROs at the level of regular expressions;
+   called flts to keep it distinct from the value-level flats further below *)
+fun flts :: "rexp list \<Rightarrow> rexp list"
+  where
+  "flts [] = []"
+| "flts (ZERO # rs1) = flts rs1"
+| "flts ((ALTS rs1) # rs2) = rs1 @ (flts rs2)"
+| "flts (r1 # rs2) = r1 # flts rs2"
+
+fun simp_SEQ where
+ "simp_SEQ ONE r\<^sub>2 = r\<^sub>2"
+| "simp_SEQ r\<^sub>1 ONE = r\<^sub>1"
+| "simp_SEQ r\<^sub>1 r\<^sub>2 = SEQ r\<^sub>1 r\<^sub>2"
+
+fun
+ simp :: "rexp \<Rightarrow> rexp"
+where
+ "simp (ALTS rs) = ALTS (remdups (flts (map simp rs)))"
+| "simp (SEQ r1 r2) = simp_SEQ (simp r1) (simp r2)"
+| "simp r = r"
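+
+(* simp flattens nested alternatives, dropping ZEROs and duplicates among the
+   alternatives, and removes superfluous ONEs in sequences; for example
+   simp (ALTS [ZERO, ALTS [CHAR a, CHAR a]]) = ALTS [CHAR a] and
+   simp (SEQ ONE r) = simp r. *)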
+
+lemma simp_SEQ_correctness:
+ shows "L (simp_SEQ r1 r2) = L (SEQ r1 r2)"
+ apply(induct r1 r2 rule: simp_SEQ.induct)
+ apply(simp_all)
+ done
+
+lemma flats_correctness:
+ shows "(\<Union>r \<in> set (flts rs). L r) = L (ALTS rs)"
+ apply(induct rs rule: flts.induct)
+ apply(simp_all)
+ done
+
+
+lemma simp_correctness:
+ shows "L (simp r) = L r"
+ apply(induct r)
+ apply(simp_all)
+ apply(simp add: simp_SEQ_correctness)
+ apply(simp add: flats_correctness)
+ done
+
+fun
+ ders2 :: "string \<Rightarrow> rexp \<Rightarrow> rexp"
+where
+ "ders2 [] r = r"
+| "ders2 (c # s) r = ders2 s (simp (der c r))"
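+
+(* Interleaving simp keeps derivatives small: for instance
+   ders2 ''aa'' (STAR (CHAR a)) = STAR (CHAR a), whereas the unsimplified
+   ders ''aa'' (STAR (CHAR a)) is the bigger
+   ALTS [SEQ ZERO (STAR (CHAR a)), SEQ ONE (STAR (CHAR a))]. *)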
+
+lemma ders2_ZERO:
+ shows "ders2 s ZERO = ZERO"
+ apply(induct s)
+ apply(simp_all)
+ done
+
+lemma ders2_ONE:
+ shows "ders2 s ONE \<in> {ZERO, ONE}"
+ apply(induct s)
+ apply(simp_all)
+ apply(auto)
+ apply(case_tac s)
+ apply(auto)
+ apply(case_tac s)
+ apply(auto)
+ done
+
+lemma ders2_CHAR:
+ shows "ders2 s (CHAR c) \<in> {ZERO, ONE, CHAR c}"
+ apply(induct s)
+ apply(simp_all)
+ apply(auto simp add: ders2_ZERO)
+ apply(case_tac s)
+ apply(auto simp add: ders2_ZERO)
+ using ders2_ONE
+ apply(auto)[1]
+ using ders2_ONE
+ apply(auto)[1]
+ done
+
+lemma remdup_size:
+ shows "size_list f (remdups rs) \<le> size_list f rs"
+ apply(induct rs)
+ apply(simp_all)
+ done
+
+lemma flats_append:
+ shows "flts (rs1 @ rs2) = (flts rs1) @ (flts rs2)"
+ apply(induct rs1 arbitrary: rs2)
+ apply(auto)
+ apply(case_tac a)
+ apply(auto)
+ done
+
+lemma flats_Cons:
+ shows "flts (r # rs) = (flts [r]) @ (flts rs)"
+ apply(subst flats_append[symmetric])
+ apply(simp)
+ done
+
+lemma flats_size:
+ shows "size_list (\<lambda>x. size (ders2 s x)) (flts rs) \<le> size_list (\<lambda>x. size (ders2 s x)) rs"
+ apply(induct rs arbitrary: s rule: flts.induct)
+ apply(simp_all)
+ apply(simp add: ders2_ZERO)
+ apply (simp add: le_SucI)
+
+ apply(subst flats_Cons)
+ apply(simp)
+ apply(case_tac a)
+ apply(auto)
+ apply(simp add: ders2_ZERO)
+ apply (simp add: le_SucI)
+ sorry
+
+lemma ders2_ALTS:
+ shows "size (ders2 s (ALTS rs)) \<le> size (ALTS (map (ders2 s) rs))"
+ apply(induct s arbitrary: rs)
+ apply(simp_all)
+ thm size_list_pointwise
+ apply (simp add: size_list_pointwise)
+ apply(drule_tac x="remdups (flts (map (simp \<circ> der a) rs))" in meta_spec)
+ apply(rule le_trans)
+ apply(assumption)
+ apply(simp)
+ apply(rule le_trans)
+ apply(rule remdup_size)
+ apply(simp add: comp_def)
+ apply(rule le_trans)
+ apply(rule flats_size)
+ by (simp add: size_list_pointwise)
+
+definition
+ "derss2 A r = {ders2 s r | s. s \<in> A}"
+
+lemma
+ "\<forall>rd \<in> derss2 (UNIV) r. size rd \<le> Suc (size r)"
+ apply(induct r)
+ apply(auto simp add: derss2_def ders2_ZERO)[1]
+ apply(auto simp add: derss2_def ders2_ZERO)[1]
+ using ders2_ONE
+ apply(auto)[1]
+ apply (metis rexp.size(7) rexp.size(8) zero_le)
+ using ders2_CHAR
+ apply(auto)[1]
+ apply (smt derss2_def le_SucI le_zero_eq mem_Collect_eq rexp.size(7) rexp.size(8) rexp.size(9))
+ defer
+ apply(auto simp add: derss2_def)
+ apply(rule le_trans)
+ apply(rule ders2_ALTS)
+ apply(simp)
+ apply(simp add: comp_def)
+
+ apply(simp add: size_list_pointwise)
+ apply(case_tac s)
+ apply(simp)
+ apply(simp only:)
+ apply(auto)[1]
+
+ apply(case_tac s)
+ apply(simp)
+ apply(simp)
+ oops
+
+section {* Values *}
+
+datatype val =
+ Void
+| Char char
+| Seq val val
+| Nth nat val
+| Stars "val list"
+
+
+section {* The string behind a value *}
+
+fun
+ flat :: "val \<Rightarrow> string"
+where
+ "flat (Void) = []"
+| "flat (Char c) = [c]"
+| "flat (Nth n v) = flat v"
+| "flat (Seq v1 v2) = (flat v1) @ (flat v2)"
+| "flat (Stars []) = []"
+| "flat (Stars (v#vs)) = (flat v) @ (flat (Stars vs))"
+
+abbreviation
+ "flats vs \<equiv> concat (map flat vs)"
+
+lemma flat_Stars [simp]:
+ "flat (Stars vs) = flats vs"
+by (induct vs) (auto)
+
+lemma Star_concat:
+ assumes "\<forall>s \<in> set ss. s \<in> A"
+ shows "concat ss \<in> A\<star>"
+using assms by (induct ss) (auto)
+
+lemma Star_cstring:
+ assumes "s \<in> A\<star>"
+ shows "\<exists>ss. concat ss = s \<and> (\<forall>s \<in> set ss. s \<in> A \<and> s \<noteq> [])"
+using assms
+apply(induct rule: Star.induct)
+apply(auto)[1]
+apply(rule_tac x="[]" in exI)
+apply(simp)
+apply(erule exE)
+apply(clarify)
+apply(case_tac "s1 = []")
+apply(rule_tac x="ss" in exI)
+apply(simp)
+apply(rule_tac x="s1#ss" in exI)
+apply(simp)
+done
+
+
+section {* Lexical Values *}
+
+inductive
+ Prf :: "val \<Rightarrow> rexp \<Rightarrow> bool" ("\<Turnstile> _ : _" [100, 100] 100)
+where
+ "\<lbrakk>\<Turnstile> v1 : r1; \<Turnstile> v2 : r2\<rbrakk> \<Longrightarrow> \<Turnstile> Seq v1 v2 : SEQ r1 r2"
+| "\<lbrakk>\<Turnstile> v1 : (nth rs n); n < length rs\<rbrakk> \<Longrightarrow> \<Turnstile> (Nth n v1) : ALTS rs"
+| "\<Turnstile> Void : ONE"
+| "\<Turnstile> Char c : CHAR c"
+| "\<forall>v \<in> set vs. \<Turnstile> v : r \<and> flat v \<noteq> [] \<Longrightarrow> \<Turnstile> Stars vs : STAR r"
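+
+(* In the clause for ALTS a value records the chosen alternative by its
+   index; for example Nth 1 (Char b) inhabits ALTS [CHAR a, CHAR b]
+   because nth [CHAR a, CHAR b] 1 = CHAR b. *)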
+
+inductive_cases Prf_elims:
+ "\<Turnstile> v : ZERO"
+ "\<Turnstile> v : SEQ r1 r2"
+ "\<Turnstile> v : ALTS rs"
+ "\<Turnstile> v : ONE"
+ "\<Turnstile> v : CHAR c"
+ "\<Turnstile> vs : STAR r"
+
+lemma Prf_Stars_appendE:
+ assumes "\<Turnstile> Stars (vs1 @ vs2) : STAR r"
+ shows "\<Turnstile> Stars vs1 : STAR r \<and> \<Turnstile> Stars vs2 : STAR r"
+using assms
+by (auto intro: Prf.intros elim!: Prf_elims)
+
+
+lemma Star_cval:
+ assumes "\<forall>s\<in>set ss. \<exists>v. s = flat v \<and> \<Turnstile> v : r"
+ shows "\<exists>vs. flats vs = concat ss \<and> (\<forall>v\<in>set vs. \<Turnstile> v : r \<and> flat v \<noteq> [])"
+using assms
+apply(induct ss)
+apply(auto)
+apply(rule_tac x="[]" in exI)
+apply(simp)
+apply(case_tac "flat v = []")
+apply(rule_tac x="vs" in exI)
+apply(simp)
+apply(rule_tac x="v#vs" in exI)
+apply(simp)
+done
+
+
+lemma L_flat_Prf1:
+ assumes "\<Turnstile> v : r"
+ shows "flat v \<in> L r"
+using assms
+ apply(induct)
+ apply(auto simp add: Sequ_def Star_concat)
+ done
+
+lemma L_flat_Prf2:
+ assumes "s \<in> L r"
+ shows "\<exists>v. \<Turnstile> v : r \<and> flat v = s"
+using assms
+proof(induct r arbitrary: s)
+ case (STAR r s)
+ have IH: "\<And>s. s \<in> L r \<Longrightarrow> \<exists>v. \<Turnstile> v : r \<and> flat v = s" by fact
+ have "s \<in> L (STAR r)" by fact
+ then obtain ss where "concat ss = s" "\<forall>s \<in> set ss. s \<in> L r \<and> s \<noteq> []"
+ using Star_cstring by auto
+ then obtain vs where "flats vs = s" "\<forall>v\<in>set vs. \<Turnstile> v : r \<and> flat v \<noteq> []"
+ using IH Star_cval by metis
+ then show "\<exists>v. \<Turnstile> v : STAR r \<and> flat v = s"
+ using Prf.intros(5) flat_Stars by blast
+next
+ case (SEQ r1 r2 s)
+ then show "\<exists>v. \<Turnstile> v : SEQ r1 r2 \<and> flat v = s"
+ unfolding Sequ_def L.simps by (fastforce intro: Prf.intros)
+next
+ case (ALTS rs s)
+ then show "\<exists>v. \<Turnstile> v : ALTS rs \<and> flat v = s"
+ unfolding L.simps
+ apply(auto)
+ apply(case_tac rs)
+ apply(simp)
+ apply(simp)
+ apply(auto)
+ apply(drule_tac x="a" in meta_spec)
+ apply(simp)
+ apply(drule_tac x="s" in meta_spec)
+ apply(simp)
+ apply(erule exE)
+ apply(rule_tac x="Nth 0 v" in exI)
+ apply(simp)
+ apply(rule Prf.intros)
+ apply(simp)
+ apply(simp)
+ apply(drule_tac x="x" in meta_spec)
+ apply(simp)
+ apply(drule_tac x="s" in meta_spec)
+ apply(simp)
+ apply(erule exE)
+ apply(subgoal_tac "\<exists>n. nth list n = x \<and> n < length list")
+ apply(erule exE)
+ apply(rule_tac x="Nth (Suc n) v" in exI)
+ apply(simp)
+ apply(rule Prf.intros)
+ apply(simp)
+ apply(simp)
+ by (meson in_set_conv_nth)
+qed (auto intro: Prf.intros)
+
+
+lemma L_flat_Prf:
+ shows "L(r) = {flat v | v. \<Turnstile> v : r}"
+using L_flat_Prf1 L_flat_Prf2 by blast
+
+
+
+section {* Sets of Lexical Values *}
+
+text {*
+ This section shows that the set of lexical values for a given regular expression and string is finite.
+*}
+
+definition
+ LV :: "rexp \<Rightarrow> string \<Rightarrow> val set"
+where "LV r s \<equiv> {v. \<Turnstile> v : r \<and> flat v = s}"
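+
+(* For instance LV (ALTS [CHAR a, ONE]) [] = {Nth 1 Void}: only the second
+   alternative admits a value that flattens to the empty string. *)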
+
+lemma LV_simps:
+ shows "LV ZERO s = {}"
+ and "LV ONE s = (if s = [] then {Void} else {})"
+ and "LV (CHAR c) s = (if s = [c] then {Char c} else {})"
+unfolding LV_def
+by (auto intro: Prf.intros elim: Prf.cases)
+
+
+abbreviation
+ "Prefixes s \<equiv> {s'. prefix s' s}"
+
+abbreviation
+ "Suffixes s \<equiv> {s'. suffix s' s}"
+
+abbreviation
+ "SSuffixes s \<equiv> {s'. strict_suffix s' s}"
+
+lemma Suffixes_cons [simp]:
+ shows "Suffixes (c # s) = Suffixes s \<union> {c # s}"
+by (auto simp add: suffix_def Cons_eq_append_conv)
+
+
+lemma finite_Suffixes:
+ shows "finite (Suffixes s)"
+by (induct s) (simp_all)
+
+lemma finite_SSuffixes:
+ shows "finite (SSuffixes s)"
+proof -
+ have "SSuffixes s \<subseteq> Suffixes s"
+ unfolding strict_suffix_def suffix_def by auto
+ then show "finite (SSuffixes s)"
+ using finite_Suffixes finite_subset by blast
+qed
+
+lemma finite_Prefixes:
+ shows "finite (Prefixes s)"
+proof -
+ have "finite (Suffixes (rev s))"
+ by (rule finite_Suffixes)
+ then have "finite (rev ` Suffixes (rev s))" by simp
+ moreover
+ have "rev ` (Suffixes (rev s)) = Prefixes s"
+ unfolding suffix_def prefix_def image_def
+ by (auto)(metis rev_append rev_rev_ident)+
+ ultimately show "finite (Prefixes s)" by simp
+qed
+
+lemma LV_STAR_finite:
+ assumes "\<forall>s. finite (LV r s)"
+ shows "finite (LV (STAR r) s)"
+proof(induct s rule: length_induct)
+ fix s::"char list"
+ assume "\<forall>s'. length s' < length s \<longrightarrow> finite (LV (STAR r) s')"
+ then have IH: "\<forall>s' \<in> SSuffixes s. finite (LV (STAR r) s')"
+ by (force simp add: strict_suffix_def suffix_def)
+ define f where "f \<equiv> \<lambda>(v, vs). Stars (v # vs)"
+ define S1 where "S1 \<equiv> \<Union>s' \<in> Prefixes s. LV r s'"
+ define S2 where "S2 \<equiv> \<Union>s2 \<in> SSuffixes s. Stars -` (LV (STAR r) s2)"
+ have "finite S1" using assms
+ unfolding S1_def by (simp_all add: finite_Prefixes)
+ moreover
+ with IH have "finite S2" unfolding S2_def
+ by (auto simp add: finite_SSuffixes inj_on_def finite_vimageI)
+ ultimately
+ have "finite ({Stars []} \<union> f ` (S1 \<times> S2))" by simp
+ moreover
+ have "LV (STAR r) s \<subseteq> {Stars []} \<union> f ` (S1 \<times> S2)"
+ unfolding S1_def S2_def f_def
+ unfolding LV_def image_def prefix_def strict_suffix_def
+ apply(auto)
+ apply(case_tac x)
+ apply(auto elim: Prf_elims)
+ apply(erule Prf_elims)
+ apply(auto)
+ apply(case_tac vs)
+ apply(auto intro: Prf.intros)
+ apply(rule exI)
+ apply(rule conjI)
+ apply(rule_tac x="flat a" in exI)
+ apply(rule conjI)
+ apply(rule_tac x="flats list" in exI)
+ apply(simp)
+ apply(blast)
+ apply(simp add: suffix_def)
+ using Prf.intros(5) by blast
+ ultimately
+ show "finite (LV (STAR r) s)" by (simp add: finite_subset)
+qed
+
+
+lemma LV_finite:
+ shows "finite (LV r s)"
+proof(induct r arbitrary: s)
+ case (ZERO s)
+ show "finite (LV ZERO s)" by (simp add: LV_simps)
+next
+ case (ONE s)
+ show "finite (LV ONE s)" by (simp add: LV_simps)
+next
+ case (CHAR c s)
+ show "finite (LV (CHAR c) s)" by (simp add: LV_simps)
+next
+ case (ALTS rs s)
+ then show "finite (LV (ALTS rs) s)"
+ sorry
+next
+ case (SEQ r1 r2 s)
+ define f where "f \<equiv> \<lambda>(v1, v2). Seq v1 v2"
+ define S1 where "S1 \<equiv> \<Union>s' \<in> Prefixes s. LV r1 s'"
+ define S2 where "S2 \<equiv> \<Union>s' \<in> Suffixes s. LV r2 s'"
+ have IHs: "\<And>s. finite (LV r1 s)" "\<And>s. finite (LV r2 s)" by fact+
+ then have "finite S1" "finite S2" unfolding S1_def S2_def
+ by (simp_all add: finite_Prefixes finite_Suffixes)
+ moreover
+ have "LV (SEQ r1 r2) s \<subseteq> f ` (S1 \<times> S2)"
+ unfolding f_def S1_def S2_def
+ unfolding LV_def image_def prefix_def suffix_def
+ apply (auto elim!: Prf_elims)
+ by (metis (mono_tags, lifting) mem_Collect_eq)
+ ultimately
+ show "finite (LV (SEQ r1 r2) s)"
+ by (simp add: finite_subset)
+next
+ case (STAR r s)
+ then show "finite (LV (STAR r) s)" by (simp add: LV_STAR_finite)
+qed
+
+
+(*
+section {* Our POSIX Definition *}
+
+inductive
+ Posix :: "string \<Rightarrow> rexp \<Rightarrow> val \<Rightarrow> bool" ("_ \<in> _ \<rightarrow> _" [100, 100, 100] 100)
+where
+ Posix_ONE: "[] \<in> ONE \<rightarrow> Void"
+| Posix_CHAR: "[c] \<in> (CHAR c) \<rightarrow> (Char c)"
+| Posix_ALT1: "s \<in> r1 \<rightarrow> v \<Longrightarrow> s \<in> (ALT r1 r2) \<rightarrow> (Left v)"
+| Posix_ALT2: "\<lbrakk>s \<in> r2 \<rightarrow> v; s \<notin> L(r1)\<rbrakk> \<Longrightarrow> s \<in> (ALT r1 r2) \<rightarrow> (Right v)"
+| Posix_SEQ: "\<lbrakk>s1 \<in> r1 \<rightarrow> v1; s2 \<in> r2 \<rightarrow> v2;
+ \<not>(\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> (s1 @ s\<^sub>3) \<in> L r1 \<and> s\<^sub>4 \<in> L r2)\<rbrakk> \<Longrightarrow>
+ (s1 @ s2) \<in> (SEQ r1 r2) \<rightarrow> (Seq v1 v2)"
+| Posix_STAR1: "\<lbrakk>s1 \<in> r \<rightarrow> v; s2 \<in> STAR r \<rightarrow> Stars vs; flat v \<noteq> [];
+ \<not>(\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> (s1 @ s\<^sub>3) \<in> L r \<and> s\<^sub>4 \<in> L (STAR r))\<rbrakk>
+ \<Longrightarrow> (s1 @ s2) \<in> STAR r \<rightarrow> Stars (v # vs)"
+| Posix_STAR2: "[] \<in> STAR r \<rightarrow> Stars []"
+
+inductive_cases Posix_elims:
+ "s \<in> ZERO \<rightarrow> v"
+ "s \<in> ONE \<rightarrow> v"
+ "s \<in> CHAR c \<rightarrow> v"
+ "s \<in> ALT r1 r2 \<rightarrow> v"
+ "s \<in> SEQ r1 r2 \<rightarrow> v"
+ "s \<in> STAR r \<rightarrow> v"
+
+lemma Posix1:
+ assumes "s \<in> r \<rightarrow> v"
+ shows "s \<in> L r" "flat v = s"
+using assms
+by (induct s r v rule: Posix.induct)
+ (auto simp add: Sequ_def)
+
+text {*
+ Our Posix definition determines a unique value.
+*}
+
+lemma Posix_determ:
+ assumes "s \<in> r \<rightarrow> v1" "s \<in> r \<rightarrow> v2"
+ shows "v1 = v2"
+using assms
+proof (induct s r v1 arbitrary: v2 rule: Posix.induct)
+ case (Posix_ONE v2)
+ have "[] \<in> ONE \<rightarrow> v2" by fact
+ then show "Void = v2" by cases auto
+next
+ case (Posix_CHAR c v2)
+ have "[c] \<in> CHAR c \<rightarrow> v2" by fact
+ then show "Char c = v2" by cases auto
+next
+ case (Posix_ALT1 s r1 v r2 v2)
+ have "s \<in> ALT r1 r2 \<rightarrow> v2" by fact
+ moreover
+ have "s \<in> r1 \<rightarrow> v" by fact
+ then have "s \<in> L r1" by (simp add: Posix1)
+ ultimately obtain v' where eq: "v2 = Left v'" "s \<in> r1 \<rightarrow> v'" by cases auto
+ moreover
+ have IH: "\<And>v2. s \<in> r1 \<rightarrow> v2 \<Longrightarrow> v = v2" by fact
+ ultimately have "v = v'" by simp
+ then show "Left v = v2" using eq by simp
+next
+ case (Posix_ALT2 s r2 v r1 v2)
+ have "s \<in> ALT r1 r2 \<rightarrow> v2" by fact
+ moreover
+ have "s \<notin> L r1" by fact
+ ultimately obtain v' where eq: "v2 = Right v'" "s \<in> r2 \<rightarrow> v'"
+ by cases (auto simp add: Posix1)
+ moreover
+ have IH: "\<And>v2. s \<in> r2 \<rightarrow> v2 \<Longrightarrow> v = v2" by fact
+ ultimately have "v = v'" by simp
+ then show "Right v = v2" using eq by simp
+next
+ case (Posix_SEQ s1 r1 v1 s2 r2 v2 v')
+ have "(s1 @ s2) \<in> SEQ r1 r2 \<rightarrow> v'"
+ "s1 \<in> r1 \<rightarrow> v1" "s2 \<in> r2 \<rightarrow> v2"
+ "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> s1 @ s\<^sub>3 \<in> L r1 \<and> s\<^sub>4 \<in> L r2)" by fact+
+ then obtain v1' v2' where "v' = Seq v1' v2'" "s1 \<in> r1 \<rightarrow> v1'" "s2 \<in> r2 \<rightarrow> v2'"
+ apply(cases) apply (auto simp add: append_eq_append_conv2)
+ using Posix1(1) by fastforce+
+ moreover
+ have IHs: "\<And>v1'. s1 \<in> r1 \<rightarrow> v1' \<Longrightarrow> v1 = v1'"
+ "\<And>v2'. s2 \<in> r2 \<rightarrow> v2' \<Longrightarrow> v2 = v2'" by fact+
+ ultimately show "Seq v1 v2 = v'" by simp
+next
+ case (Posix_STAR1 s1 r v s2 vs v2)
+ have "(s1 @ s2) \<in> STAR r \<rightarrow> v2"
+ "s1 \<in> r \<rightarrow> v" "s2 \<in> STAR r \<rightarrow> Stars vs" "flat v \<noteq> []"
+ "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s2 \<and> s1 @ s\<^sub>3 \<in> L r \<and> s\<^sub>4 \<in> L (STAR r))" by fact+
+ then obtain v' vs' where "v2 = Stars (v' # vs')" "s1 \<in> r \<rightarrow> v'" "s2 \<in> (STAR r) \<rightarrow> (Stars vs')"
+ apply(cases) apply (auto simp add: append_eq_append_conv2)
+ using Posix1(1) apply fastforce
+ apply (metis Posix1(1) Posix_STAR1.hyps(6) append_Nil append_Nil2)
+ using Posix1(2) by blast
+ moreover
+ have IHs: "\<And>v2. s1 \<in> r \<rightarrow> v2 \<Longrightarrow> v = v2"
+ "\<And>v2. s2 \<in> STAR r \<rightarrow> v2 \<Longrightarrow> Stars vs = v2" by fact+
+ ultimately show "Stars (v # vs) = v2" by auto
+next
+ case (Posix_STAR2 r v2)
+ have "[] \<in> STAR r \<rightarrow> v2" by fact
+ then show "Stars [] = v2" by cases (auto simp add: Posix1)
+qed
+
+
+text {*
+ Our POSIX value is a lexical value.
+*}
+
+lemma Posix_LV:
+ assumes "s \<in> r \<rightarrow> v"
+ shows "v \<in> LV r s"
+using assms unfolding LV_def
+apply(induct rule: Posix.induct)
+apply(auto simp add: intro!: Prf.intros elim!: Prf_elims)
+done
+*)
+
+
+end
\ No newline at end of file