# HG changeset patch
# User Chengsong
# Date 1646847188 0
# Node ID a7e98deebb5c0d3361d060e3bbdcd0a89050ff5e
# Parent  c6351a96bf80646ee458739a66075c937070b5b9
restructured sizebound proof

diff -r c6351a96bf80 -r a7e98deebb5c ChengsongPhdThesis/ChengsongPhDThesis.tex
--- a/ChengsongPhdThesis/ChengsongPhDThesis.tex	Tue Mar 08 00:50:40 2022 +0000
+++ b/ChengsongPhdThesis/ChengsongPhDThesis.tex	Wed Mar 09 17:33:08 2022 +0000
@@ -38,6 +38,7 @@
 \def\lexer{\mathit{lexer}}
 \def\mkeps{\mathit{mkeps}}
 
+\def\DFA{\textit{DFA}}
 \def\bmkeps{\textit{bmkeps}}
 \def\retrieve{\textit{retrieve}}
 \def\blexer{\textit{blexer}}
@@ -55,7 +56,11 @@
 \def\S{\mathit{S}}
 \def\rup{r^\uparrow}
 
-
+\newcommand{\PDER}{\textit{PDER}}
+\newcommand{\flts}{\textit{flts}}
+\newcommand{\distinctBy}{\textit{distinctBy}}
+\newcommand{\map}{\textit{map}}
+\newcommand{\size}{\textit{size}}
 \def\awidth{\mathit{awidth}}
 \def\pder{\mathit{pder}}
 \def\maxterms{\mathit{maxterms}}
@@ -101,6 +106,7 @@
 
 \section{Introduction}
 \subsection{Basic Regex Introduction}
+
 Suppose (basic) regular expressions are given by the following grammar:
 \[			r ::=   \ZERO \mid  \ONE
 			 \mid  c  
@@ -117,12 +123,8 @@
 &				& $\textit{else} \; \textit{output} \; \textit{NO}$
 \end{tabular}
 \end{center}
-
-
-\subsection{The practical problem}
-Introduce the problem of matching a pattern with a string.
-Why important?
-Need to be fast, real-time, on large inputs.
+Omnipresent use of regexes in modern 
+software. 
 Examples: Snort, Bro, etc?
 \subsubsection{The rules for network intrusion analysis tools }
 TODO: read rules libraries such as Snort and the explanation for some of the rules
@@ -130,6 +132,28 @@
 TODO: any other libraries?
 
 
+There has been many widely used libraries such as 
+Henry Spencer's regexp(3), RE2, etc.
+They are fast and successful, but ugly corner cases
+allowing the $\textit{ReDoS}$ attack exist, and
+is a non-negligible protion.
+\subsection{The practical problem}
+These corner cases either
+\begin{itemize}
+\item
+go unnoticed until they
+cause considerable grief in real life
+\item
+or force the regex library writers to pose
+restrictions on the input, limiting the 
+choice a programmer has when using regexes.
+\end{itemize}
+
+Motivation:
+We want some library that supports as many constructs as possible,
+but still gives formal guarantees on the correctness and running
+time.
+
 \subsection{Regexes that brought down CloudFlare}
 
 
@@ -217,6 +241,29 @@
 suffix up to length $n$ of input string is.
 "Countdown States activation problem":
 $.*a.{100}$ requires $2^100$ + DFA states.
+Example:
+Converting $((a|b )*b.{10}){3}$ to a $\DFA$
+gives the error:
+\begin{verbatim}
+147972 states before minimization, 79107 states in minimized DFA
+Old file "search.java" saved as "search.java~"
+Writing code to "search.java"
+
+Unexpected exception encountered. This indicates a bug in JFlex.
+Please consider filing an issue at http://github.com/jflex-de/jflex/issues/new
+
+
+character value expected
+java.lang.IllegalArgumentException: character value expected
+        at jflex.generator.PackEmitter.emitUC(PackEmitter.java:105)
+        at jflex.generator.CountEmitter.emit(CountEmitter.java:116)
+        at jflex.generator.Emitter.emitDynamicInit(Emitter.java:530)
+        at jflex.generator.Emitter.emit(Emitter.java:1369)
+        at jflex.generator.LexGenerator.generate(LexGenerator.java:115)
+        at jflex.Main.generate(Main.java:320)
+        at jflex.Main.main(Main.java:336)
+\end{verbatim}
+
 \subsubsection{variant of DFA's}
 counting set automata 
 \\
@@ -471,22 +518,134 @@
 
 
 And we have a similar argument for the sequence case.
-\subsection{stronger simplification}
+\subsection{stronger simplification needed}
+
+\subsubsection{Bounded List of Terms}
+We have seen that without simplification the size of $(a+aa)^*$
+grows exponentially and unbounded(where we omit certain nested
+parentheses among the four terms in the last explicitly written out regex):
+
+\def\ll{\stackrel{\_\backslash{} a}{\longrightarrow}}
+\begin{center}
+\begin{tabular}{rll}
+$(a + aa)^*$ & $\ll$ & $(\ONE + \ONE{}a) \cdot (a + aa)^*$\\
+& $\ll$ & $(\ZERO + \ZERO{}a + \ONE) \cdot (a + aa)^* \;+\; (\ONE + \ONE{}a) \cdot (a + aa)^*$\\
+& $\ll$ & $(\ZERO + \ZERO{}a + \ZERO) \cdot (a + aa)^* + (\ONE + \ONE{}a) \cdot (a + aa)^* \;+\; $\\
+& & $\qquad(\ZERO + \ZERO{}a + \ONE) \cdot (a + aa)^* + (\ONE + \ONE{}a) \cdot (a + aa)^*$\\
+& $\ll$ & \ldots \hspace{15mm}(regular expressions of sizes 98, 169, 283, 468, 767, \ldots)
+\end{tabular}
+\end{center}
+
+But if we look closely at the regex
+\begin{center}
+\begin{tabular}{rll}
+& & $\qquad(\ZERO + \ZERO{}a + \ONE) \cdot (a + aa)^* + (\ONE + \ONE{}a) \cdot (a + aa)^*$\\
+\end{tabular}
+\end{center}
+we realize that:
+\begin{itemize}
+\item
+The regex is  equivalent to an alternative taking a long-flattened list, 
+where each list is a sequence, and the second child of that sequence
+is always $(a+aa)^*$. In other words, the regex is a "linear combination"
+of terms of the form $(a+aa)\backslash s \cdot (a+aa)^*$ ($s$ is any string).
+\item
+The number of different terms of the shape $(a+aa) \backslash s \cdot (a+aa)^*$ is 
+bounded because the first child $(a+aa) \backslash s$ can only be one of
+$(\ZERO + \ZERO{}a + \ZERO)$, $(\ZERO + \ZERO{}a + \ONE)$, 
+$(\ONE + \ONE{}a)$ and $(\ZERO + \ZERO{}a)$.
+\item
+With simplification we have that the regex is additionally reduced to,
+
+where each term $\bsimp((a+aa)\backslash s )  $
+is further reduced to only
+$\ONE$, $\ONE + a$ and $\ZERO$.
+
+
+\end{itemize}
+Generalizing this to any regular expression of the form 
+$\sum_{s\in L(r)} \bsimp(r\backslash s ) \cdot r^*$,
+we have the closed-form for star regex's string derivative as below:
+$\forall r \;s.\; \exists sl. \; s.t.\;\bsimp(r^* \backslash s) = \bsimp(\sum_{s'\in sl}(r\backslash s') \cdot r^* )$.
+
+The regex $\bsimp(\sum_{s' \in sl}(r\backslash s') \cdot r^*)$ is bounded by
+$\distinctBy(\flts(\sum_{s'\in sl}(\bsimp(r \backslash s')) \cdot r^*))$,
+which again is bounded by $\distinctBy(\sum_{s'\in sl}(\bsimp(r\backslash s')) \cdot r^*)$.
+This might give us a polynomial bound on the length of the list
+$\distinctBy[(\bsimp(r\backslash s')) \cdot r^* | {s'\in sl} ]$, if the terms in 
+$\distinctBy[(\bsimp (r\backslash s')) | {s' \in sl}]$ has a polynomial bound.
+This is unfortunately not true under our current $\distinctBy$ function:
+If we let $r_n = ( (aa)^* + (aaa)^* + \ldots + \underline{(a\ldots a)^*}{n \,a's}) $,
+then we have that $\textit{maxterms} r_n = \textit{sup} (\textit{length} [\bsimp(r_n\backslash s') | s' \in sl]) = 
+L.C.M(1,\ldots, n)$. According to \href{http://oeis.org/A003418}{OEISA003418}
+this grows exponentially quickly. So we have found a regex $r_n$ where
+$\textit{maxterms} (r_n ^* \backslash s) \geq 2^{(n-1)}$.
+
+
+\subsubsection{stronger version of \distinctBy}
+\href{https://www.researchgate.net/publication/340874991_Partial_derivatives_of_regular_expressions_and_finite_automaton_constructions}{Antimirove}
+has proven a linear bound on the number of terms in the "partial derivatives" of a regular expression:
+\begin{center}
+$\size (\PDER(r))  \leq \awidth (r)$.
+\end{center}
+
+The proof is by structural induction on the regular expression r.
+The hard cases are the sequence case $r_1\cdot r_2$ and star case $r^*$.
+The central idea that allows the induction to go through for this bound is on the inclusion:
+\begin{center}
+$\pder_{s@[c]} (a\cdot b) \subseteq (\pder_{s@[c]} a ) \cdot b \cup (\bigcup_{s' \in Suf(s@[c])} (\pder_{s'} \; b))$
+\end{center} 
+
+This way, 
+
+\begin{center}
+\begin{tabular}{lcl}
+$| \pder_{s@[c]} (a\cdot b) |$ & $ \leq$ & $ | (\pder_{s@[c]} a ) \cdot b \cup (\bigcup_{s' \in Suf(s@[c])} (\pder_{s'} \; b))|$\\
+& $\leq$ & $| (\pder_{s@[c]} a ) \cdot b|  + | (\bigcup_{s' \in Suf(s@[c])} (\pder_{s'} \; b))|$\\
+& $=$ & $\awidth(a) + \awidth(b)$ \\
+& $=$ & $\awidth(a+b)$
+\end{tabular}
+\end{center} 
+
+we have that a compound regular expression $a\cdot b$'s subterms
+ is bounded by its sub-expression's derivatives terms.
+ 
+This argument can be modified to bound the terms in 
+our  version of regex with strong simplification:
+\begin{center}
+\begin{tabular}{lcl}
+$| \maxterms (\bsimp  (a\cdot b) \backslash s)|$ & $=$ & $ |maxterms(\bsimp( (a\backslash s \cdot b) + \sum_{s'\in sl}(b\backslash s') ))|$\\
+& $\leq$ & $| (\pder_{s@[c]} a ) \cdot b|  + | (\bigcup_{s' \in Suf(s@[c])} (\pder_{s'} \; b))|$\\
+& $=$ & $\awidth(a) + \awidth(b)$ \\
+& $=$ & $\awidth(a+b)$
+\end{tabular}
+\end{center} 
+
+
 
 
 \subsection{cubic bound}
+Bounding the regex's subterms by
+its alphabetic width.
+
+The entire expression's size can be bounded by
+number of subterms times each subterms' size.
 
 
 
 
 
-
-
-
+\section{Support for bounded repetitions and other constructs}
+Example: 
+$.*a.\{100\}$ after translation to $\DFA$ and minimization will 
+always take over $2^{100}$ states.
 
-
+\section{Towards a library with fast running time practically}
 
-\subsection{Too Detailed too basic? regex to NFA translation}
+registers and cache-related optimizations?
+JVM related optimizations?
+
+\section{Past Report materials}
 
 Deciding whether a string is in the language of the regex
 can be intuitively done by constructing an NFA\cite{Thompson_1968}:
diff -r c6351a96bf80 -r a7e98deebb5c thys2/BasicIdentities.thy
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/thys2/BasicIdentities.thy	Wed Mar 09 17:33:08 2022 +0000
@@ -0,0 +1,441 @@
+theory BasicIdentities imports 
+"Lexer"  "PDerivs"
+begin
+
+datatype rrexp = 
+  RZERO
+| RONE 
+| RCHAR char
+| RSEQ rrexp rrexp
+| RALTS "rrexp list"
+| RSTAR rrexp
+
+abbreviation
+  "RALT r1 r2 \<equiv> RALTS [r1, r2]"
+
+
+
+fun
+ rnullable :: "rrexp \<Rightarrow> bool"
+where
+  "rnullable (RZERO) = False"
+| "rnullable (RONE  ) = True"
+| "rnullable (RCHAR   c) = False"
+| "rnullable (RALTS   rs) = (\<exists>r \<in> set rs. rnullable r)"
+| "rnullable (RSEQ  r1 r2) = (rnullable r1 \<and> rnullable r2)"
+| "rnullable (RSTAR   r) = True"
+
+
+fun
+ rder :: "char \<Rightarrow> rrexp \<Rightarrow> rrexp"
+where
+  "rder c (RZERO) = RZERO"
+| "rder c (RONE) = RZERO"
+| "rder c (RCHAR d) = (if c = d then RONE else RZERO)"
+| "rder c (RALTS rs) = RALTS (map (rder c) rs)"
+| "rder c (RSEQ r1 r2) = 
+     (if rnullable r1
+      then RALT   (RSEQ (rder c r1) r2) (rder c r2)
+      else RSEQ   (rder c r1) r2)"
+| "rder c (RSTAR r) = RSEQ  (rder c r) (RSTAR r)"
+
+
+fun 
+  rders :: "rrexp \<Rightarrow> string \<Rightarrow> rrexp"
+where
+  "rders r [] = r"
+| "rders r (c#s) = rders (rder c r) s"
+
+fun rdistinct :: "'a list \<Rightarrow>'a set \<Rightarrow> 'a list"
+  where
+  "rdistinct [] acc = []"
+| "rdistinct (x#xs)  acc = 
+     (if x \<in> acc then rdistinct xs  acc 
+      else x # (rdistinct xs  ({x} \<union> acc)))"
+
+
+
+
+
+fun rflts :: "rrexp list \<Rightarrow> rrexp list"
+  where 
+  "rflts [] = []"
+| "rflts (RZERO # rs) = rflts rs"
+| "rflts ((RALTS rs1) # rs) = rs1 @ rflts rs"
+| "rflts (r1 # rs) = r1 # rflts rs"
+
+
+fun rsimp_ALTs :: " rrexp list \<Rightarrow> rrexp"
+  where
+  "rsimp_ALTs  [] = RZERO"
+| "rsimp_ALTs [r] =  r"
+| "rsimp_ALTs rs = RALTS rs"
+
+fun rsimp_SEQ :: " rrexp \<Rightarrow> rrexp \<Rightarrow> rrexp"
+  where
+  "rsimp_SEQ  RZERO _ = RZERO"
+| "rsimp_SEQ  _ RZERO = RZERO"
+| "rsimp_SEQ RONE r2 = r2"
+| "rsimp_SEQ r1 r2 = RSEQ r1 r2"
+
+
+fun rsimp :: "rrexp \<Rightarrow> rrexp" 
+  where
+  "rsimp (RSEQ r1 r2) = rsimp_SEQ  (rsimp r1) (rsimp r2)"
+| "rsimp (RALTS rs) = rsimp_ALTs  (rdistinct (rflts (map rsimp rs))  {}) "
+| "rsimp r = r"
+
+
+fun 
+  rders_simp :: "rrexp \<Rightarrow> string \<Rightarrow> rrexp"
+where
+  "rders_simp r [] = r"
+| "rders_simp r (c#s) = rders_simp (rsimp (rder c r)) s"
+
+fun rsize :: "rrexp \<Rightarrow> nat" where
+  "rsize RZERO = 1"
+| "rsize (RONE) = 1" 
+| "rsize (RCHAR  c) = 1"
+| "rsize (RALTS  rs) = Suc (sum_list (map rsize rs))"
+| "rsize (RSEQ  r1 r2) = Suc (rsize r1 + rsize r2)"
+| "rsize (RSTAR  r) = Suc (rsize r)"
+
+
+lemma rder_rsimp_ALTs_commute:
+  shows "  (rder x (rsimp_ALTs rs)) = rsimp_ALTs (map (rder x) rs)"
+  apply(induct rs)
+   apply simp
+  apply(case_tac rs)
+   apply simp
+  apply auto
+  done
+
+
+lemma rsimp_aalts_smaller:
+  shows "rsize (rsimp_ALTs  rs) \<le> rsize (RALTS rs)"
+  apply(induct rs)
+   apply simp
+  apply simp
+  apply(case_tac "rs = []")
+   apply simp
+  apply(subgoal_tac "\<exists>rsp ap. rs = ap # rsp")
+   apply(erule exE)+
+   apply simp
+  apply simp
+  by(meson neq_Nil_conv)
+  
+
+
+
+
+lemma rSEQ_mono:
+  shows "rsize (rsimp_SEQ r1 r2) \<le>rsize ( RSEQ r1 r2)"
+  apply auto
+  apply(induct r1)
+       apply auto
+      apply(case_tac "r2")
+       apply simp_all
+     apply(case_tac r2)
+          apply simp_all
+     apply(case_tac r2)
+         apply simp_all
+     apply(case_tac r2)
+        apply simp_all
+     apply(case_tac r2)
+  apply simp_all
+  done
+
+lemma ralts_cap_mono:
+  shows "rsize (RALTS rs) \<le> Suc ( sum_list (map rsize rs)) "
+  by simp
+
+lemma rflts_def_idiot:
+  shows "\<lbrakk> a \<noteq> RZERO; \<nexists>rs1. a = RALTS rs1\<rbrakk>
+       \<Longrightarrow> rflts (a # rs) = a # rflts rs"
+  apply(case_tac a)
+       apply simp_all
+  done
+
+
+lemma rflts_mono:
+  shows "sum_list (map rsize (rflts rs))\<le> sum_list (map rsize rs)"
+  apply(induct rs)
+  apply simp
+  apply(case_tac "a = RZERO")
+   apply simp
+  apply(case_tac "\<exists>rs1. a = RALTS rs1")
+  apply(erule exE)
+   apply simp
+  apply(subgoal_tac "rflts (a # rs) = a # (rflts rs)")
+  prefer 2
+  using rflts_def_idiot apply blast
+  apply simp
+  done
+
+lemma rdistinct_smaller: shows "sum_list (map rsize (rdistinct rs ss)) \<le>
+sum_list (map rsize rs )"
+  apply (induct rs arbitrary: ss)
+   apply simp
+  by (simp add: trans_le_add2)
+
+lemma rdistinct_phi_smaller: "sum_list (map rsize (rdistinct rs {})) \<le> sum_list (map rsize rs)"
+  by (simp add: rdistinct_smaller)
+
+
+lemma rsimp_alts_mono :
+  shows "\<And>x. (\<And>xa. xa \<in> set x \<Longrightarrow> rsize (rsimp xa) \<le> rsize xa)  \<Longrightarrow>
+rsize (rsimp_ALTs (rdistinct (rflts (map rsimp x)) {})) \<le> Suc (sum_list (map rsize x))"
+  apply(subgoal_tac "rsize (rsimp_ALTs (rdistinct (rflts (map rsimp x)) {} )) 
+                    \<le> rsize (RALTS (rdistinct (rflts (map rsimp x)) {} ))")
+  prefer 2
+  using rsimp_aalts_smaller apply auto[1]
+  apply(subgoal_tac "rsize (RALTS (rdistinct (rflts (map rsimp x)) {})) \<le>Suc( sum_list (map rsize (rdistinct (rflts (map rsimp x)) {})))")
+  prefer 2
+  using ralts_cap_mono apply blast
+  apply(subgoal_tac "sum_list (map rsize (rdistinct (rflts (map rsimp x)) {})) \<le>
+                     sum_list (map rsize ( (rflts (map rsimp x))))")
+  prefer 2
+  using rdistinct_smaller apply presburger
+  apply(subgoal_tac "sum_list (map rsize (rflts (map rsimp x))) \<le> 
+                     sum_list (map rsize (map rsimp x))")
+  prefer 2
+  using rflts_mono apply blast
+  apply(subgoal_tac "sum_list (map rsize (map rsimp x)) \<le> sum_list (map rsize x)")
+  prefer 2
+  
+  apply (simp add: sum_list_mono)
+  by linarith
+
+
+
+
+
+lemma rsimp_mono:
+  shows "rsize (rsimp r) \<le> rsize r"
+  apply(induct r)
+  apply simp_all
+  apply(subgoal_tac "rsize (rsimp_SEQ (rsimp r1) (rsimp r2)) \<le> rsize (RSEQ (rsimp r1) (rsimp r2))")
+    apply force
+  using rSEQ_mono
+   apply presburger
+  using rsimp_alts_mono by auto
+
+lemma idiot:
+  shows "rsimp_SEQ RONE r = r"
+  apply(case_tac r)
+       apply simp_all
+  done
+
+lemma no_alt_short_list_after_simp:
+  shows "RALTS rs = rsimp r \<Longrightarrow> rsimp_ALTs rs = RALTS rs"
+  sorry
+
+lemma no_further_dB_after_simp:
+  shows "RALTS rs = rsimp r \<Longrightarrow> rdistinct rs {} = rs"
+  sorry
+
+
+lemma idiot2:
+  shows " \<lbrakk>r1 \<noteq> RZERO; r1 \<noteq> RONE;r2 \<noteq> RZERO\<rbrakk>
+    \<Longrightarrow> rsimp_SEQ r1 r2 = RSEQ r1 r2"
+  apply(case_tac r1)
+       apply(case_tac r2)
+  apply simp_all
+     apply(case_tac r2)
+  apply simp_all
+     apply(case_tac r2)
+  apply simp_all
+   apply(case_tac r2)
+  apply simp_all
+  apply(case_tac r2)
+       apply simp_all
+  done
+
+lemma rders__onechar:
+  shows " (rders_simp r [c]) =  (rsimp (rders r [c]))"
+  by simp
+
+lemma rders_append:
+  "rders c (s1 @ s2) = rders (rders c s1) s2"
+  apply(induct s1 arbitrary: c s2)
+  apply(simp_all)
+  done
+
+lemma rders_simp_append:
+  "rders_simp c (s1 @ s2) = rders_simp (rders_simp c s1) s2"
+  apply(induct s1 arbitrary: c s2)
+  apply(simp_all)
+  done
+
+lemma inside_simp_removal:
+  shows " rsimp (rder x (rsimp r)) = rsimp (rder x r)"
+  sorry
+
+lemma set_related_list:
+  shows "distinct rs  \<Longrightarrow> length rs = card (set rs)"
+  by (simp add: distinct_card)
+(*this section deals with the property of distinctBy: creates a list without duplicates*)
+lemma rdistinct_never_added_twice:
+  shows "rdistinct (a # rs) {a} = rdistinct rs {a}"
+  by force
+
+
+lemma rdistinct_does_the_job:
+  shows "distinct (rdistinct rs s)"
+  apply(induct rs arbitrary: s)
+   apply simp
+  apply simp
+  sorry
+
+lemma rders_simp_same_simpders:
+  shows "s \<noteq> [] \<Longrightarrow> rders_simp r s = rsimp (rders r s)"
+  apply(induct s rule: rev_induct)
+   apply simp
+  apply(case_tac "xs = []")
+   apply simp
+  apply(simp add: rders_append rders_simp_append)
+  using inside_simp_removal by blast
+
+lemma simp_helps_der_pierce:
+  shows " rsimp
+            (rder x
+              (rsimp_ALTs rs)) = 
+          rsimp 
+            (rsimp_ALTs 
+              (map (rder x )
+                rs
+              )
+            )"
+  sorry
+
+
+lemma rders_simp_one_char:
+  shows "rders_simp r [c] = rsimp (rder c r)"
+  apply auto
+  done
+
+lemma rsimp_idem:
+  shows "rsimp (rsimp r) = rsimp r"
+  sorry
+
+corollary rsimp_inner_idem1:
+  shows "rsimp r = RSEQ r1 r2 \<Longrightarrow> rsimp r1 = r1 \<and> rsimp r2 = r2"
+  
+  sorry
+
+corollary rsimp_inner_idem2:
+  shows "rsimp r = RALTS rs \<Longrightarrow> \<forall>r' \<in> (set rs). rsimp r' = r'"
+  sorry
+
+corollary rsimp_inner_idem3:
+  shows "rsimp r = RALTS rs \<Longrightarrow> map rsimp rs = rs"
+  by (meson map_idI rsimp_inner_idem2)
+
+corollary rsimp_inner_idem4:
+  shows "rsimp r = RALTS rs \<Longrightarrow> flts rs = rs"
+  sorry
+
+
+lemma head_one_more_simp:
+  shows "map rsimp (r # rs) = map rsimp (( rsimp r) # rs)"
+  by (simp add: rsimp_idem)
+
+lemma head_one_more_dersimp:
+  shows "map rsimp ((rder x (rders_simp r s) # rs)) = map rsimp ((rders_simp r (s@[x]) ) # rs)"
+  using head_one_more_simp rders_simp_append rders_simp_one_char by presburger
+
+
+
+
+lemma ders_simp_nullability:
+  shows "rnullable (rders r s) = rnullable (rders_simp r s)"
+  sorry
+
+lemma  first_elem_seqder:
+  shows "\<not>rnullable r1p \<Longrightarrow> map rsimp (rder x (RSEQ r1p r2)
+                   # rs) = map rsimp ((RSEQ (rder x r1p) r2) # rs) "
+  by auto
+
+lemma first_elem_seqder1:
+  shows  "\<not>rnullable (rders_simp r xs) \<Longrightarrow> map rsimp ( (rder x (RSEQ (rders_simp r xs) r2)) # rs) = 
+                                          map rsimp ( (RSEQ (rsimp (rder x (rders_simp r xs))) r2) # rs)"
+  by (simp add: rsimp_idem)
+
+lemma first_elem_seqdersimps:
+  shows "\<not>rnullable (rders_simp r xs) \<Longrightarrow> map rsimp ( (rder x (RSEQ (rders_simp r xs) r2)) # rs) = 
+                                          map rsimp ( (RSEQ (rders_simp r (xs @ [x])) r2) # rs)"
+  using first_elem_seqder1 rders_simp_append by auto
+
+
+
+
+
+
+lemma seq_ders_closed_form1:
+  shows "\<exists>Ss. rders_simp (RSEQ r1 r2) [c] = rsimp (RALTS ((RSEQ (rders_simp r1 [c]) r2) # 
+(map ( rders_simp r2 ) Ss)))"
+  apply(case_tac "rnullable r1")
+   apply(subgoal_tac " rders_simp (RSEQ r1 r2) [c] = 
+rsimp (RALTS ((RSEQ (rders_simp r1 [c]) r2) # (map (rders_simp r2) [[c]])))")
+    prefer 2
+    apply (simp add: rsimp_idem)
+   apply(rule_tac x = "[[c]]" in exI)
+   apply simp
+  apply(subgoal_tac  " rders_simp (RSEQ r1 r2) [c] = 
+rsimp (RALTS ((RSEQ (rders_simp r1 [c]) r2) # (map (rders_simp r2) [])))")
+   apply blast
+  apply(simp add: rsimp_idem)
+  sorry
+
+
+
+
+
+
+
+
+lemma simp_flatten2:
+  shows "rsimp (RALTS (r # [RALTS rs])) = rsimp (RALTS (r # rs))"
+  sorry
+
+
+lemma simp_flatten:
+  shows "rsimp (RALTS ((RALTS rsa) # rsb)) = rsimp (RALTS (rsa @ rsb))"
+
+  sorry
+
+
+
+fun vsuf :: "char list \<Rightarrow> rrexp \<Rightarrow> char list list" where
+"vsuf [] _ = []"
+|"vsuf (c#cs) r1 = (if (rnullable r1) then  (vsuf cs (rder c r1)) @ [c # cs]
+                                      else  (vsuf cs (rder c r1))
+                   ) "
+
+
+
+
+
+
+fun star_update :: "char \<Rightarrow> rrexp \<Rightarrow> char list list \<Rightarrow> char list list" where
+"star_update c r [] = []"
+|"star_update c r (s # Ss) = (if (rnullable (rders_simp r s)) 
+                                then (s@[c]) # [c] # (star_update c r Ss) 
+                               else   (s@[c]) # (star_update c r Ss) )"
+
+fun star_updates :: "char list \<Rightarrow> rrexp \<Rightarrow> char list list \<Rightarrow> char list list"
+  where
+"star_updates [] r Ss = Ss"
+| "star_updates (c # cs) r Ss = star_updates cs r (star_update c r Ss)"
+
+
+
+
+
+
+(*some basic facts about rsimp*)
+
+
+
+
+end
\ No newline at end of file
diff -r c6351a96bf80 -r a7e98deebb5c thys2/ClosedForms.thy
--- a/thys2/ClosedForms.thy	Tue Mar 08 00:50:40 2022 +0000
+++ b/thys2/ClosedForms.thy	Wed Mar 09 17:33:08 2022 +0000
@@ -1,117 +1,36 @@
-
-theory ClosedForms
-  imports "Lexer" "PDerivs" 
+theory ClosedForms imports
+"BasicIdentities"
 begin
 
+lemma alts_closed_form: shows 
+"rsimp (rders_simp (RALTS rs) s) = 
+rsimp (RALTS (map (\<lambda>r. rders_simp r s) rs))"
+  apply(induct s rule: rev_induct)
+   apply simp
+  apply simp
+  apply(subst rders_simp_append)
+  apply(subgoal_tac " rsimp (rders_simp (rders_simp (RALTS rs) xs) [x]) = 
+ rsimp(rders_simp (rsimp_ALTs (rdistinct (rflts (map (rsimp \<circ> (\<lambda>r. rders_simp r xs)) rs)) {})) [x])")
+   prefer 2
+  apply (metis inside_simp_removal rders_simp_one_char)
+  apply(simp only: )
+  sorry
 
-datatype rrexp = 
-  RZERO
-| RONE 
-| RCHAR char
-| RSEQ rrexp rrexp
-| RALTS "rrexp list"
-| RSTAR rrexp
-
-abbreviation
-  "RALT r1 r2 \<equiv> RALTS [r1, r2]"
+lemma alts_closed_form_variant: shows 
+"s \<noteq> [] \<Longrightarrow> rders_simp (RALTS rs) s = 
+rsimp (RALTS (map (\<lambda>r. rders_simp r s) rs))"
+  sorry
 
 
 
-fun
- rnullable :: "rrexp \<Rightarrow> bool"
-where
-  "rnullable (RZERO) = False"
-| "rnullable (RONE  ) = True"
-| "rnullable (RCHAR   c) = False"
-| "rnullable (RALTS   rs) = (\<exists>r \<in> set rs. rnullable r)"
-| "rnullable (RSEQ  r1 r2) = (rnullable r1 \<and> rnullable r2)"
-| "rnullable (RSTAR   r) = True"
-
-
-fun
- rder :: "char \<Rightarrow> rrexp \<Rightarrow> rrexp"
-where
-  "rder c (RZERO) = RZERO"
-| "rder c (RONE) = RZERO"
-| "rder c (RCHAR d) = (if c = d then RONE else RZERO)"
-| "rder c (RALTS rs) = RALTS (map (rder c) rs)"
-| "rder c (RSEQ r1 r2) = 
-     (if rnullable r1
-      then RALT   (RSEQ (rder c r1) r2) (rder c r2)
-      else RSEQ   (rder c r1) r2)"
-| "rder c (RSTAR r) = RSEQ  (rder c r) (RSTAR r)"
-
-
-fun 
-  rders :: "rrexp \<Rightarrow> string \<Rightarrow> rrexp"
-where
-  "rders r [] = r"
-| "rders r (c#s) = rders (rder c r) s"
-
-fun rdistinct :: "'a list \<Rightarrow>'a set \<Rightarrow> 'a list"
-  where
-  "rdistinct [] acc = []"
-| "rdistinct (x#xs)  acc = 
-     (if x \<in> acc then rdistinct xs  acc 
-      else x # (rdistinct xs  ({x} \<union> acc)))"
-
-
-
+lemma star_closed_form:
+  shows "rders_simp (RSTAR r0) (c#s) = 
+rsimp ( RALTS ( (map (\<lambda>s1. RSEQ (rders_simp r0 s1) (RSTAR r0) ) (star_updates s r0 [[c]]) ) ))"
+  apply(induct s)
+   apply simp
+  sorry
 
 
-fun rflts :: "rrexp list \<Rightarrow> rrexp list"
-  where 
-  "rflts [] = []"
-| "rflts (RZERO # rs) = rflts rs"
-| "rflts ((RALTS rs1) # rs) = rs1 @ rflts rs"
-| "rflts (r1 # rs) = r1 # rflts rs"
-
-
-fun rsimp_ALTs :: " rrexp list \<Rightarrow> rrexp"
-  where
-  "rsimp_ALTs  [] = RZERO"
-| "rsimp_ALTs [r] =  r"
-| "rsimp_ALTs rs = RALTS rs"
-
-fun rsimp_SEQ :: " rrexp \<Rightarrow> rrexp \<Rightarrow> rrexp"
-  where
-  "rsimp_SEQ  RZERO _ = RZERO"
-| "rsimp_SEQ  _ RZERO = RZERO"
-| "rsimp_SEQ RONE r2 = r2"
-| "rsimp_SEQ r1 r2 = RSEQ r1 r2"
-
-
-fun rsimp :: "rrexp \<Rightarrow> rrexp" 
-  where
-  "rsimp (RSEQ r1 r2) = rsimp_SEQ  (rsimp r1) (rsimp r2)"
-| "rsimp (RALTS rs) = rsimp_ALTs  (rdistinct (rflts (map rsimp rs))  {}) "
-| "rsimp r = r"
-
-
-fun 
-  rders_simp :: "rrexp \<Rightarrow> string \<Rightarrow> rrexp"
-where
-  "rders_simp r [] = r"
-| "rders_simp r (c#s) = rders_simp (rsimp (rder c r)) s"
-
-fun rsize :: "rrexp \<Rightarrow> nat" where
-  "rsize RZERO = 1"
-| "rsize (RONE) = 1" 
-| "rsize (RCHAR  c) = 1"
-| "rsize (RALTS  rs) = Suc (sum_list (map rsize rs))"
-| "rsize (RSEQ  r1 r2) = Suc (rsize r1 + rsize r2)"
-| "rsize (RSTAR  r) = Suc (rsize r)"
-
-
-fun rlist_size :: "rrexp list \<Rightarrow> nat" where
-"rlist_size (r # rs) = rsize r + rlist_size rs" 
-| "rlist_size [] = 0"
-
-fun vsuf :: "char list \<Rightarrow> rrexp \<Rightarrow> char list list" where
-"vsuf [] _ = []"
-|"vsuf (c#cs) r1 = (if (rnullable r1) then  (vsuf cs (rder c r1)) @ [c # cs]
-                                      else  (vsuf cs (rder c r1))
-                   ) "
 
 lemma seq_closed_form: shows 
 "rsimp (rders_simp (RSEQ r1 r2) s) = 
@@ -124,883 +43,9 @@
   sorry
 
 
-fun star_update :: "char \<Rightarrow> rrexp \<Rightarrow> char list list \<Rightarrow> char list list" where
-"star_update c r [] = []"
-|"star_update c r (s # Ss) = (if (rnullable (rders_simp r s)) 
-                                then (s@[c]) # [c] # (star_update c r Ss) 
-                               else   (s@[c]) # (star_update c r Ss) )"
-
-fun star_updates :: "char list \<Rightarrow> rrexp \<Rightarrow> char list list \<Rightarrow> char list list"
-  where
-"star_updates [] r Ss = Ss"
-| "star_updates (c # cs) r Ss = star_updates cs r (star_update c r Ss)"
-
-
-lemma star_closed_form:
-  shows "rders_simp (RSTAR r0) (c#s) = 
-rsimp ( RALTS ( (map (\<lambda>s1. RSEQ (rders_simp r0 s1) (RSTAR r0) ) (star_updates s r [[c]]) ) ))"
-  apply(induct s)
-   apply simp
-  sorry
-
-
-lemma star_closed_form_bounded_by_rdistinct_list_estimate:
-  shows "rsize (rsimp ( RALTS ( (map (\<lambda>s1. RSEQ (rders_simp r0 s1) (RSTAR r0) )
-         (star_updates s r [[c]]) ) ))) \<le>
-        Suc (sum_list (map rsize (rdistinct (map (\<lambda>s1. RSEQ (rders_simp r0 s1) (RSTAR r0) )
-         (star_updates s r [[c]]) ) {})  ) )"
-
-  sorry
-
-lemma distinct_list_rexp_up_to_certain_size_bouded_by_set_enumerating_up_to_that_size:
-  shows "\<forall>r\<in> set rs. (rsize r ) \<le> N \<Longrightarrow> sum_list (map rsize (rdistinct rs {})) \<le>
-         (card (rexp_enum N))* N"
-  sorry
-
-
-lemma ind_hypo_on_ders_leads_to_stars_bounded:
-  shows "\<forall>s. rsize (rders_simp r0 s) \<le> N \<Longrightarrow>        
-      (sum_list (map rsize (rdistinct (map (\<lambda>s1. RSEQ (rders_simp r0 s1) (RSTAR r0) )
-         (star_updates s r [[c]]) ) {})  ) ) \<le> 
-(card (rexp_enum (Suc (N + rsize (RSTAR r0))))) * (Suc (N + rsize (RSTAR r0)))
-"
-  sorry
-
-lemma r0_bounded_star_bounded:
-  shows "\<forall>s. rsize (rders_simp r0 s) \<le> N \<Longrightarrow>
-             \<forall>s. rsize (rders_simp (RSTAR r0) s) \<le> 
-(card (rexp_enum (Suc (N + rsize (RSTAR r0))))) * (Suc (N + rsize (RSTAR r0)))"
-
-  sorry
-
-
-(*some basic facts about rsimp*)
-lemma hand_made_def_rlist_size:
-  shows "rlist_size rs = sum_list (map rsize rs)"
-proof (induct rs)
-  case Nil show ?case by simp
-next
-  case (Cons a rs) thus ?case
-    by simp
-qed
-
-lemma rder_rsimp_ALTs_commute:
-  shows "  (rder x (rsimp_ALTs rs)) = rsimp_ALTs (map (rder x) rs)"
-  apply(induct rs)
-   apply simp
-  apply(case_tac rs)
-   apply simp
-  apply auto
-  done
-
-
-lemma rsimp_aalts_smaller:
-  shows "rsize (rsimp_ALTs  rs) \<le> rsize (RALTS rs)"
-  apply(induct rs)
-   apply simp
-  apply simp
-  apply(case_tac "rs = []")
-   apply simp
-  apply(subgoal_tac "\<exists>rsp ap. rs = ap # rsp")
-   apply(erule exE)+
-   apply simp
-  apply simp
-  by(meson neq_Nil_conv)
-  
-
-
-
-
-lemma rSEQ_mono:
-  shows "rsize (rsimp_SEQ r1 r2) \<le>rsize ( RSEQ r1 r2)"
-  apply auto
-  apply(induct r1)
-       apply auto
-      apply(case_tac "r2")
-       apply simp_all
-     apply(case_tac r2)
-          apply simp_all
-     apply(case_tac r2)
-         apply simp_all
-     apply(case_tac r2)
-        apply simp_all
-     apply(case_tac r2)
-  apply simp_all
-  done
-
-lemma ralts_cap_mono:
-  shows "rsize (RALTS rs) \<le> Suc ( sum_list (map rsize rs)) "
-  by simp
-
-lemma rflts_def_idiot:
-  shows "\<lbrakk> a \<noteq> RZERO; \<nexists>rs1. a = RALTS rs1\<rbrakk>
-       \<Longrightarrow> rflts (a # rs) = a # rflts rs"
-  apply(case_tac a)
-       apply simp_all
-  done
-
-
-lemma rflts_mono:
-  shows "sum_list (map rsize (rflts rs))\<le> sum_list (map rsize rs)"
-  apply(induct rs)
-  apply simp
-  apply(case_tac "a = RZERO")
-   apply simp
-  apply(case_tac "\<exists>rs1. a = RALTS rs1")
-  apply(erule exE)
-   apply simp
-  apply(subgoal_tac "rflts (a # rs) = a # (rflts rs)")
-  prefer 2
-  using rflts_def_idiot apply blast
-  apply simp
-  done
-
-lemma rdistinct_smaller: shows "sum_list (map rsize (rdistinct rs ss)) \<le>
-sum_list (map rsize rs )"
-  apply (induct rs arbitrary: ss)
-   apply simp
-  by (simp add: trans_le_add2)
-
-lemma rdistinct_phi_smaller: "sum_list (map rsize (rdistinct rs {})) \<le> sum_list (map rsize rs)"
-  by (simp add: rdistinct_smaller)
-
-
-lemma rsimp_alts_mono :
-  shows "\<And>x. (\<And>xa. xa \<in> set x \<Longrightarrow> rsize (rsimp xa) \<le> rsize xa)  \<Longrightarrow>
-rsize (rsimp_ALTs (rdistinct (rflts (map rsimp x)) {})) \<le> Suc (sum_list (map rsize x))"
-  apply(subgoal_tac "rsize (rsimp_ALTs (rdistinct (rflts (map rsimp x)) {} )) 
-                    \<le> rsize (RALTS (rdistinct (rflts (map rsimp x)) {} ))")
-  prefer 2
-  using rsimp_aalts_smaller apply auto[1]
-  apply(subgoal_tac "rsize (RALTS (rdistinct (rflts (map rsimp x)) {})) \<le>Suc( sum_list (map rsize (rdistinct (rflts (map rsimp x)) {})))")
-  prefer 2
-  using ralts_cap_mono apply blast
-  apply(subgoal_tac "sum_list (map rsize (rdistinct (rflts (map rsimp x)) {})) \<le>
-                     sum_list (map rsize ( (rflts (map rsimp x))))")
-  prefer 2
-  using rdistinct_smaller apply presburger
-  apply(subgoal_tac "sum_list (map rsize (rflts (map rsimp x))) \<le> 
-                     sum_list (map rsize (map rsimp x))")
-  prefer 2
-  using rflts_mono apply blast
-  apply(subgoal_tac "sum_list (map rsize (map rsimp x)) \<le> sum_list (map rsize x)")
-  prefer 2
-  
-  apply (simp add: sum_list_mono)
-  by linarith
-
-
-
-
-
-lemma rsimp_mono:
-  shows "rsize (rsimp r) \<le> rsize r"
-  apply(induct r)
-  apply simp_all
-  apply(subgoal_tac "rsize (rsimp_SEQ (rsimp r1) (rsimp r2)) \<le> rsize (RSEQ (rsimp r1) (rsimp r2))")
-    apply force
-  using rSEQ_mono
-   apply presburger
-  using rsimp_alts_mono by auto
-
-lemma idiot:
-  shows "rsimp_SEQ RONE r = r"
-  apply(case_tac r)
-       apply simp_all
-  done
-
-lemma no_alt_short_list_after_simp:
-  shows "RALTS rs = rsimp r \<Longrightarrow> rsimp_ALTs rs = RALTS rs"
-  sorry
-
-lemma no_further_dB_after_simp:
-  shows "RALTS rs = rsimp r \<Longrightarrow> rdistinct rs {} = rs"
-  sorry
-
-
-lemma idiot2:
-  shows " \<lbrakk>r1 \<noteq> RZERO; r1 \<noteq> RONE;r2 \<noteq> RZERO\<rbrakk>
-    \<Longrightarrow> rsimp_SEQ r1 r2 = RSEQ r1 r2"
-  apply(case_tac r1)
-       apply(case_tac r2)
-  apply simp_all
-     apply(case_tac r2)
-  apply simp_all
-     apply(case_tac r2)
-  apply simp_all
-   apply(case_tac r2)
-  apply simp_all
-  apply(case_tac r2)
-       apply simp_all
-  done
-
-lemma rders__onechar:
-  shows " (rders_simp r [c]) =  (rsimp (rders r [c]))"
-  by simp
-
-lemma rders_append:
-  "rders c (s1 @ s2) = rders (rders c s1) s2"
-  apply(induct s1 arbitrary: c s2)
-  apply(simp_all)
-  done
-
-lemma rders_simp_append:
-  "rders_simp c (s1 @ s2) = rders_simp (rders_simp c s1) s2"
-  apply(induct s1 arbitrary: c s2)
-  apply(simp_all)
-  done
-
-lemma inside_simp_removal:
-  shows " rsimp (rder x (rsimp r)) = rsimp (rder x r)"
-  sorry
-
-lemma set_related_list:
-  shows "distinct rs  \<Longrightarrow> length rs = card (set rs)"
-  by (simp add: distinct_card)
-(*this section deals with the property of distinctBy: creates a list without duplicates*)
-lemma rdistinct_never_added_twice:
-  shows "rdistinct (a # rs) {a} = rdistinct rs {a}"
-  by force
-
-
-lemma rdistinct_does_the_job:
-  shows "distinct (rdistinct rs s)"
-  apply(induct rs arbitrary: s)
-   apply simp
-  apply simp
-  sorry
-
-lemma rders_simp_same_simpders:
-  shows "s \<noteq> [] \<Longrightarrow> rders_simp r s = rsimp (rders r s)"
-  apply(induct s rule: rev_induct)
-   apply simp
-  apply(case_tac "xs = []")
-   apply simp
-  apply(simp add: rders_append rders_simp_append)
-  using inside_simp_removal by blast
-
-lemma simp_helps_der_pierce:
-  shows " rsimp
-            (rder x
-              (rsimp_ALTs rs)) = 
-          rsimp 
-            (rsimp_ALTs 
-              (map (rder x )
-                rs
-              )
-            )"
-  sorry
-
-
-lemma rders_simp_one_char:
-  shows "rders_simp r [c] = rsimp (rder c r)"
-  apply auto
-  done
-
-lemma rsimp_idem:
-  shows "rsimp (rsimp r) = rsimp r"
-  sorry
-
-corollary rsimp_inner_idem1:
-  shows "rsimp r = RSEQ r1 r2 \<Longrightarrow> rsimp r1 = r1 \<and> rsimp r2 = r2"
-  
-  sorry
-
-corollary rsimp_inner_idem2:
-  shows "rsimp r = RALTS rs \<Longrightarrow> \<forall>r' \<in> (set rs). rsimp r' = r'"
-  sorry
-
-corollary rsimp_inner_idem3:
-  shows "rsimp r = RALTS rs \<Longrightarrow> map rsimp rs = rs"
-  by (meson map_idI rsimp_inner_idem2)
-
-corollary rsimp_inner_idem4:
-  shows "rsimp r = RALTS rs \<Longrightarrow> flts rs = rs"
-  sorry
-
-
-lemma head_one_more_simp:
-  shows "map rsimp (r # rs) = map rsimp (( rsimp r) # rs)"
-  by (simp add: rsimp_idem)
-
-lemma head_one_more_dersimp:
-  shows "map rsimp ((rder x (rders_simp r s) # rs)) = map rsimp ((rders_simp r (s@[x]) ) # rs)"
-  using head_one_more_simp rders_simp_append rders_simp_one_char by presburger
-
-
-
-
-lemma ders_simp_nullability:
-  shows "rnullable (rders r s) = rnullable (rders_simp r s)"
-  sorry
-
-lemma  first_elem_seqder:
-  shows "\<not>rnullable r1p \<Longrightarrow> map rsimp (rder x (RSEQ r1p r2)
-                   # rs) = map rsimp ((RSEQ (rder x r1p) r2) # rs) "
-  by auto
-
-lemma first_elem_seqder1:
-  shows  "\<not>rnullable (rders_simp r xs) \<Longrightarrow> map rsimp ( (rder x (RSEQ (rders_simp r xs) r2)) # rs) = 
-                                          map rsimp ( (RSEQ (rsimp (rder x (rders_simp r xs))) r2) # rs)"
-  by (simp add: rsimp_idem)
-
-lemma first_elem_seqdersimps:
-  shows "\<not>rnullable (rders_simp r xs) \<Longrightarrow> map rsimp ( (rder x (RSEQ (rders_simp r xs) r2)) # rs) = 
-                                          map rsimp ( (RSEQ (rders_simp r (xs @ [x])) r2) # rs)"
-  using first_elem_seqder1 rders_simp_append by auto
-
-
-
-
-
-lemma seq_update_seq_ders:
-  shows "rsimp (rder c ( rsimp (RALTS ((RSEQ (rders_simp r1 s) r2) # 
-(map (rders_simp r2) Ss))))) = 
-         rsimp (RALTS ((RSEQ (rders_simp r1 (s @ [c])) r2) # 
-(map (rders_simp r2) (seq_update c (rders_simp r1 s) Ss))))  "
-  sorry
-
-lemma seq_ders_closed_form1:
-  shows "\<exists>Ss. rders_simp (RSEQ r1 r2) [c] = rsimp (RALTS ((RSEQ (rders_simp r1 [c]) r2) # 
-(map ( rders_simp r2 ) Ss)))"
-  apply(case_tac "rnullable r1")
-   apply(subgoal_tac " rders_simp (RSEQ r1 r2) [c] = 
-rsimp (RALTS ((RSEQ (rders_simp r1 [c]) r2) # (map (rders_simp r2) [[c]])))")
-    prefer 2
-    apply (simp add: rsimp_idem)
-   apply(rule_tac x = "[[c]]" in exI)
-   apply simp
-  apply(subgoal_tac  " rders_simp (RSEQ r1 r2) [c] = 
-rsimp (RALTS ((RSEQ (rders_simp r1 [c]) r2) # (map (rders_simp r2) [])))")
-   apply blast
-  apply(simp add: rsimp_idem)
-  sorry
-
-
-
-
-
-
-
-
-lemma simp_flatten2:
-  shows "rsimp (RALTS (r # [RALTS rs])) = rsimp (RALTS (r # rs))"
-  sorry
-
-
-lemma simp_flatten:
-  shows "rsimp (RALTS ((RALTS rsa) # rsb)) = rsimp (RALTS (rsa @ rsb))"
-
-  sorry
-
-
-
-(*^^^^^^^^^nullable_seq_with_list1 related ^^^^^^^^^^^^^^^^*)
-
-
-
-
-
-
-
-
-
-
-
-lemma non_zero_size:
-  shows "rsize r \<ge> Suc 0"
-  apply(induct r)
-  apply auto done
-
-corollary size_geq1:
-  shows "rsize r \<ge> 1"
-  by (simp add: non_zero_size)
-
-
-lemma rexp_size_induct:
-  shows "\<And>N r x5 a list.
-       \<lbrakk> rsize r = Suc N; r = RALTS x5;
-        x5 = a # list\<rbrakk>  \<Longrightarrow>\<exists>i j. rsize a = i \<and> rsize (RALTS list) = j \<and> i + j =  Suc N \<and> i \<le> N \<and> j \<le> N"
-  apply(rule_tac x = "rsize a" in exI)
-  apply(rule_tac x = "rsize (RALTS list)" in exI)
-  apply(subgoal_tac "rsize a \<ge> 1")
-   prefer 2
-  using One_nat_def non_zero_size apply presburger
-  apply(subgoal_tac "rsize (RALTS list) \<ge> 1 ")
-  prefer 2
-  using size_geq1 apply blast
-  apply simp
-  done
-
-definition SEQ_set where
-  "SEQ_set A n \<equiv> {RSEQ r1 r2 | r1 r2. r1 \<in> A \<and> r2 \<in> A \<and> rsize r1 + rsize r2 \<le> n}"
-
-definition SEQ_set_cartesian where
-"SEQ_set_cartesian A n  = {RSEQ r1 r2 | r1 r2. r1 \<in> A \<and> r2 \<in> A}"
-
-definition ALT_set where
-"ALT_set A n \<equiv> {RALTS rs | rs. set rs \<subseteq> A \<and> sum_list (map rsize rs) \<le> n}"
-
-
-definition
-  "sizeNregex N \<equiv> {r. rsize r \<le> N}"
-
-lemma sizenregex_induct:
-  shows "sizeNregex (Suc n) = sizeNregex n \<union> {RZERO, RONE, RALTS []} \<union> {RCHAR c| c. True} \<union>
-SEQ_set ( sizeNregex n) n \<union> ALT_set (sizeNregex n) n \<union> (RSTAR ` (sizeNregex n))"
+lemma seq_closed_form_variant: shows
+"s \<noteq> [] \<Longrightarrow> (rders_simp (RSEQ r1 r2) s) = 
+rsimp (RALTS ((RSEQ (rders_simp r1 s) r2) # (map (rders_simp r2) (vsuf s r1))))"
   sorry
 
-
-lemma chars_finite:
-  shows "finite (RCHAR ` (UNIV::(char set)))"
-  apply(simp)
-  done
-
-thm full_SetCompr_eq 
-
-lemma size1finite:
-  shows "finite (sizeNregex (Suc 0))"
-  apply(subst sizenregex_induct)
-  apply(subst finite_Un)+
-  apply(subgoal_tac "sizeNregex 0 = {}")
-  apply(rule conjI)+
-  apply (metis Collect_empty_eq finite.emptyI non_zero_size not_less_eq_eq sizeNregex_def)
-      apply simp
-      apply (simp add: full_SetCompr_eq)
-  apply (simp add: SEQ_set_def)
-    apply (simp add: ALT_set_def)  
-   apply(simp add: full_SetCompr_eq)
-  using non_zero_size not_less_eq_eq sizeNregex_def by fastforce
-
-lemma seq_included_in_cart:
-  shows "SEQ_set A n \<subseteq> SEQ_set_cartesian A n"
-  using SEQ_set_cartesian_def SEQ_set_def by fastforce
-
-lemma finite_seq:
-  shows " finite (sizeNregex n) \<Longrightarrow> finite (SEQ_set (sizeNregex n) n)"
-  apply(rule finite_subset)
-  sorry
-
-
-lemma finite_size_n:
-  shows "finite (sizeNregex n)"
-  apply(induct n)
-  apply (metis Collect_empty_eq finite.emptyI non_zero_size not_less_eq_eq sizeNregex_def)
-  apply(subst sizenregex_induct)
-  apply(subst finite_Un)+
-  apply(rule conjI)+
-       apply simp
-      apply simp
-     apply (simp add: full_SetCompr_eq)
-
-  sorry
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-lemma star_update_case1:
-  shows "rnullable (rders_simp r s) \<Longrightarrow> star_update c r (s # Ss) = (s @ [c]) # [c] # (star_update c r Ss)"
-  
-  by force
-
-lemma star_update_case2:
-  shows "\<not>rnullable (rders_simp r s) \<Longrightarrow> star_update c r (s # Ss) = (s @ [c]) # (star_update c r Ss)"
-  by simp
-
-lemma bubble_break: shows "rflts [r, RZERO] = rflts [r]"
-  apply(case_tac r)
-       apply simp+
-  done
-
-lemma rsimp_alts_idem_aux1:
-  shows "rsimp_ALTs (rdistinct (rflts [rsimp a]) {}) = rsimp (RALTS [a])"
-  by force
-
-
-
-lemma rsimp_alts_idem_aux2:
-  shows "rsimp a = rsimp (RALTS [a])"
-  apply(simp)
-  apply(case_tac "rsimp a")
-       apply simp+
-  apply (metis no_alt_short_list_after_simp no_further_dB_after_simp)
-  by simp
-
-lemma rsimp_alts_idem:
-  shows "rsimp (rsimp_ALTs (a # as)) = rsimp (rsimp_ALTs (a # [(rsimp (rsimp_ALTs as))] ))"
-  apply(induct as)
-   apply(subgoal_tac "rsimp (rsimp_ALTs [a, rsimp (rsimp_ALTs [])]) = rsimp (rsimp_ALTs [a, RZERO])")
-  prefer 2
-    apply simp
-  using bubble_break rsimp_alts_idem_aux2 apply auto[1]
-  apply(case_tac as)
-   apply(subgoal_tac "rsimp_ALTs( aa # as) = aa")
-  prefer 2
-    apply simp
-  using head_one_more_simp apply fastforce
-  apply(subgoal_tac "rsimp_ALTs (aa # as) = RALTS (aa # as)")
-  prefer 2
-  
-  using rsimp_ALTs.simps(3) apply presburger
-  
-  apply(simp only:)
-  apply(subgoal_tac "rsimp_ALTs (a # aa # aaa # list) = RALTS (a # aa # aaa # list)")
-  prefer 2
-  using rsimp_ALTs.simps(3) apply presburger
-  apply(simp only:)
-  apply(subgoal_tac "rsimp_ALTs [a, rsimp (RALTS (aa # aaa # list))] = RALTS (a # [rsimp (RALTS (aa # aaa # list))])")
-  prefer 2
-  
-  using rsimp_ALTs.simps(3) apply presburger
-  apply(simp only:)
-  using simp_flatten2
-  apply(subgoal_tac " rsimp (RALT a (rsimp (RALTS (aa # aaa # list))))  =  rsimp (RALT a ((RALTS (aa # aaa # list)))) ")
-  prefer 2
-
-  apply (metis head_one_more_simp list.simps(9) rsimp.simps(2))
-  apply (simp only:)
-  done
-
-
-lemma rsimp_alts_idem2:
-  shows "rsimp (rsimp_ALTs (a # as)) = rsimp (rsimp_ALTs ((rsimp a) # [(rsimp (rsimp_ALTs as))] ))"
-  using head_one_more_simp rsimp_alts_idem by auto
-
-
-lemma evolution_step1:
-  shows "rsimp
-        (rsimp_ALTs
-          (rder x (rsimp_SEQ (rders_simp r a) (RSTAR r)) # map (\<lambda>s1. rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r))) Ss)) =
-         rsimp 
-        (rsimp_ALTs
-          (rder x (rsimp_SEQ (rders_simp r a) (RSTAR r)) # [(rsimp (rsimp_ALTs (map (\<lambda>s1. rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r))) Ss)))]))   "
-  using rsimp_alts_idem by auto
-
-lemma evolution_step2:
-  assumes " rsimp (rsimp_ALTs (map (\<lambda>s1. rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r))) Ss)) =
-       rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))"
-  shows "rsimp 
-        (rsimp_ALTs 
-          (rder x (rsimp_SEQ (rders_simp r a) (RSTAR r)) # map (\<lambda>s1. rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r))) Ss)) = 
-                 rsimp 
-        (rsimp_ALTs
-          (rder x (rsimp_SEQ (rders_simp r a) (RSTAR r)) # [ rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))]))  "
-  by (simp add: assms rsimp_alts_idem)
-
-lemma rsimp_seq_aux1:
-  shows "r = RONE \<and> r2 = RSTAR r0 \<Longrightarrow> rsimp_SEQ r r2 = r2"
-  apply simp
-  done
-
-lemma multiple_alts_simp_flatten:
-  shows "rsimp (RALT (RALT r1 r2) (rsimp_ALTs rs)) = rsimp (RALTS (r1 # r2 # rs))"
-  by (metis Cons_eq_appendI append_self_conv2 rsimp_ALTs.simps(2) rsimp_ALTs.simps(3) rsimp_alts_idem simp_flatten)
-
-
-lemma evo3_main_aux1:
-  shows "rsimp
-            (RALT (RALT (RSEQ (rsimp (rders_simp r (a @ [x]))) (RSTAR r)) (RSEQ (rders_simp r [x]) (RSTAR r)))
-              (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))) =
-           rsimp
-            (RALTS
-              (RSEQ (rders_simp r (a @ [x])) (RSTAR r) #
-               RSEQ (rders_simp r [x]) (RSTAR r) # map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))"
-  apply(subgoal_tac "rsimp
-            (RALT (RALT (RSEQ (rsimp (rders_simp r (a @ [x]))) (RSTAR r)) (RSEQ (rders_simp r [x]) (RSTAR r)))
-              (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))) =
-rsimp
-            (RALT (RALT (RSEQ ( (rders_simp r (a @ [x]))) (RSTAR r)) (RSEQ (rders_simp r [x]) (RSTAR r)))
-              (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))) ")
-  prefer 2
-   apply (simp add: rsimp_idem)
-  apply (simp only:)
-  apply(subst multiple_alts_simp_flatten)
-  by simp
-
-
-lemma evo3_main_nullable:
-  shows "
-\<And>a Ss.
-       \<lbrakk>rsimp (rsimp_ALTs (map (\<lambda>s1. rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r))) Ss)) =
-        rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)));
-        rders_simp r a \<noteq> RONE; rders_simp r a \<noteq> RZERO; rnullable (rders_simp r a)\<rbrakk>
-       \<Longrightarrow> rsimp
-            (rsimp_ALTs
-              [rder x (RSEQ (rders_simp r a) (RSTAR r)),
-               rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))]) =
-           rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r (a # Ss))))"
-  apply(subgoal_tac "rder x (RSEQ (rders_simp r a) (RSTAR r)) 
-                   = RALT (RSEQ (rder x (rders_simp r a)) (RSTAR r)) (RSEQ (rder x r) (RSTAR r))")
-  prefer 2
-   apply simp
-  apply(simp only:)
-  apply(subgoal_tac "star_update x r (a # Ss) = (a @ [x]) # [x] # (star_update x r Ss)")
-   prefer 2
-  using star_update_case1 apply presburger
-  apply(simp only:)
-  apply(subst List.list.map(2))+
-  apply(subgoal_tac "rsimp
-            (rsimp_ALTs
-              [RALT (RSEQ (rder x (rders_simp r a)) (RSTAR r)) (RSEQ (rder x r) (RSTAR r)),
-               rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))]) = 
-rsimp
-            (RALTS
-              [RALT (RSEQ (rder x (rders_simp r a)) (RSTAR r)) (RSEQ (rder x r) (RSTAR r)),
-               rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))])")
-  prefer 2
-  using rsimp_ALTs.simps(3) apply presburger
-  apply(simp only:)
-  apply(subgoal_tac " rsimp
-            (rsimp_ALTs
-              (rsimp_SEQ (rders_simp r (a @ [x])) (RSTAR r) #
-               rsimp_SEQ (rders_simp r [x]) (RSTAR r) # map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss))) 
-= 
- rsimp
-            (RALTS
-              (rsimp_SEQ (rders_simp r (a @ [x])) (RSTAR r) #
-               rsimp_SEQ (rders_simp r [x]) (RSTAR r) # map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))")
-
-  prefer 2
-  using rsimp_ALTs.simps(3) apply presburger
-  apply (simp only:)
-  apply(subgoal_tac " rsimp
-            (RALT (RALT (RSEQ (rder x (rders_simp r a)) (RSTAR r)) (RSEQ ( (rder x r)) (RSTAR r)))
-              (rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss))))) = 
-             rsimp
-            (RALT (RALT (RSEQ (rsimp (rder x (rders_simp r a))) (RSTAR r)) (RSEQ (rsimp (rder x r)) (RSTAR r)))
-              (rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))))")
-  prefer 2
-   apply (simp add: rsimp_idem)
-  apply(simp only:)
-  apply(subgoal_tac "             rsimp
-            (RALT (RALT (RSEQ (rsimp (rder x (rders_simp r a))) (RSTAR r)) (RSEQ (rsimp (rder x r)) (RSTAR r)))
-              (rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss))))) = 
-             rsimp
-            (RALT (RALT (RSEQ (rsimp (rders_simp r (a @ [x]))) (RSTAR r)) (RSEQ (rders_simp r [x]) (RSTAR r)))
-              (rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))))")
-   prefer 2
-  using rders_simp_append rders_simp_one_char rsimp_idem apply presburger
-  apply(simp only:)
-  apply(subgoal_tac " rsimp
-            (RALTS
-              (rsimp_SEQ (rders_simp r (a @ [x])) (RSTAR r) #
-               rsimp_SEQ (rders_simp r [x]) (RSTAR r) # map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss))) = 
- rsimp
-            (RALTS
-              (RSEQ (rders_simp r (a @ [x])) (RSTAR r) #
-               RSEQ (rders_simp r [x]) (RSTAR r) # map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))")
-  prefer 2
-  apply (smt (z3) idiot2 list.simps(9) rrexp.distinct(9) rsimp.simps(1) rsimp.simps(2) rsimp.simps(3) rsimp.simps(4) rsimp.simps(6) rsimp_idem)
-  apply(simp only:)
-  apply(subgoal_tac "      rsimp
-            (RALT (RALT (RSEQ (rsimp (rders_simp r (a @ [x]))) (RSTAR r)) (RSEQ (rders_simp r [x]) (RSTAR r)))
-              (rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss))))) =
-     rsimp
-            (RALT (RALT (RSEQ (rsimp (rders_simp r (a @ [x]))) (RSTAR r)) (RSEQ (rders_simp r [x]) (RSTAR r)))
-              ( (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))))  ")
-  prefer 2
-  using rsimp_idem apply force
-  apply(simp only:)
-  using evo3_main_aux1 by blast
-  
-
-lemma evo3_main_not1:
-  shows " \<not>rnullable (rders_simp r a) \<Longrightarrow> rder x (RSEQ (rders_simp r a) (RSTAR r)) = RSEQ (rder x (rders_simp r a)) (RSTAR r)"
-  by fastforce
-
-
-lemma evo3_main_not2:
-  shows "\<not>rnullable (rders_simp r a) \<Longrightarrow>  rsimp
-            (rsimp_ALTs
-              (rder x (RSEQ (rders_simp r a) (RSTAR r)) # rs)) = rsimp
-            (rsimp_ALTs
-              ((RSEQ (rders_simp r (a @ [x])) (RSTAR r)) # rs))"
-  by (simp add: rders_simp_append rsimp_alts_idem2 rsimp_idem)
-
-lemma evo3_main_not3:
-  shows "rsimp
-            (rsimp_ALTs
-              (rsimp_SEQ r1 (RSTAR r) # rs)) = 
-         rsimp (rsimp_ALTs
-              (RSEQ r1 (RSTAR r) # rs))"
-  by (metis idiot2 rrexp.distinct(9) rsimp.simps(1) rsimp.simps(3) rsimp.simps(4) rsimp.simps(6) rsimp_alts_idem rsimp_alts_idem2)
-
-
-lemma evo3_main_notnullable:
-  shows "\<And>a Ss.
-       \<lbrakk>rsimp (rsimp_ALTs (map (\<lambda>s1. rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r))) Ss)) =
-        rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)));
-        rders_simp r a \<noteq> RONE; rders_simp r a \<noteq> RZERO; \<not>rnullable (rders_simp r a)\<rbrakk>
-       \<Longrightarrow> rsimp
-            (rsimp_ALTs
-              [rder x (RSEQ (rders_simp r a) (RSTAR r)),
-               rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))]) =
-           rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r (a # Ss))))"
-  apply(subst star_update_case2)
-   apply simp
-  apply(subst List.list.map(2))
-  apply(subst evo3_main_not2)
-   apply simp
-  apply(subst evo3_main_not3)
-  using rsimp_alts_idem by presburger
-
-
-lemma evo3_aux2:
-  shows "rders_simp r a = RONE \<Longrightarrow> rsimp_SEQ (rders_simp (rders_simp r a) [x]) (RSTAR r) = RZERO"
-  by simp
-lemma evo3_aux3:
-  shows "rsimp (rsimp_ALTs (RZERO # rs)) = rsimp (rsimp_ALTs rs)"
-  by (metis list.simps(8) list.simps(9) rdistinct.simps(1) rflts.simps(1) rflts.simps(2) rsimp.simps(2) rsimp_ALTs.simps(1) rsimp_ALTs.simps(2) rsimp_ALTs.simps(3) rsimp_alts_idem)
-
-lemma evo3_aux4:
-  shows " rsimp
-            (rsimp_ALTs
-              [RSEQ (rder x r) (RSTAR r),
-               rsimp (rsimp_ALTs rs)]) =
-           rsimp
-            (rsimp_ALTs
-              (rsimp_SEQ (rders_simp r [x]) (RSTAR r) # rs))"
-  by (metis rders_simp_one_char rsimp.simps(1) rsimp.simps(6) rsimp_alts_idem rsimp_alts_idem2)
-
-lemma evo3_aux5:
-  shows "rders_simp r a \<noteq> RONE \<and> rders_simp r a \<noteq> RZERO \<Longrightarrow> rsimp_SEQ (rders_simp r a) (RSTAR r) = RSEQ (rders_simp r a) (RSTAR r)"
-  using idiot2 by blast
-
-
-lemma evolution_step3:
-  shows" \<And>a Ss.
-       rsimp (rsimp_ALTs (map (\<lambda>s1. rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r))) Ss)) =
-       rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss))) \<Longrightarrow>
-       rsimp
-        (rsimp_ALTs
-          [rder x (rsimp_SEQ (rders_simp r a) (RSTAR r)),
-           rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))]) =
-       rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r (a # Ss))))"
-  apply(case_tac "rders_simp r a = RONE")
-   apply(subst rsimp_seq_aux1)
-    apply simp
-  apply(subst rder.simps(6))
-   apply(subgoal_tac "rnullable (rders_simp r a)")
-    prefer 2
-  using rnullable.simps(2) apply presburger
-   apply(subst star_update_case1)
-    apply simp
-
-   apply(subst List.list.map)+
-  apply(subst rders_simp_append)
-   apply(subst evo3_aux2)
-    apply simp
-   apply(subst evo3_aux3)
-   apply(subst evo3_aux4)
-   apply simp
-  apply(case_tac "rders_simp r a = RZERO")
-
-   apply (simp add: rsimp_alts_idem2)
-   apply(subgoal_tac "rders_simp r (a @ [x]) = RZERO")
-  prefer 2
-  using rder.simps(1) rders_simp_append rders_simp_one_char rsimp.simps(3) apply presburger
-  using rflts.simps(2) rsimp.simps(3) rsimp_SEQ.simps(1) apply presburger
-  apply(subst evo3_aux5)
-   apply simp
-  apply(case_tac "rnullable (rders_simp r a) ")
-  using evo3_main_nullable apply blast
-  using evo3_main_notnullable apply blast
-  done
-
-(*
-proof (prove)
-goal (1 subgoal):
- 1. map f (a # s) = f a # map f s 
-Auto solve_direct: the current goal can be solved directly with
-  HOL.nitpick_simp(115): map ?f (?x21.0 # ?x22.0) = ?f ?x21.0 # map ?f ?x22.0
-  List.list.map(2): map ?f (?x21.0 # ?x22.0) = ?f ?x21.0 # map ?f ?x22.0
-  List.list.simps(9): map ?f (?x21.0 # ?x22.0) = ?f ?x21.0 # map ?f ?x22.0
-*)
-lemma starseq_list_evolution:
-  fixes  r :: rrexp and Ss :: "char list list" and x :: char 
-  shows "rsimp (rsimp_ALTs (map (\<lambda>s1. rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r))) Ss) ) =
-         rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss))  )"   
-  apply(induct Ss)
-   apply simp
-  apply(subst List.list.map(2))
-  apply(subst evolution_step2)
-   apply simp
-
-
-  sorry
-
-
-lemma star_seqs_produce_star_seqs:
-  shows "rsimp (rsimp_ALTs (map (rder x \<circ> (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r))) Ss))
-       = rsimp (rsimp_ALTs (map ( (\<lambda>s1. rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r)))) Ss))"
-  by (meson comp_apply)
-
-lemma map_der_lambda_composition:
-  shows "map (rder x) (map (\<lambda>s. f s) Ss) = map (\<lambda>s. (rder x (f s))) Ss"
-  by force
-
-lemma ralts_vs_rsimpalts:
-  shows "rsimp (RALTS rs) = rsimp (rsimp_ALTs rs)"
-  by (metis evo3_aux3 rsimp_ALTs.simps(2) rsimp_ALTs.simps(3) simp_flatten2)
-  
-
-lemma linearity_of_list_of_star_or_starseqs: 
-  fixes r::rrexp and Ss::"char list list" and x::char
-  shows "\<exists>Ssa. rsimp (rder x (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ss))) =
-                 rsimp (RALTS ( (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ssa)))"
-  apply(subst rder_rsimp_ALTs_commute)
-  apply(subst map_der_lambda_composition)
-  using starseq_list_evolution
-  apply(rule_tac x = "star_update x r Ss" in exI)
-  apply(subst ralts_vs_rsimpalts)
-  by simp
-
-
-
-(*certified correctness---does not depend on any previous sorry*)
-lemma star_list_push_der: shows  " \<lbrakk>xs \<noteq> [] \<Longrightarrow> \<exists>Ss. rders_simp (RSTAR r) xs = rsimp (RALTS (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ss));
-        xs @ [x] \<noteq> []; xs \<noteq> []\<rbrakk> \<Longrightarrow>
-     \<exists>Ss. rders_simp (RSTAR r ) (xs @ [x]) = 
-        rsimp (RALTS (map (\<lambda>s1. (rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r))) ) Ss) )"
-  apply(subgoal_tac  "\<exists>Ss. rders_simp (RSTAR r) xs = rsimp (RALTS (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ss))")
-  prefer 2
-  apply blast
-  apply(erule exE)
-  apply(subgoal_tac "rders_simp (RSTAR r) (xs @ [x]) = rsimp (rder x (rsimp (RALTS (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ss))))")
-  prefer 2
-  using rders_simp_append
-  using rders_simp_one_char apply presburger
-  apply(rule_tac x= "Ss" in exI)
-  apply(subgoal_tac " rsimp (rder x (rsimp (RALTS (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ss)))) = 
-                       rsimp (rsimp (rder x (RALTS (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ss))))")
-  prefer 2
-  using inside_simp_removal rsimp_idem apply presburger
-  apply(subgoal_tac "rsimp (rsimp (rder x (RALTS (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ss)))) =
-                     rsimp (rsimp (RALTS (map (rder x) (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ss))))")
-  prefer 2
-  using rder.simps(4) apply presburger
-  apply(subgoal_tac "rsimp (rsimp (RALTS (map (rder x) (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ss)))) =
-                     rsimp (rsimp (RALTS (map (\<lambda>s1. (rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r)))) Ss)))")
-   apply (metis rsimp_idem)
-  by (metis map_der_lambda_composition)
-
-
-
-end
+end
\ No newline at end of file
diff -r c6351a96bf80 -r a7e98deebb5c thys2/ClosedFormsBounds.thy
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/thys2/ClosedFormsBounds.thy	Wed Mar 09 17:33:08 2022 +0000
@@ -0,0 +1,585 @@
+
+theory ClosedFormsBounds
+  imports "GeneralRegexBound" "ClosedForms"
+begin
+
+
+
+lemma alts_closed_form_bounded: shows
+"\<forall>r \<in> set rs. \<forall>s. rsize(rders_simp r s ) \<le> N \<Longrightarrow> 
+rsize (rders_simp (RALTS rs ) s) \<le> max (Suc ( N * (card (sizeNregex N)))) (rsize (RALTS rs) )"
+  apply(induct s)
+  apply simp
+  apply(insert alts_closed_form_variant)
+
+  
+  sorry
+
+
+
+lemma star_closed_form_bounded_by_rdistinct_list_estimate:
+  shows "rsize (rsimp ( RALTS ( (map (\<lambda>s1. RSEQ (rders_simp r0 s1) (RSTAR r0) )
+         (star_updates s r0 [[c]]) ) ))) \<le>
+        Suc (sum_list (map rsize (rdistinct (map (\<lambda>s1. RSEQ (rders_simp r0 s1) (RSTAR r0) )
+         (star_updates s r0 [[c]]) ) {})  ) )"
+
+  sorry
+
+lemma distinct_list_rexp_up_to_certain_size_bouded_by_set_enumerating_up_to_that_size:
+  shows "\<forall>r\<in> set rs. (rsize r ) \<le> N \<Longrightarrow> sum_list (map rsize (rdistinct rs {})) \<le>
+         (card (sizeNregex N))* N"
+  sorry
+
+
+lemma star_control_bounded:
+  shows "\<forall>s. rsize (rders_simp r0 s) \<le> N \<Longrightarrow>        
+      (sum_list (map rsize (rdistinct (map (\<lambda>s1. RSEQ (rders_simp r0 s1) (RSTAR r0))
+         (star_updates s r0 [[c]]) ) {})  ) ) \<le> 
+(card (sizeNregex (Suc (N + rsize (RSTAR r0))))) * (Suc (N + rsize (RSTAR r0)))
+"
+  sorry
+
+lemma star_control_variant:
+  assumes "\<forall>s. rsize (rders_simp r0 s) \<le> N"
+  shows"Suc 
+      (sum_list (map rsize (rdistinct (map (\<lambda>s1. RSEQ (rders_simp r0 s1) (RSTAR r0)) 
+          (star_updates list r0 [[a]])) {}))) 
+\<le>  (Suc (card (sizeNregex (Suc (N + rsize (RSTAR r0))))) * Suc (N + rsize (RSTAR r0))) "
+  apply(subgoal_tac    "(sum_list (map rsize (rdistinct (map (\<lambda>s1. RSEQ (rders_simp r0 s1) (RSTAR r0)) 
+          (star_updates list r0 [[a]])) {}))) 
+\<le>  ( (card (sizeNregex (Suc (N + rsize (RSTAR r0))))) * Suc (N + rsize (RSTAR r0))) ")
+  prefer 2
+  using assms star_control_bounded apply presburger
+  by simp
+
+
+
+lemma star_closed_form_bounded:
+  shows "\<forall>s. rsize (rders_simp r0 s) \<le> N \<Longrightarrow>
+              rsize (rders_simp (RSTAR r0) s) \<le> 
+max (   (Suc (card (sizeNregex (Suc (N + rsize (RSTAR r0))))) * (Suc (N + rsize (RSTAR r0)))))   (rsize (RSTAR r0))"
+  apply(case_tac s)
+  apply simp
+  apply(subgoal_tac " rsize (rders_simp (RSTAR r0) (a # list)) = 
+rsize (rsimp ( RALTS ( (map (\<lambda>s1. RSEQ (rders_simp r0 s1) (RSTAR r0) ) (star_updates list r0 [[a]]) ) )))") 
+   prefer 2
+  using star_closed_form apply presburger
+  apply(subgoal_tac "rsize (rsimp (
+ RALTS ( (map (\<lambda>s1. RSEQ (rders_simp r0 s1) (RSTAR r0) ) (star_updates list    r0 [[a]]) ) ))) 
+\<le>         Suc (sum_list (map rsize (rdistinct (map (\<lambda>s1. RSEQ (rders_simp r0 s1) (RSTAR r0) )
+         (star_updates list r0 [[a]]) ) {})  ) )")
+  prefer 2
+  using star_closed_form_bounded_by_rdistinct_list_estimate apply presburger
+  apply(subgoal_tac "Suc (sum_list 
+                 (map rsize
+                   (rdistinct (map (\<lambda>s1. RSEQ (rders_simp r0 s1) (RSTAR r0)) (star_updates list r0 [[a]])) {}))) 
+\<le>  (Suc (card (sizeNregex (Suc (N + rsize (RSTAR r0))))) * Suc (N + rsize (RSTAR r0)))  ")
+  apply auto[1]
+  using star_control_variant by blast
+
+
+
+
+
+
+lemma seq_list_estimate_control: shows 
+" rsize (rsimp (RALTS (RSEQ (rders_simp r1 s) r2 # map (rders_simp r2) (vsuf s r1))))
+           \<le> Suc (sum_list (map rsize (rdistinct (RSEQ (rders_simp r1 s) r2 # map (rders_simp r2) (vsuf s r1)) {})))"
+  
+  sorry
+
+lemma seq_estimate_bounded: 
+  assumes "\<forall>s. rsize (rders_simp r1 s) \<le> N1" and "\<forall>s. rsize (rders_simp r2 s) \<le> N2"
+  shows
+"Suc (sum_list (map rsize (rdistinct (RSEQ (rders_simp r1 s) r2 # map (rders_simp r2) (vsuf s r1)) {}))) \<le>
+ Suc (Suc (N1 + (rsize r2)) + (N2 * card (sizeNregex N2)))"
+
+  sorry
+
+lemma seq_closed_form_bounded: shows
+"\<lbrakk>\<forall>s. rsize (rders_simp r1 s) \<le> N1 ; \<forall>s. rsize (rders_simp r2 s) \<le> N2\<rbrakk> \<Longrightarrow>
+rsize (rders_simp (RSEQ r1 r2) s) \<le> 
+max (Suc (Suc (N1 + (rsize r2)) + (N2 * card (sizeNregex N2)))) (rsize (RSEQ r1 r2)) "
+  apply(case_tac s)
+  apply simp
+  apply(subgoal_tac " (rders_simp (RSEQ r1 r2) s) = 
+rsimp (RALTS ((RSEQ (rders_simp r1 s) r2) # (map (rders_simp r2) (vsuf s r1))))")
+  prefer 2
+  using seq_closed_form_variant apply blast
+  apply(subgoal_tac "rsize (rsimp (RALTS (RSEQ (rders_simp r1 s) r2 # map (rders_simp r2) (vsuf s r1))))
+                    \<le>
+Suc (sum_list (map rsize (rdistinct (RSEQ (rders_simp r1 s) r2 # map (rders_simp r2) (vsuf s r1)) {})))")
+  apply(subgoal_tac "Suc (sum_list (map rsize (rdistinct (RSEQ (rders_simp r1 s) r2 # map (rders_simp r2) (vsuf s r1)) {})))
+\<le> Suc (Suc (N1 + (rsize r2)) + (N2 * card (sizeNregex N2)))")
+  prefer 2
+  using seq_estimate_bounded apply blast
+   apply(subgoal_tac "rsize (rders_simp (RSEQ r1 r2) s) \<le> Suc (Suc (N1 + rsize r2) + N2 * card (sizeNregex N2))")
+  using le_max_iff_disj apply blast
+   apply auto[1]
+  using seq_list_estimate_control by presburger
+
+
+lemma rders_simp_bounded: shows
+"\<exists>N. \<forall>s. rsize (rders_simp r s) \<le> N"
+  apply(induct r)
+       apply(rule_tac x = "Suc 0 " in exI)
+  using three_easy_cases0 apply force
+  using three_easy_cases1 apply blast
+  using three_easy_casesC apply blast
+  using seq_closed_form_bounded apply blast
+  apply (metis alts_closed_form_bounded size_list_estimation')
+  using star_closed_form_bounded by blast
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+(*Obsolete materials*)
+
+
+lemma rexp_size_induct:
+  shows "\<And>N r x5 a list.
+       \<lbrakk> rsize r = Suc N; r = RALTS x5;
+        x5 = a # list\<rbrakk>  \<Longrightarrow>\<exists>i j. rsize a = i \<and> rsize (RALTS list) = j \<and> i + j =  Suc N \<and> i \<le> N \<and> j \<le> N"
+  apply(rule_tac x = "rsize a" in exI)
+  apply(rule_tac x = "rsize (RALTS list)" in exI)
+  apply(subgoal_tac "rsize a \<ge> 1")
+   prefer 2
+  using One_nat_def non_zero_size apply presburger
+  apply(subgoal_tac "rsize (RALTS list) \<ge> 1 ")
+  prefer 2
+  using size_geq1 apply blast
+  apply simp
+  done
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+lemma star_update_case1:
+  shows "rnullable (rders_simp r s) \<Longrightarrow> star_update c r (s # Ss) = (s @ [c]) # [c] # (star_update c r Ss)"
+  
+  by force
+
+lemma star_update_case2:
+  shows "\<not>rnullable (rders_simp r s) \<Longrightarrow> star_update c r (s # Ss) = (s @ [c]) # (star_update c r Ss)"
+  by simp
+
+lemma bubble_break: shows "rflts [r, RZERO] = rflts [r]"
+  apply(case_tac r)
+       apply simp+
+  done
+
+lemma rsimp_alts_idem_aux1:
+  shows "rsimp_ALTs (rdistinct (rflts [rsimp a]) {}) = rsimp (RALTS [a])"
+  by force
+
+
+
+lemma rsimp_alts_idem_aux2:
+  shows "rsimp a = rsimp (RALTS [a])"
+  apply(simp)
+  apply(case_tac "rsimp a")
+       apply simp+
+  apply (metis no_alt_short_list_after_simp no_further_dB_after_simp)
+  by simp
+
+lemma rsimp_alts_idem:
+  shows "rsimp (rsimp_ALTs (a # as)) = rsimp (rsimp_ALTs (a # [(rsimp (rsimp_ALTs as))] ))"
+  apply(induct as)
+   apply(subgoal_tac "rsimp (rsimp_ALTs [a, rsimp (rsimp_ALTs [])]) = rsimp (rsimp_ALTs [a, RZERO])")
+  prefer 2
+    apply simp
+  using bubble_break rsimp_alts_idem_aux2 apply auto[1]
+  apply(case_tac as)
+   apply(subgoal_tac "rsimp_ALTs( aa # as) = aa")
+  prefer 2
+    apply simp
+  using head_one_more_simp apply fastforce
+  apply(subgoal_tac "rsimp_ALTs (aa # as) = RALTS (aa # as)")
+  prefer 2
+  
+  using rsimp_ALTs.simps(3) apply presburger
+  
+  apply(simp only:)
+  apply(subgoal_tac "rsimp_ALTs (a # aa # aaa # list) = RALTS (a # aa # aaa # list)")
+  prefer 2
+  using rsimp_ALTs.simps(3) apply presburger
+  apply(simp only:)
+  apply(subgoal_tac "rsimp_ALTs [a, rsimp (RALTS (aa # aaa # list))] = RALTS (a # [rsimp (RALTS (aa # aaa # list))])")
+  prefer 2
+  
+  using rsimp_ALTs.simps(3) apply presburger
+  apply(simp only:)
+  using simp_flatten2
+  apply(subgoal_tac " rsimp (RALT a (rsimp (RALTS (aa # aaa # list))))  =  rsimp (RALT a ((RALTS (aa # aaa # list)))) ")
+  prefer 2
+
+  apply (metis head_one_more_simp list.simps(9) rsimp.simps(2))
+  apply (simp only:)
+  done
+
+
+lemma rsimp_alts_idem2:
+  shows "rsimp (rsimp_ALTs (a # as)) = rsimp (rsimp_ALTs ((rsimp a) # [(rsimp (rsimp_ALTs as))] ))"
+  using head_one_more_simp rsimp_alts_idem by auto
+
+
+lemma evolution_step1:
+  shows "rsimp
+        (rsimp_ALTs
+          (rder x (rsimp_SEQ (rders_simp r a) (RSTAR r)) # map (\<lambda>s1. rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r))) Ss)) =
+         rsimp 
+        (rsimp_ALTs
+          (rder x (rsimp_SEQ (rders_simp r a) (RSTAR r)) # [(rsimp (rsimp_ALTs (map (\<lambda>s1. rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r))) Ss)))]))   "
+  using rsimp_alts_idem by auto
+
+lemma evolution_step2:
+  assumes " rsimp (rsimp_ALTs (map (\<lambda>s1. rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r))) Ss)) =
+       rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))"
+  shows "rsimp 
+        (rsimp_ALTs 
+          (rder x (rsimp_SEQ (rders_simp r a) (RSTAR r)) # map (\<lambda>s1. rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r))) Ss)) = 
+                 rsimp 
+        (rsimp_ALTs
+          (rder x (rsimp_SEQ (rders_simp r a) (RSTAR r)) # [ rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))]))  "
+  by (simp add: assms rsimp_alts_idem)
+
+lemma rsimp_seq_aux1:
+  shows "r = RONE \<and> r2 = RSTAR r0 \<Longrightarrow> rsimp_SEQ r r2 = r2"
+  apply simp
+  done
+
+lemma multiple_alts_simp_flatten:
+  shows "rsimp (RALT (RALT r1 r2) (rsimp_ALTs rs)) = rsimp (RALTS (r1 # r2 # rs))"
+  by (metis Cons_eq_appendI append_self_conv2 rsimp_ALTs.simps(2) rsimp_ALTs.simps(3) rsimp_alts_idem simp_flatten)
+
+
+lemma evo3_main_aux1:
+  shows "rsimp
+            (RALT (RALT (RSEQ (rsimp (rders_simp r (a @ [x]))) (RSTAR r)) (RSEQ (rders_simp r [x]) (RSTAR r)))
+              (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))) =
+           rsimp
+            (RALTS
+              (RSEQ (rders_simp r (a @ [x])) (RSTAR r) #
+               RSEQ (rders_simp r [x]) (RSTAR r) # map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))"
+  apply(subgoal_tac "rsimp
+            (RALT (RALT (RSEQ (rsimp (rders_simp r (a @ [x]))) (RSTAR r)) (RSEQ (rders_simp r [x]) (RSTAR r)))
+              (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))) =
+rsimp
+            (RALT (RALT (RSEQ ( (rders_simp r (a @ [x]))) (RSTAR r)) (RSEQ (rders_simp r [x]) (RSTAR r)))
+              (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))) ")
+  prefer 2
+   apply (simp add: rsimp_idem)
+  apply (simp only:)
+  apply(subst multiple_alts_simp_flatten)
+  by simp
+
+
+lemma evo3_main_nullable:
+  shows "
+\<And>a Ss.
+       \<lbrakk>rsimp (rsimp_ALTs (map (\<lambda>s1. rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r))) Ss)) =
+        rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)));
+        rders_simp r a \<noteq> RONE; rders_simp r a \<noteq> RZERO; rnullable (rders_simp r a)\<rbrakk>
+       \<Longrightarrow> rsimp
+            (rsimp_ALTs
+              [rder x (RSEQ (rders_simp r a) (RSTAR r)),
+               rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))]) =
+           rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r (a # Ss))))"
+  apply(subgoal_tac "rder x (RSEQ (rders_simp r a) (RSTAR r)) 
+                   = RALT (RSEQ (rder x (rders_simp r a)) (RSTAR r)) (RSEQ (rder x r) (RSTAR r))")
+  prefer 2
+   apply simp
+  apply(simp only:)
+  apply(subgoal_tac "star_update x r (a # Ss) = (a @ [x]) # [x] # (star_update x r Ss)")
+   prefer 2
+  using star_update_case1 apply presburger
+  apply(simp only:)
+  apply(subst List.list.map(2))+
+  apply(subgoal_tac "rsimp
+            (rsimp_ALTs
+              [RALT (RSEQ (rder x (rders_simp r a)) (RSTAR r)) (RSEQ (rder x r) (RSTAR r)),
+               rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))]) = 
+rsimp
+            (RALTS
+              [RALT (RSEQ (rder x (rders_simp r a)) (RSTAR r)) (RSEQ (rder x r) (RSTAR r)),
+               rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))])")
+  prefer 2
+  using rsimp_ALTs.simps(3) apply presburger
+  apply(simp only:)
+  apply(subgoal_tac " rsimp
+            (rsimp_ALTs
+              (rsimp_SEQ (rders_simp r (a @ [x])) (RSTAR r) #
+               rsimp_SEQ (rders_simp r [x]) (RSTAR r) # map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss))) 
+= 
+ rsimp
+            (RALTS
+              (rsimp_SEQ (rders_simp r (a @ [x])) (RSTAR r) #
+               rsimp_SEQ (rders_simp r [x]) (RSTAR r) # map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))")
+
+  prefer 2
+  using rsimp_ALTs.simps(3) apply presburger
+  apply (simp only:)
+  apply(subgoal_tac " rsimp
+            (RALT (RALT (RSEQ (rder x (rders_simp r a)) (RSTAR r)) (RSEQ ( (rder x r)) (RSTAR r)))
+              (rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss))))) = 
+             rsimp
+            (RALT (RALT (RSEQ (rsimp (rder x (rders_simp r a))) (RSTAR r)) (RSEQ (rsimp (rder x r)) (RSTAR r)))
+              (rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))))")
+  prefer 2
+   apply (simp add: rsimp_idem)
+  apply(simp only:)
+  apply(subgoal_tac "             rsimp
+            (RALT (RALT (RSEQ (rsimp (rder x (rders_simp r a))) (RSTAR r)) (RSEQ (rsimp (rder x r)) (RSTAR r)))
+              (rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss))))) = 
+             rsimp
+            (RALT (RALT (RSEQ (rsimp (rders_simp r (a @ [x]))) (RSTAR r)) (RSEQ (rders_simp r [x]) (RSTAR r)))
+              (rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))))")
+   prefer 2
+  using rders_simp_append rders_simp_one_char rsimp_idem apply presburger
+  apply(simp only:)
+  apply(subgoal_tac " rsimp
+            (RALTS
+              (rsimp_SEQ (rders_simp r (a @ [x])) (RSTAR r) #
+               rsimp_SEQ (rders_simp r [x]) (RSTAR r) # map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss))) = 
+ rsimp
+            (RALTS
+              (RSEQ (rders_simp r (a @ [x])) (RSTAR r) #
+               RSEQ (rders_simp r [x]) (RSTAR r) # map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))")
+  prefer 2
+  apply (smt (z3) idiot2 list.simps(9) rrexp.distinct(9) rsimp.simps(1) rsimp.simps(2) rsimp.simps(3) rsimp.simps(4) rsimp.simps(6) rsimp_idem)
+  apply(simp only:)
+  apply(subgoal_tac "      rsimp
+            (RALT (RALT (RSEQ (rsimp (rders_simp r (a @ [x]))) (RSTAR r)) (RSEQ (rders_simp r [x]) (RSTAR r)))
+              (rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss))))) =
+     rsimp
+            (RALT (RALT (RSEQ (rsimp (rders_simp r (a @ [x]))) (RSTAR r)) (RSEQ (rders_simp r [x]) (RSTAR r)))
+              ( (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))))  ")
+  prefer 2
+  using rsimp_idem apply force
+  apply(simp only:)
+  using evo3_main_aux1 by blast
+  
+
+lemma evo3_main_not1:
+  shows " \<not>rnullable (rders_simp r a) \<Longrightarrow> rder x (RSEQ (rders_simp r a) (RSTAR r)) = RSEQ (rder x (rders_simp r a)) (RSTAR r)"
+  by fastforce
+
+
+lemma evo3_main_not2:
+  shows "\<not>rnullable (rders_simp r a) \<Longrightarrow>  rsimp
+            (rsimp_ALTs
+              (rder x (RSEQ (rders_simp r a) (RSTAR r)) # rs)) = rsimp
+            (rsimp_ALTs
+              ((RSEQ (rders_simp r (a @ [x])) (RSTAR r)) # rs))"
+  by (simp add: rders_simp_append rsimp_alts_idem2 rsimp_idem)
+
+lemma evo3_main_not3:
+  shows "rsimp
+            (rsimp_ALTs
+              (rsimp_SEQ r1 (RSTAR r) # rs)) = 
+         rsimp (rsimp_ALTs
+              (RSEQ r1 (RSTAR r) # rs))"
+  by (metis idiot2 rrexp.distinct(9) rsimp.simps(1) rsimp.simps(3) rsimp.simps(4) rsimp.simps(6) rsimp_alts_idem rsimp_alts_idem2)
+
+
+lemma evo3_main_notnullable:
+  shows "\<And>a Ss.
+       \<lbrakk>rsimp (rsimp_ALTs (map (\<lambda>s1. rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r))) Ss)) =
+        rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)));
+        rders_simp r a \<noteq> RONE; rders_simp r a \<noteq> RZERO; \<not>rnullable (rders_simp r a)\<rbrakk>
+       \<Longrightarrow> rsimp
+            (rsimp_ALTs
+              [rder x (RSEQ (rders_simp r a) (RSTAR r)),
+               rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))]) =
+           rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r (a # Ss))))"
+  apply(subst star_update_case2)
+   apply simp
+  apply(subst List.list.map(2))
+  apply(subst evo3_main_not2)
+   apply simp
+  apply(subst evo3_main_not3)
+  using rsimp_alts_idem by presburger
+
+
+lemma evo3_aux2:
+  shows "rders_simp r a = RONE \<Longrightarrow> rsimp_SEQ (rders_simp (rders_simp r a) [x]) (RSTAR r) = RZERO"
+  by simp
+lemma evo3_aux3:
+  shows "rsimp (rsimp_ALTs (RZERO # rs)) = rsimp (rsimp_ALTs rs)"
+  by (metis list.simps(8) list.simps(9) rdistinct.simps(1) rflts.simps(1) rflts.simps(2) rsimp.simps(2) rsimp_ALTs.simps(1) rsimp_ALTs.simps(2) rsimp_ALTs.simps(3) rsimp_alts_idem)
+
+lemma evo3_aux4:
+  shows " rsimp
+            (rsimp_ALTs
+              [RSEQ (rder x r) (RSTAR r),
+               rsimp (rsimp_ALTs rs)]) =
+           rsimp
+            (rsimp_ALTs
+              (rsimp_SEQ (rders_simp r [x]) (RSTAR r) # rs))"
+  by (metis rders_simp_one_char rsimp.simps(1) rsimp.simps(6) rsimp_alts_idem rsimp_alts_idem2)
+
+lemma evo3_aux5:
+  shows "rders_simp r a \<noteq> RONE \<and> rders_simp r a \<noteq> RZERO \<Longrightarrow> rsimp_SEQ (rders_simp r a) (RSTAR r) = RSEQ (rders_simp r a) (RSTAR r)"
+  using idiot2 by blast
+
+
+lemma evolution_step3:
+  shows" \<And>a Ss.
+       rsimp (rsimp_ALTs (map (\<lambda>s1. rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r))) Ss)) =
+       rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss))) \<Longrightarrow>
+       rsimp
+        (rsimp_ALTs
+          [rder x (rsimp_SEQ (rders_simp r a) (RSTAR r)),
+           rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))]) =
+       rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r (a # Ss))))"
+  apply(case_tac "rders_simp r a = RONE")
+   apply(subst rsimp_seq_aux1)
+    apply simp
+  apply(subst rder.simps(6))
+   apply(subgoal_tac "rnullable (rders_simp r a)")
+    prefer 2
+  using rnullable.simps(2) apply presburger
+   apply(subst star_update_case1)
+    apply simp
+
+   apply(subst List.list.map)+
+  apply(subst rders_simp_append)
+   apply(subst evo3_aux2)
+    apply simp
+   apply(subst evo3_aux3)
+   apply(subst evo3_aux4)
+   apply simp
+  apply(case_tac "rders_simp r a = RZERO")
+
+   apply (simp add: rsimp_alts_idem2)
+   apply(subgoal_tac "rders_simp r (a @ [x]) = RZERO")
+  prefer 2
+  using rder.simps(1) rders_simp_append rders_simp_one_char rsimp.simps(3) apply presburger
+  using rflts.simps(2) rsimp.simps(3) rsimp_SEQ.simps(1) apply presburger
+  apply(subst evo3_aux5)
+   apply simp
+  apply(case_tac "rnullable (rders_simp r a) ")
+  using evo3_main_nullable apply blast
+  using evo3_main_notnullable apply blast
+  done
+
+(*
+proof (prove)
+goal (1 subgoal):
+ 1. map f (a # s) = f a # map f s 
+Auto solve_direct: the current goal can be solved directly with
+  HOL.nitpick_simp(115): map ?f (?x21.0 # ?x22.0) = ?f ?x21.0 # map ?f ?x22.0
+  List.list.map(2): map ?f (?x21.0 # ?x22.0) = ?f ?x21.0 # map ?f ?x22.0
+  List.list.simps(9): map ?f (?x21.0 # ?x22.0) = ?f ?x21.0 # map ?f ?x22.0
+*)
+lemma starseq_list_evolution:
+  fixes  r :: rrexp and Ss :: "char list list" and x :: char 
+  shows "rsimp (rsimp_ALTs (map (\<lambda>s1. rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r))) Ss) ) =
+         rsimp (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss))  )"   
+  apply(induct Ss)
+   apply simp
+  apply(subst List.list.map(2))
+  apply(subst evolution_step2)
+   apply simp
+
+
+  sorry
+
+
+lemma star_seqs_produce_star_seqs:
+  shows "rsimp (rsimp_ALTs (map (rder x \<circ> (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r))) Ss))
+       = rsimp (rsimp_ALTs (map ( (\<lambda>s1. rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r)))) Ss))"
+  by (meson comp_apply)
+
+lemma map_der_lambda_composition:
+  shows "map (rder x) (map (\<lambda>s. f s) Ss) = map (\<lambda>s. (rder x (f s))) Ss"
+  by force
+
+lemma ralts_vs_rsimpalts:
+  shows "rsimp (RALTS rs) = rsimp (rsimp_ALTs rs)"
+  by (metis evo3_aux3 rsimp_ALTs.simps(2) rsimp_ALTs.simps(3) simp_flatten2)
+  
+
+lemma linearity_of_list_of_star_or_starseqs: 
+  fixes r::rrexp and Ss::"char list list" and x::char
+  shows "\<exists>Ssa. rsimp (rder x (rsimp_ALTs (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ss))) =
+                 rsimp (RALTS ( (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ssa)))"
+  apply(subst rder_rsimp_ALTs_commute)
+  apply(subst map_der_lambda_composition)
+  using starseq_list_evolution
+  apply(rule_tac x = "star_update x r Ss" in exI)
+  apply(subst ralts_vs_rsimpalts)
+  by simp
+
+
+
+(*certified correctness---does not depend on any previous sorry*)
+lemma star_list_push_der: shows  " \<lbrakk>xs \<noteq> [] \<Longrightarrow> \<exists>Ss. rders_simp (RSTAR r) xs = rsimp (RALTS (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ss));
+        xs @ [x] \<noteq> []; xs \<noteq> []\<rbrakk> \<Longrightarrow>
+     \<exists>Ss. rders_simp (RSTAR r ) (xs @ [x]) = 
+        rsimp (RALTS (map (\<lambda>s1. (rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r))) ) Ss) )"
+  apply(subgoal_tac  "\<exists>Ss. rders_simp (RSTAR r) xs = rsimp (RALTS (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ss))")
+  prefer 2
+  apply blast
+  apply(erule exE)
+  apply(subgoal_tac "rders_simp (RSTAR r) (xs @ [x]) = rsimp (rder x (rsimp (RALTS (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ss))))")
+  prefer 2
+  using rders_simp_append
+  using rders_simp_one_char apply presburger
+  apply(rule_tac x= "Ss" in exI)
+  apply(subgoal_tac " rsimp (rder x (rsimp (RALTS (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ss)))) = 
+                       rsimp (rsimp (rder x (RALTS (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ss))))")
+  prefer 2
+  using inside_simp_removal rsimp_idem apply presburger
+  apply(subgoal_tac "rsimp (rsimp (rder x (RALTS (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ss)))) =
+                     rsimp (rsimp (RALTS (map (rder x) (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ss))))")
+  prefer 2
+  using rder.simps(4) apply presburger
+  apply(subgoal_tac "rsimp (rsimp (RALTS (map (rder x) (map (\<lambda>s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ss)))) =
+                     rsimp (rsimp (RALTS (map (\<lambda>s1. (rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r)))) Ss)))")
+   apply (metis rsimp_idem)
+  by (metis map_der_lambda_composition)
+
+
+
+end
diff -r c6351a96bf80 -r a7e98deebb5c thys2/GeneralRegexBound.thy
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/thys2/GeneralRegexBound.thy	Wed Mar 09 17:33:08 2022 +0000
@@ -0,0 +1,97 @@
+theory GeneralRegexBound imports 
+"BasicIdentities"
+begin
+
+
+lemma non_zero_size:
+  shows "rsize r \<ge> Suc 0"
+  apply(induct r)
+  apply auto done
+
+corollary size_geq1:
+  shows "rsize r \<ge> 1"
+  by (simp add: non_zero_size)
+
+
+definition SEQ_set where
+  "SEQ_set A n \<equiv> {RSEQ r1 r2 | r1 r2. r1 \<in> A \<and> r2 \<in> A \<and> rsize r1 + rsize r2 \<le> n}"
+
+definition SEQ_set_cartesian where
+"SEQ_set_cartesian A n  = {RSEQ r1 r2 | r1 r2. r1 \<in> A \<and> r2 \<in> A}"
+
+definition ALT_set where
+"ALT_set A n \<equiv> {RALTS rs | rs. set rs \<subseteq> A \<and> sum_list (map rsize rs) \<le> n}"
+
+
+definition
+  "sizeNregex N \<equiv> {r. rsize r \<le> N}"
+
+lemma sizenregex_induct:
+  shows "sizeNregex (Suc n) = sizeNregex n \<union> {RZERO, RONE, RALTS []} \<union> {RCHAR c| c. True} \<union>
+SEQ_set ( sizeNregex n) n \<union> ALT_set (sizeNregex n) n \<union> (RSTAR ` (sizeNregex n))"
+  sorry
+
+
+lemma chars_finite:
+  shows "finite (RCHAR ` (UNIV::(char set)))"
+  apply(simp)
+  done
+
+thm full_SetCompr_eq 
+
+lemma size1finite:
+  shows "finite (sizeNregex (Suc 0))"
+  apply(subst sizenregex_induct)
+  apply(subst finite_Un)+
+  apply(subgoal_tac "sizeNregex 0 = {}")
+  apply(rule conjI)+
+  apply (metis Collect_empty_eq finite.emptyI non_zero_size not_less_eq_eq sizeNregex_def)
+      apply simp
+      apply (simp add: full_SetCompr_eq)
+  apply (simp add: SEQ_set_def)
+    apply (simp add: ALT_set_def)  
+   apply(simp add: full_SetCompr_eq)
+  using non_zero_size not_less_eq_eq sizeNregex_def by fastforce
+
+lemma seq_included_in_cart:
+  shows "SEQ_set A n \<subseteq> SEQ_set_cartesian A n"
+  using SEQ_set_cartesian_def SEQ_set_def by fastforce
+
+lemma finite_seq:
+  shows " finite (sizeNregex n) \<Longrightarrow> finite (SEQ_set (sizeNregex n) n)"
+  apply(rule finite_subset)
+  sorry
+
+
+lemma finite_size_n:
+  shows "finite (sizeNregex n)"
+  apply(induct n)
+  apply (metis Collect_empty_eq finite.emptyI non_zero_size not_less_eq_eq sizeNregex_def)
+  apply(subst sizenregex_induct)
+  apply(subst finite_Un)+
+  apply(rule conjI)+
+       apply simp
+      apply simp
+     apply (simp add: full_SetCompr_eq)
+
+  sorry
+
+lemma three_easy_cases0: shows 
+"rsize (rders_simp RZERO s) \<le> Suc 0"
+  sorry
+
+
+lemma three_easy_cases1: shows 
+"rsize (rders_simp RONE s) \<le> Suc 0"
+  sorry
+
+lemma three_easy_casesC: shows
+"rsize (rders_simp (RCHAR c) s) \<le> Suc 0"
+
+  sorry
+
+
+
+
+end
+
diff -r c6351a96bf80 -r a7e98deebb5c thys2/SizeBoundStrong.thy
--- a/thys2/SizeBoundStrong.thy	Tue Mar 08 00:50:40 2022 +0000
+++ b/thys2/SizeBoundStrong.thy	Wed Mar 09 17:33:08 2022 +0000
@@ -942,21 +942,14 @@
 
 fun pAKC_aux :: "arexp list \<Rightarrow> arexp \<Rightarrow> rexp \<Rightarrow> arexp"
   where
-"pAKC_aux rsa r ctx = (if (L (SEQ (erase r) ctx)) \<subseteq> (L (erase (AALTs [] rsa))) then AZERO else
-                          case r of (ASEQ bs r1 r2) \<Rightarrow> pAKC_aux rsa r1 (SEQ (erase r1) ctx)     |
-                                    (AALTs bs rs) \<Rightarrow> AALTs bs (map (\<lambda>r. pAKC_aux rsa r ctx) rs) |
+"pAKC_aux rsa r ctx = (if (L (SEQ (erase r) ( ctx) )) \<subseteq> (L (erase (AALTs [] rsa))) then AZERO else
+                          case r of (ASEQ bs r1 r2) \<Rightarrow> 
+                            bsimp_ASEQ bs (pAKC_aux rsa r1 (SEQ  (erase r1) ( ctx) )) r2   |
+                                    (AALTs bs rs) \<Rightarrow> 
+                            bsimp_AALTs bs (flts (map (\<lambda>r. pAKC_aux rsa r ( ctx) ) rs))    |
                                     r             \<Rightarrow> r
 )"
 
-|"pAKC_aux rsa (ASEQ bs1 a1 a2) ctx = (if (L (SEQ (SEQ (erase a1) (erase a2)) ctx)) \<subseteq>
-                                              L (erase (AALTs [] rsa)) 
-                                          then AZERO 
-                                          else (pAKC_aux rsa a1 (ASEQ bs1 a2 ctx)))"
-| "pAKC_aux rsa (AALTs bs1 rs1) ctx = ctx "
-
-fun pruneAwayKnownComponents :: "arexp list \<Rightarrow> arexp \<Rightarrow> arexp" where
-"pruneAwayKnownComponents rsa (ASEQ bs r1 r2) = (pAKC_aux rsa r1 r2)"
-
 
 
 inductive 
@@ -974,7 +967,7 @@
 | "AALTs bs [] \<leadsto> AZERO"
 | "AALTs bs [r] \<leadsto> fuse bs r"
 | "erase a1 = erase a2 \<Longrightarrow> AALTs bs (rsa@[a1]@rsb@[a2]@rsc) \<leadsto> AALTs bs (rsa@[a1]@rsb@rsc)"
-| "pruneAwayKnownComponents rsa a2 = a2' \<Longrightarrow> AALTs bs (rsa @ [a2] @ rsb) \<leadsto> AALTs bs (rsa @ [a2'] @ rsb)"
+| "pAKC_aux rsa a2 ONE = a2' \<Longrightarrow> AALTs bs (rsa @ [a2] @ rsb) \<leadsto> AALTs bs (rsa @ [a2'] @ rsb)"
 
 
 inductive