# HG changeset patch
# User Chengsong
# Date 1656103763 -3600
# Node ID 0f00d440f48471822ca965ca69ee76a92831db2d
# Parent  3ea82d8f0aa4a8813ca722ef03f4e5bde5670d98
more changes

diff -r 3ea82d8f0aa4 -r 0f00d440f484 ChengsongTanPhdThesis/Chapters/Finite.tex
--- a/ChengsongTanPhdThesis/Chapters/Finite.tex	Fri Jun 24 11:41:05 2022 +0100
+++ b/ChengsongTanPhdThesis/Chapters/Finite.tex	Fri Jun 24 21:49:23 2022 +0100
@@ -206,6 +206,7 @@
 simplification and derivatives can be done by the size of their unlifted 
 counterpart with the unlifted version of simplification and derivatives applied.
 \begin{lemma}
+	The following equalities hold:
 	\begin{itemize}
 		\item
 $\asize{\bsimp{a}} = \rsize{\rsimp{\rerase{a}}}$
@@ -228,11 +229,23 @@
 %-----------------------------------
 %	SECTION ?
 %-----------------------------------
- \section{Roadmap to a Finiteness Proof}
+ \subsection{Finiteness Proof Using $\rrexp$s}
  Now that we have defined the $\rrexp$ datatype, and proven that its size changes
  w.r.t derivatives and simplifications mirrors precisely those of annotated regular expressions,
  we aim to bound the size of $r \backslash s$ for any $\rrexp$  $r$.
-The way we do it is by two steps:
+ Once we have a bound like: 
+ \[
+	 \llbracket r \backslash_{rsimp} s \rrbracket_r \leq N_r
+ \]
+ \noindent
+ we could easily extend that to 
+ \[
+	 \llbracket a \backslash_{bsimps} s \rrbracket \leq N_r.
+ \]
+
+ \subsection{Roadmap to a Bound for $\textit{Rrexp}$}
+
+The way we obtain the bound for $\rrexp$s is by two steps:
 \begin{itemize}
 	\item
 		First, we rewrite $r\backslash s$ into something else that is easier
@@ -240,23 +253,24 @@
 		$r_1 \cdot r_2$ and $r^*$, where the derivative can grow and bloat in a wild way,
 		but after simplification they will always be equal or smaller to a form consisting of an alternative
 		list of regular expressions $f \; (g\; (\sum rs))$ with some functions applied to it, where each element will be distinct after the function application.
-		The functions $f$ and $g$ are usually from $\flts$ $\distinctBy$ and other appearing in $\bsimp{_}$.
-		This way we get a different but equivalent way of expressing : $r\backslash s = f \; (g\; (\sum rs))$, we call the
-		right hand side the "closed form" of $r\backslash s$.
 	\item
 		Then, for such a sum  list of regular expressions $f\; (g\; (\sum rs))$, we can control its size
 		by estimation, since $\distinctBy$ and $\flts$ are well-behaved and working together would only 
-		reduce the size of a regular expression, not adding to it. The closed form $f\; (g\; (\sum rs)) $ is controlled
-		by the size of $rs'$, where every element in $rs'$ is distinct, and each element can be described by som e inductive sub-structures (for example when $r = r_1 \cdot r_2$ then $rs'$ will be solely comprised of $r_1 \backslash s'$ and $r_2 \backslash s''$, $s'$ and $s''$ being sub-strings of $s$), which will have a numeric uppder bound according to inductive hypothesis, which controls $r \backslash s$.
+		reduce the size of a regular expression, not adding to it.
 \end{itemize}
 
-\section{Closed Forms}
-
+\section{Step One: Closed Forms}
+		We transform the function application $\rderssimp{r}{s}$
+		into an equivalent form $f\; (g \; (\sum rs))$.
+		The functions $f$ and $g$ can be anything from $\flts$, $\distinctBy$ and other helper functions from $\bsimp{\_}$.
+		This way we get a different but equivalent way of expressing : $r\backslash s = f \; (g\; (\sum rs))$, we call the
+		right hand side the "closed form" of $r\backslash s$.
 \subsection{Basic Properties needed for Closed Forms}
 
 \subsubsection{$\textit{rdistinct}$'s Deduplicates Successfully}
 The $\textit{rdistinct}$ function, as its name suggests, will
-remove duplicates according to the accumulator
+remove duplicates in an \emph{r}$\textit{rexp}$ list, 
+according to the accumulator
 and leave only one of each different element in a list:
 \begin{lemma}
 	The function $\textit{rdistinct}$ satisfies the following
@@ -314,15 +328,15 @@
 \begin{lemma}
 	If $rs_1$'s elements are all unique, and not appearing in the accumulator $acc$, 
 	then it can be taken out and left untouched in the output:
-	\[\textit{rdistinct}\;  rs_1 @ rsa\;\, acc
-	= rs_1@(\textit{rdistinct} rsa \; (acc \cup rs_1)\]
+	\[\textit{rdistinct}\;  (rs_1 @ rsa)\;\, acc
+	= rs_1@(\textit{rdistinct} rsa \; (acc \cup rs_1))\]
 \end{lemma}
 
 \begin{proof}
 By an induction on $rs_1$, where $rsa$ and $acc$ are allowed to be arbitrary.
 \end{proof}
-\subsubsection{The properties of $\backslash_r$, $\backslash_{rsimp}$, $\textit{Rflts}$ and $\textit{Rsimp}_{ALTS}$} 
-We give in this subsection some properties of how $\backslash_r$, $\backslash_{rsimp}$, $\textit{Rflts}$ and $\textit{Rsimp}_{ALTS} $ interact with each other and  with $@$, the concatenation operator.
+\subsubsection{The Properties of $\backslash_r$, $\backslash_{rsimp}$, $\textit{Rflts}$ and $\textit{Rsimp}_{ALTS}$} 
+We give in this subsection some properties of how $\backslash_r$, $\backslash_{rsimp}$, $\textit{Rflts}$ and $\textit{Rsimp}_{ALTS} $ interact with each other and with $@$, the concatenation operator.
 These will be helpful in later closed form proofs, when
 we want to transform the ways in which multiple functions involving
 those are composed together
@@ -406,7 +420,10 @@
 Fortunately this is provable by induction on the list $rs$
 
 
+\section{Estimating the Closed Forms' sizes}
 
+		The closed form $f\; (g\; (\sum rs)) $ is controlled
+		by the size of $rs'$, where every element in $rs'$ is distinct, and each element can be described by som e inductive sub-structures (for example when $r = r_1 \cdot r_2$ then $rs'$ will be solely comprised of $r_1 \backslash s'$ and $r_2 \backslash s''$, $s'$ and $s''$ being sub-strings of $s$), which will have a numeric uppder bound according to inductive hypothesis, which controls $r \backslash s$.
 %-----------------------------------
 %	SECTION 2
 %-----------------------------------
diff -r 3ea82d8f0aa4 -r 0f00d440f484 thys2/BasicIdentities.thy
--- a/thys2/BasicIdentities.thy	Fri Jun 24 11:41:05 2022 +0100
+++ b/thys2/BasicIdentities.thy	Fri Jun 24 21:49:23 2022 +0100
@@ -248,6 +248,9 @@
 
 
 
+
+
+
 lemma rSEQ_mono:
   shows "rsize (rsimp_SEQ r1 r2) \<le>rsize ( RSEQ r1 r2)"
   apply auto
@@ -1138,7 +1141,7 @@
   shows "map rsimp (r # rs) = map rsimp (( rsimp r) # rs)"
   by (simp add: rsimp_idem)
 
-lemma head_one_more_dersimp:
+corollary head_one_more_dersimp:
   shows "map rsimp ((rder x (rders_simp r s) # rs)) = map rsimp ((rders_simp r (s@[x]) ) # rs)"
   using head_one_more_simp rders_simp_append rders_simp_one_char by presburger
 
diff -r 3ea82d8f0aa4 -r 0f00d440f484 thys3/BlexerSimp.thy
--- a/thys3/BlexerSimp.thy	Fri Jun 24 11:41:05 2022 +0100
+++ b/thys3/BlexerSimp.thy	Fri Jun 24 21:49:23 2022 +0100
@@ -266,10 +266,22 @@
   shows " \<lbrakk>erase a \<in> erase ` set rs1\<rbrakk> \<Longrightarrow> (rs1 @ a # rs2) s\<leadsto>* (rs1 @ rs2)"
   apply(subgoal_tac "\<exists>rs1a rs1b x. rs1 = rs1a @ [x] @ rs1b \<and> erase x = erase a")
   prefer 2
+   apply (meson existing_preimage)
+  apply(erule exE)+
+  apply simp
+  apply(subgoal_tac "(rs1a @ [x] @ rs1b @ [a] @ rs2) s\<leadsto> (rs1a @ [x] @ rs1b @ rs2)")  
+  apply (simp add: rs_in_rstar)
+  apply(subgoal_tac "L (erase a) \<subseteq> L (erase x)")
+  using ss6 apply presburger
+  by simp
 
+lemma ss6_realistic:
+  shows "(rs1 @ rs2) s\<leadsto>* (rs1 @ dB6 rs2 (set (concat (map turnIntoTerms (map erase rs2)))))"
+  
 
   sorry
 
+
 lemma ss6_stronger_aux:
   shows "(rs1 @ rs2) s\<leadsto>* (rs1 @ dB6 rs2 (set (map erase rs1)))"
   apply(induct rs2 arbitrary: rs1)
@@ -280,6 +292,9 @@
   apply(subgoal_tac "(rs1 @ a # rs2) s\<leadsto>* (rs1 @ rs2)")
   using srewrites_trans apply blast
   using deletes_dB apply presburger
+  apply(case_tac "set (turnIntoTerms (erase a)) \<subseteq> erase ` set rs1")
+  
+  apply simp
 
   apply(auto)
   prefer 2