more changes
authorChengsong
Fri, 01 Jul 2022 13:02:15 +0100
changeset 557 812e5d112f49
parent 556 c27f04bb2262
child 558 671a83abccf3
more changes
ChengsongTanPhdThesis/Chapters/Finite.tex
ChengsongTanPhdThesis/Chapters/Introduction.tex
thys3/BlexerSimp.thy
thys3/ClosedForms.thy
--- a/ChengsongTanPhdThesis/Chapters/Finite.tex	Wed Jun 29 12:38:05 2022 +0100
+++ b/ChengsongTanPhdThesis/Chapters/Finite.tex	Fri Jul 01 13:02:15 2022 +0100
@@ -768,7 +768,7 @@
 We need more equalities like the above to enable a closed form,
 but to proceed we need to introduce two rewrite relations,
 to make things smoother.
-\subsubsection{The rewrite relation $\hrewrite$, $\frewrite$ and $\grewrite$}
+\subsubsection{The rewrite relation $\hrewrite$ , $\scfrewrites$ , $\frewrite$ and $\grewrite$}
 Insired by the success we had in the correctness proof 
 in \ref{Bitcoded2}, where we invented
 a term rewriting system to capture the similarity between terms,
@@ -777,7 +777,8 @@
 similarities between terms that would be otherwise
 hard to express.
 
-We use $\hrewrite$ for one-step atomic rewrite of regular expression simplification, 
+We use $\hrewrite$ for one-step atomic rewrite of 
+regular expression simplification, 
 $\frewrite$ for rewrite of list of regular expressions that 
 include all operations carried out in $\rflts$, and $\grewrite$ for
 rewriting a list of regular expressions possible in both $\rflts$ and $\rdistinct{}{}$.
@@ -821,6 +822,18 @@
 \end{mathpar}
 \end{center}
 
+
+	List of rewrite rules for a list of regular expressions,
+	where each element can rewrite in many steps to the other (scf stands for
+	li\emph{s}t \emph{c}losed \emph{f}orm). This relation is similar to the 
+	$\stackrel{s*}{\rightsquigarrow}$ for annotated regular expressions.
+
+\begin{center}
+\begin{mathpar}
+	\inferrule{}{[] \scfrewrites [] }
+	\inferrule{r \hrewrites r' \\ rs \scfrewrites rs'}{r :: rs \scfrewrites r' :: rs'}
+\end{mathpar}
+\end{center}
 %frewrite
 	List of one-step rewrite rules for flattening 
 	a list of  regular expressions($\frewrite$):
@@ -838,11 +851,11 @@
 	a list of regular expressions ($\grewrite$):
 \begin{center}
 \begin{mathpar}
-	\inferrule{}{\RZERO :: rs \frewrite rs \\}
+	\inferrule{}{\RZERO :: rs \grewrite rs \\}
 
-	\inferrule{}{(\sum rs) :: rs_a \frewrite rs @ rs_a \\}
+	\inferrule{}{(\sum rs) :: rs_a \grewrite rs @ rs_a \\}
 
-	\inferrule{rs_1 \frewrite rs_2}{r :: rs_1 \frewrite r :: rs_2}
+	\inferrule{rs_1 \grewrite rs_2}{r :: rs_1 \grewrite r :: rs_2}
 
 	\inferrule[dB]{}{rs_a @ [a] @ rs_b @[a] @ rs_c \grewrite rs_a @ [a] @ rsb @ rsc}
 \end{mathpar}
@@ -851,78 +864,336 @@
 \noindent
 The reason why we take the trouble of defining 
 two separate list rewriting definitions $\frewrite$ and $\grewrite$
-is that sometimes $\grewrites$ is slightly too powerful
-for proving equivalence.
+is to separate the two stages of simplification: flattening and de-duplicating.
+Sometimes $\grewrites$ is slightly too powerful
+so we would rather use $\frewrites$ which makes certain rewriting steps 
+more straightforward to prove.
 For example, when proving the closed-form for the alternative regular expression,
 one of the rewriting steps would be:
 \begin{lemma}
-	$\sum (\rDistinct \; (\map \; (\_ \backslash x) \; (\rflts \; rs)) \; \varnothing) \sequal
-	 \sum (\rDistinct \; (\rflts \; (\map \; (\_ \backslash x) \; rs)) \; \varnothing)
+	$\sum (\rDistinct \;\; (\map \; (\_ \backslash x) \; (\rflts \; rs)) \;\; \varnothing) \sequal
+	 \sum (\rDistinct \;\;  (\rflts \; (\map \; (\_ \backslash x) \; rs)) \;\; \varnothing)
 	 $
 \end{lemma}
 \noindent
 Proving this is by first showing 
-\begin{lemma}
+\begin{lemma}\label{earlyLaterDerFrewrites}
 	$\map \; (\_ \backslash x) \;  (\rflts \; rs) \frewrites
-\rflts \; (\map \; (\_ \backslash x) \; rs)$
+	\rflts \; (\map \; (\_ \backslash x) \; rs)$
 \end{lemma}
 \noindent
 and then using lemma
 \begin{lemma}\label{frewritesSimpeq}
 	If $rs_1 \frewrites rs_2 $, then $\sum (\rDistinct \; rs_1 \; \varnothing) \sequal 
-	\sum (\rDistinct \;  rs_2 \;  {})$.
+	\sum (\rDistinct \;  rs_2 \;  \varnothing)$.
 \end{lemma}
-. But this trick will not work for $\grewrites$:
+\noindent
+is a piece of cake.
+But this trick will not work for $\grewrites$.
+For example, a rewriting step in proving
+closed forms is:
+\begin{center}
+$\rsimp{(\rsimpalts \; (\map \; (\_ \backslash x) \; (\rdistinct{(\rflts \; (\map \; (\rsimp{} \; \circ \; (\lambda r. \rderssimp{r}{xs}))))}{\varnothing})))}$\\
+$=$ \\
+$\rsimp{(\rsimpalts \; (\rdistinct{(\map \; (\_ \backslash x) \; (\rflts \; (\map \; (\rsimp{} \; \circ \; (\lambda r. \rderssimp{r}{xs})))) ) }{\varnothing}))} $
+\noindent
+\end{center}
+For this one would hope to have a rewriting relation between the two lists involved,
+similar to \ref{earlyLaterDerFrewrites}. However, it turns out that 
 \begin{center}
 $\map \; (\_ \backslash x) \; (\rDistinct \; rs \; rset) \grewrites \rDistinct \; (\map \;
-(\_ \backslash x) \; rs) \; (\map \; (\_ \backslash x) \; rset)$
+(\_ \backslash x) \; rs) \; ( rset \backslash x)$
 \end{center}
 \noindent
-does \emph{not} hold.
-
-
+does $\mathbf{not}$ hold in general.
+For this rewriting step we will introduce some slightly more cumbersome
+proof technique in later sections.
+The point is that $\frewrite$
+allows us to prove equivalence in a straightforward two-step method that is 
+not possible for $\grewrite$, thereby reducing the complexity of the entire proof.
 
 
-%this is for closed form for alts section, talk about that later------------------------------
-\begin{comment}
+\subsubsection{Terms That Can Be Rewritten Using $\hrewrites$, $\grewrites$, and $\frewrites$}
+We present in the below lemma a few pairs of terms that are rewritable via 
+$\grewrites$:
+\begin{lemma}\label{gstarRdistinctGeneral}
+	\begin{itemize}
+		\item
+			$rs_1 @ rs \grewrites rs_1 @ (\rDistinct \; rs \; rs_1)$
+		\item
+			$rs \grewrites \rDistinct \; rs \; \varnothing$
+		\item
+			$rs_a @ (\rDistinct \; rs \; rs_a) \grewrites rs_a @ (\rDistinct \; 
+			rs \; (\{\RZERO\} \cup rs_a))$
+		\item
+			$rs \;\; @ \;\; \rDistinct \; rs_a \; rset \grewrites rs @  \rDistinct \; rs_a \;
+			(rest \cup rs)$
+
+	\end{itemize}
+\end{lemma}
+\noindent
+If a pair of terms $rs_1, rs_2$ are rewritable via $\grewrites$ to each other,
+then they are equivalent under $\rsimp{}$:
+\begin{lemma}\label{grewritesSimpalts}
+	If $rs_1 \grewrites rs_2$, then
+	we have the following equivalence hold:
+	\begin{itemize}
+		\item
+			$\sum rs_1 \sequal \sum rs_2$
+		\item
+			$\rsimpalts \; rs_1 \sequal \rsimpalts \; rs_2$
+	\end{itemize}
+\end{lemma}
+\noindent
+Here are a few connecting lemmas showing that
+if a list of regular expressions can be rewritten using $\grewrites$ or $\frewrites $ or
+$\scfrewrites$,
+then an alternative constructor taking the list can also be rewritten using $\hrewrites$:
+\begin{lemma}
+	\begin{itemize}
+		\item
+			If $rs \grewrites rs'$ then $\sum rs \hrewrites \sum rs'$.
+		\item
+			If $rs \grewrites rs'$ then $\sum rs \hrewrites \rsimpalts \; rs'$
+		\item
+			If $rs_1 \scfrewrites rs_2$ then $\sum (rs @ rs_1) \hrewrites \sum (rs @ rs_2)$
+		\item
+			If $rs_1 \scfrewrites rs_2$ then $\sum rs_1 \hrewrites \sum rs_2$
+
+	\end{itemize}
+\end{lemma}
+\noindent
+Here comes the meat of the proof, 
+which says that once two lists are rewritable to each other,
+then they are equivalent under $\rsimp{}$:
+\begin{lemma}
+	If $r_1 \hrewrites r_2$ then $r_1 \sequal r_2$.
+\end{lemma}
+
+\noindent
+And similar to \ref{Bitcoded2} one can preserve rewritability after taking derivative
+of two regular expressions on both sides:
+\begin{lemma}\label{interleave}
+	If $r \hrewrites r' $ then $\rder{c}{r} \hrewrites \rder{c}{r'}$
+\end{lemma}
+\noindent
+This allows proving more $\mathbf{rsimp}$-equivalent terms, involving $\backslash_r$ now.
+\begin{lemma}\label{insideSimpRemoval}
+	$\rsimp{\rder{c}{\rsimp{r}}} = \rsimp{\rder{c}{r}}  $
+\end{lemma}
+\noindent
+\begin{proof}
+	By \ref{interleave} and \ref{rsimpIdem}.
+\end{proof}
+\noindent
+And this unlocks more equivalent terms:
+\begin{lemma}\label{Simpders}
+	As corollaries of \ref{insideSimpRemoval}, we have
+	\begin{itemize}
+		\item
+			If $s \neq []$ then $\rderssimp{r}{s} = \rsimp{(\rders \; r \; s)}$.
+		\item
+			$\rsimpalts \; (\map \; (\_ \backslash_r x) \;
+				(\rdistinct{rs}{\varnothing})) \sequal
+			 \rsimpalts \; (\rDistinct \; 
+			 (\map \; (\_ \backslash_r x) rs) \;\varnothing  )$
+	 \end{itemize}
+ \end{lemma}
+\noindent
+
+Finally,
+together with 
+\begin{lemma}\label{rderRsimpAltsCommute}
+	$\rder{x}{(\rsimpalts \; rs)} = \rsimpalts \; (\map \; (\rder{x}{\_}) \; rs)$
+\end{lemma}
+\noindent
+this leads to the first closed form--
+\begin{lemma}\label{altsClosedForm}
 \begin{center}
-$\rsimp{(\rsimpalts \; (\map \; (\_ \backslash x) \; (\rdistinct{(\rflts \; (\map \; (\rsimp{} \; \circ \; (\lambda r. \rderssimp{r}{xs}))))}{\varnothing})))} = 
-\rsimp{(\rsimpalts \; (\rdistinct{(\map \; (\_ \backslash x) \; (\rflts \; (\map \; (\rsimp{} \; \circ \; (\lambda r. \rderssimp{r}{xs})))) ) }{\varnothing}))} $
+	$\rderssimp{(\sum rs)}{s} \sequal
+	 \sum \; (\map \; (\rderssimp{\_}{s}) \; rs)$
 \end{center}
+\end{lemma}
 	
 \noindent
-This is not possible to prove 
+\begin{proof}
+	By a reverse induction on the string $s$.
+	One rewriting step, as we mentioned earlier,
+	involves
+	\begin{center}
+		$\rsimpalts \; (\map \; (\_ \backslash x) \; 
+		(\rdistinct{(\rflts \; (\map \; (\rsimp{} \; \circ \; 
+		(\lambda r. \rderssimp{r}{xs}))))}{\varnothing}))
+		\sequal
+		\rsimpalts \; (\rdistinct{(\map \; (\_ \backslash x) \; 
+		(\rflts \; (\map \; (\rsimp{} \; \circ \; 
+		(\lambda r. \rderssimp{r}{xs})))) ) }{\varnothing}) $.
+	\end{center}
+	This can be proven by a combination of 
+	\ref{grewritesSimpalts}, \ref{gstarRdistinctGeneral}, \ref{rderRsimpAltsCommute}, and
+	\ref{insideSimpRemoval}.
+\end{proof}
+\noindent
+This closed form has a variant which can be more convenient in later proofs:
+\begin{corollary}
+	If $s \neq []$ then 
+	$\rderssimp \; (\sum \; rs) \; s = 
+	\rsimp{(\sum \; (\map \; \rderssimp{\_}{s} \; rs))}$.
+\end{corollary}
+\noindent
+The harder closed forms are the sequence and star ones.
+Before we go on to obtain them, some preliminary definitions
+are needed to make proof statements concise.
 
-\end{comment}
-%this is for closed form for alts section, talk about that later------------------------------
-And we define an "grewrite" relation that works on lists:
+\section{"Closed Forms" of regular expressions' derivatives w.r.t strings}
+We note that the different ways in which regular expressions are 
+nested do not matter under $\rsimp{}$:
+\begin{center}
+	To be proven:\\
+	$\rsimp{r} \stackrel{?}{=} \rsimp{r'}$ if $r = \sum [r_1, r_2, r_3, \ldots]$ 
+and $r' =(\ldots ((r_1 + r_2) + r_3) + \ldots)$
+\end{center}
+\noindent
+This intuition is also echoed by IndianPaper, where they gave
+a pencil-and-paper derivation of $(r_1 \cdot r_2)\backslash s$:
 \begin{center}
-\begin{tabular}{lcl}
-$ \ZERO :: rs$ & $\rightarrow_g$ & $rs$
+	$((r_1 \cdot r_2) \backslash_r c_1) \stackrel{\backslash_r c_2}{=}
+	((r_1 \backslash_r c_1) \cdot r_2 + (\delta\; (\rnullable \; r_1),\; r_2 \backslash_r c_1)) 
+	\stackrel{\backslash_r c_2}{=}
+	((r_1 \backslash_r c_1c_2 \cdot r_2 + (\delta \; (\rnullable) \; r_1, r_2 \backslash_r c_1c_2)
+	+ (\delta \; (\rnullable \; r_1 \backslash_r c)\; r_2 \backslash_r c_2)
+	$
+\end{center}
+\noindent
+The $\delta \; b \; r$ function above turns the entire term into $\RZERO$
+if the boolean condition $b$ evaluates to false, and outputs $r$ otherwise.
+The equality in their derivation steps should be interpretated
+as language equivalence. To pin down the intuition into rigorous terms,
+$\sflat{}$ is used to enable the transformation from 
+a left-associative nested sequence of alternatives into 
+a flattened list:
+$r = \sum [r_1, r_2, r_3, \ldots] \stackrel{\sflat{}}{\rightarrow} 
+(\ldots ((r_1 + r_2) + r_3) + \ldots)$
+The definitions $\sflat{}$, $\sflataux{}$ are given below.
+
+ \begin{center}  
+ \begin{tabular}{ccc}
+ $\sflataux{\AALTS{ }{r :: rs}}$ & $=$ & $\sflataux{r} @ rs$\\
+$\sflataux{\AALTS{ }{[]}}$ & $ = $ & $ []$\\
+$\sflataux r$ & $=$ & $ [r]$
 \end{tabular}
 \end{center}
 
-
+ \begin{center} 
+ \begin{tabular}{ccc}
+	 $\sflat{(\sum r :: rs)}$ & $=$ & $\sum (\sflataux{r} @ rs)$\\
+	 $\sflat{\sum []}$ & $ = $ & $ \sum []$\\
+$\sflat r$ & $=$ & $ r$
+\end{tabular}
+\end{center}
+The intuition behind $\sflataux{\_}$ is to break up nested regexes 
+of the $(\ldots((r_1 + r_2) + r_3) + \ldots )$(left-associated) shape
+into a more "balanced" list: $\AALTS{\_}{[r_1,\, r_2 ,\, r_3, \ldots]}$.
+It will return the singleton list $[r]$ otherwise.
+$\sflat{\_}$ works the same  as $\sflataux{\_}$, except that it keeps
+the output type a regular expression, not a list.
+$\sflataux{\_}$  and $\sflat{\_}$ is only recursive in terms of the
+ first element of the list of children of
+an alternative regex ($\AALTS{ }{rs}$), and is purposefully built for  dealing with the regular expression
+$r_1 \cdot r_2 \backslash s$.
+\subsection{The $\textit{vsuf}$ Function}
+Note that $((r_1 \backslash_r c_1c_2 \cdot r_2 + (\delta \; (\rnullable) \; r_1, r_2 \backslash_r c_1c_2)
+	+ (\delta \; (\rnullable \; r_1 \backslash_r c)\; r_2 \backslash_r c_2)$ does not faithfully
+represent what would the intermediate derivatives would actually look like.
+For example, when $r_1$ and $r_1 \backslash_r c_1$ are nullable,
+the regular expression would not look like $(r_1 \backslash_r c_1c_2 + \RZERO ) + \RZERO$,
+but actually $r_1\backslash_r c_1c_2$, the redundant $\RZERO$s will not be created in the
+first place.
+In a closed-form one would want to take into account this 
+and generate the list of string pairs $(s', s'')$ where $s'@s'' = s$ such that
+only $r_1 \backslash s'$ nullable.
+With $\sflat{\_}$ and $\sflataux{\_}$ we make 
+precise what  "closed forms" we have for the sequence derivatives and their simplifications,
+in other words, how can we construct $(r_1 \cdot r_2) \backslash s$
+and $\bderssimp{(r_1\cdot r_2)}{s}$,
+if we are only allowed to use a combination of $r_1 \backslash s'$ ($\bderssimp{r_1}{s'}$)
+and  $r_2 \backslash s'$ ($\bderssimp{r_2}{s'}$), where $s'$  ranges over 
+the substring of $s$?
+First let's look at a series of derivatives steps on a sequence 
+regular expression,  (assuming) that each time the first
+component of the sequence is always nullable):
+\begin{center}
 
-With these relations we prove
+$r_1 \cdot r_2 \quad \longrightarrow_{\backslash c}  \quad   r_1  \backslash c \cdot r_2 + r_2 \backslash c \quad \longrightarrow_{\backslash c'} \quad (r_1 \backslash cc' \cdot r_2 + r_2 \backslash c') + r_2 \backslash cc' \longrightarrow_{\backslash c''} \quad$\\
+$((r_1 \backslash cc'c'' \cdot r_2 + r_2 \backslash c'') + r_2 \backslash c'c'') + r_2 \backslash cc'c''   \longrightarrow_{\backslash c''} \quad
+ \ldots$
+
+\end{center}
+%TODO: cite indian paper
+Indianpaper have  come up with a slightly more formal way of putting the above process:
+\begin{center}
+$r_1 \cdot r_2 \backslash (c_1 :: c_2 ::\ldots c_n) \myequiv r_1 \backslash (c_1 :: c_2:: \ldots c_n) +
+\delta(\nullable(r_1 \backslash (c_1 :: c_2 \ldots c_{n-1}) ), r_2 \backslash (c_n)) + \ldots
++ \delta (\nullable(r_1), r_2\backslash (c_1 :: c_2 ::\ldots c_n))$
+\end{center}
+where  $\delta(b, r)$ will produce $r$
+if $b$ evaluates to true, 
+and $\ZERO$ otherwise.
+
+ But the $\myequiv$ sign in the above equation means language equivalence instead of syntactical
+ equivalence. To make this intuition useful 
+ for a formal proof, we need something
+stronger than language equivalence.
+With the help of $\sflat{\_}$ we can state the equation in Indianpaper
+more rigorously:
 \begin{lemma}
-$rs \rightarrow rs'  \implies \RALTS{rs} \rightarrow \RALTS{rs'}$
-\end{lemma}
-which enables us to prove "closed forms" equalities such as
-\begin{lemma}
-$\sflat{(r_1 \cdot r_2) \backslash s} = \RALTS{ (r_1 \backslash s) \cdot r_2 :: (\map (r_2 \backslash \_) (\suffix \; s \; r_1 ))}$
+$\sflat{(r_1 \cdot r_2) \backslash s} = \RALTS{ (r_1 \backslash s) \cdot r_2 :: (\map (r_2 \backslash \_) (\vsuf{s}{r_1}))}$
 \end{lemma}
 
-But the most involved part of the above lemma is proving the following:
+The function $\vsuf{\_}{\_}$ is defined recursively on the structure of the string:
+
+\begin{center}
+\begin{tabular}{lcl}
+$\vsuf{[]}{\_} $ & $=$ &  $[]$\\
+$\vsuf{c::cs}{r_1}$ & $ =$ & $ \textit{if} (\rnullable{r_1}) \textit{then} \; (\vsuf{cs}{(\rder{c}{r_1})}) @ [c :: cs]$\\
+                                     && $\textit{else} \; (\vsuf{cs}{(\rder{c}{r_1}) })  $
+\end{tabular}
+\end{center}                   
+It takes into account which prefixes $s'$ of $s$ would make $r \backslash s'$ nullable,
+and keep a list of suffixes $s''$ such that $s' @ s'' = s$. The list is sorted in
+the order $r_2\backslash s''$ appears in $(r_1\cdot r_2)\backslash s$.
+In essence, $\vsuf{\_}{\_}$ is doing a "virtual derivative" of $r_1 \cdot r_2$, but instead of producing 
+the entire result of  $(r_1 \cdot r_2) \backslash s$, 
+it only stores all the $s''$ such that $r_2 \backslash s''$  are occurring terms in $(r_1\cdot r_2)\backslash s$.
+
+With this we can also add simplifications to both sides and get
 \begin{lemma}
-$\bsimp{\RALTS{rs @ \RALTS{rs_1} @ rs'}} = \bsimp{\RALTS{rs @rs_1 @ rs'}} $ 
+$\rsimp{\sflat{(r_1 \cdot r_2) \backslash s} }= \rsimp{\AALTS{[[]}{ (r_1 \backslash s) \cdot r_2 :: (\map (r_2 \backslash \_) (\vsuf{s}{r_1}))}}$
 \end{lemma}
-which is needed in proving things like 
+Together with the idempotency property of $\rsimp{}$\ref{rsimpIdem},
+%TODO: state the idempotency property of rsimp
+it is possible to convert the above lemma to obtain a "closed form"
+for  derivatives nested with simplification:
 \begin{lemma}
-$r \rightarrow_f r'  \implies \rsimp{r} \rightarrow \rsimp{r'}$
+$\rderssimp{(r_1 \cdot r_2)}{s} = \rsimp{\AALTS{[[]}{ (r_1 \backslash s) \cdot r_2 :: (\map (r_2 \backslash \_) (\vsuf{s}{r_1}))}}$
 \end{lemma}
+And now the reason we have (1) in section 1 is clear:
+$\rsize{\rsimp{\RALTS{ (r_1 \backslash s) \cdot r_2 :: (\map \;(r_2 \backslash \_) \; (\vsuf{s}{r_1}))}}}$, 
+is equal to $\rsize{\rsimp{\RALTS{ ((r_1 \backslash s) \cdot r_2 :: (\map \; (r_2 \backslash \_) \; (\textit{Suffix}(r1, s))))}}}$
+    as $\vsuf{s}{r_1}$ is one way of expressing the list $\textit{Suffix}( r_1, s)$.
 
-Fortunately this is provable by induction on the list $rs$
+%----------------------------------------------------------------------------------------
+%	SECTION 3
+%----------------------------------------------------------------------------------------
+
+\section{interaction between $\distinctWith$ and $\flts$}
+Note that it is not immediately obvious that 
+\begin{center}
+$\llbracket \distinctWith (\flts \textit{rs}) = \phi \rrbracket   \leq \llbracket \distinctWith \textit{rs} = \phi \rrbracket  $.
+\end{center}
+The intuition is that if we remove duplicates from the $\textit{LHS}$, at least the same amount of 
+duplicates will be removed from the list $\textit{rs}$ in the $\textit{RHS}$. 
+
 
 \subsection{A Closed Form for the Sequence Regular Expression}
 \begin{quote}\it
@@ -1223,119 +1494,6 @@
 %	SECTION ??
 %----------------------------------------------------------------------------------------
 
-\section{"Closed Forms" of regular expressions' derivatives w.r.t strings}
-To embark on getting the "closed forms" of regexes, we first
-define a few auxiliary definitions to make expressing them smoothly.
-
- \begin{center}  
- \begin{tabular}{ccc}
- $\sflataux{\AALTS{ }{r :: rs}}$ & $=$ & $\sflataux{r} @ rs$\\
-$\sflataux{\AALTS{ }{[]}}$ & $ = $ & $ []$\\
-$\sflataux r$ & $=$ & $ [r]$
-\end{tabular}
-\end{center}
-The intuition behind $\sflataux{\_}$ is to break up nested regexes 
-of the $(\ldots((r_1 + r_2) + r_3) + \ldots )$(left-associated) shape
-into a more "balanced" list: $\AALTS{\_}{[r_1,\, r_2 ,\, r_3, \ldots]}$.
-It will return the singleton list $[r]$ otherwise.
-
-$\sflat{\_}$ works the same  as $\sflataux{\_}$, except that it keeps
-the output type a regular expression, not a list:
- \begin{center} 
- \begin{tabular}{ccc}
- $\sflat{\AALTS{ }{r :: rs}}$ & $=$ & $\sflataux{r} @ rs$\\
-$\sflat{\AALTS{ }{[]}}$ & $ = $ & $ \AALTS{ }{[]}$\\
-$\sflat r$ & $=$ & $ [r]$
-\end{tabular}
-\end{center}
-$\sflataux{\_}$  and $\sflat{\_}$ is only recursive in terms of the
- first element of the list of children of
-an alternative regex ($\AALTS{ }{rs}$), and is purposefully built for  dealing with the regular expression
-$r_1 \cdot r_2 \backslash s$.
-
-With $\sflat{\_}$ and $\sflataux{\_}$ we make 
-precise what  "closed forms" we have for the sequence derivatives and their simplifications,
-in other words, how can we construct $(r_1 \cdot r_2) \backslash s$
-and $\bderssimp{(r_1\cdot r_2)}{s}$,
-if we are only allowed to use a combination of $r_1 \backslash s'$ ($\bderssimp{r_1}{s'}$)
-and  $r_2 \backslash s'$ ($\bderssimp{r_2}{s'}$), where $s'$  ranges over 
-the substring of $s$?
-First let's look at a series of derivatives steps on a sequence 
-regular expression,  (assuming) that each time the first
-component of the sequence is always nullable):
-\begin{center}
-
-$r_1 \cdot r_2 \quad \longrightarrow_{\backslash c}  \quad   r_1  \backslash c \cdot r_2 + r_2 \backslash c \quad \longrightarrow_{\backslash c'} \quad (r_1 \backslash cc' \cdot r_2 + r_2 \backslash c') + r_2 \backslash cc' \longrightarrow_{\backslash c''} \quad$\\
-$((r_1 \backslash cc'c'' \cdot r_2 + r_2 \backslash c'') + r_2 \backslash c'c'') + r_2 \backslash cc'c''   \longrightarrow_{\backslash c''} \quad
- \ldots$
-
-\end{center}
-%TODO: cite indian paper
-Indianpaper have  come up with a slightly more formal way of putting the above process:
-\begin{center}
-$r_1 \cdot r_2 \backslash (c_1 :: c_2 ::\ldots c_n) \myequiv r_1 \backslash (c_1 :: c_2:: \ldots c_n) +
-\delta(\nullable(r_1 \backslash (c_1 :: c_2 \ldots c_{n-1}) ), r_2 \backslash (c_n)) + \ldots
-+ \delta (\nullable(r_1), r_2\backslash (c_1 :: c_2 ::\ldots c_n))$
-\end{center}
-where  $\delta(b, r)$ will produce $r$
-if $b$ evaluates to true, 
-and $\ZERO$ otherwise.
-
- But the $\myequiv$ sign in the above equation means language equivalence instead of syntactical
- equivalence. To make this intuition useful 
- for a formal proof, we need something
-stronger than language equivalence.
-With the help of $\sflat{\_}$ we can state the equation in Indianpaper
-more rigorously:
-\begin{lemma}
-$\sflat{(r_1 \cdot r_2) \backslash s} = \RALTS{ (r_1 \backslash s) \cdot r_2 :: (\map (r_2 \backslash \_) (\vsuf{s}{r_1}))}$
-\end{lemma}
-
-The function $\vsuf{\_}{\_}$ is defined recursively on the structure of the string:
-
-\begin{center}
-\begin{tabular}{lcl}
-$\vsuf{[]}{\_} $ & $=$ &  $[]$\\
-$\vsuf{c::cs}{r_1}$ & $ =$ & $ \textit{if} (\rnullable{r_1}) \textit{then} \; (\vsuf{cs}{(\rder{c}{r_1})}) @ [c :: cs]$\\
-                                     && $\textit{else} \; (\vsuf{cs}{(\rder{c}{r_1}) })  $
-\end{tabular}
-\end{center}                   
-It takes into account which prefixes $s'$ of $s$ would make $r \backslash s'$ nullable,
-and keep a list of suffixes $s''$ such that $s' @ s'' = s$. The list is sorted in
-the order $r_2\backslash s''$ appears in $(r_1\cdot r_2)\backslash s$.
-In essence, $\vsuf{\_}{\_}$ is doing a "virtual derivative" of $r_1 \cdot r_2$, but instead of producing 
-the entire result of  $(r_1 \cdot r_2) \backslash s$, 
-it only stores all the $s''$ such that $r_2 \backslash s''$  are occurring terms in $(r_1\cdot r_2)\backslash s$.
-
-With this we can also add simplifications to both sides and get
-\begin{lemma}
-$\rsimp{\sflat{(r_1 \cdot r_2) \backslash s} }= \rsimp{\AALTS{[[]}{ (r_1 \backslash s) \cdot r_2 :: (\map (r_2 \backslash \_) (\vsuf{s}{r_1}))}}$
-\end{lemma}
-Together with the idempotency property of $\rsimp{}$\ref{rsimpIdem},
-%TODO: state the idempotency property of rsimp
-it is possible to convert the above lemma to obtain a "closed form"
-for  derivatives nested with simplification:
-\begin{lemma}
-$\rderssimp{(r_1 \cdot r_2)}{s} = \rsimp{\AALTS{[[]}{ (r_1 \backslash s) \cdot r_2 :: (\map (r_2 \backslash \_) (\vsuf{s}{r_1}))}}$
-\end{lemma}
-And now the reason we have (1) in section 1 is clear:
-$\rsize{\rsimp{\RALTS{ (r_1 \backslash s) \cdot r_2 :: (\map \;(r_2 \backslash \_) \; (\vsuf{s}{r_1}))}}}$, 
-is equal to $\rsize{\rsimp{\RALTS{ ((r_1 \backslash s) \cdot r_2 :: (\map \; (r_2 \backslash \_) \; (\textit{Suffix}(r1, s))))}}}$
-    as $\vsuf{s}{r_1}$ is one way of expressing the list $\textit{Suffix}( r_1, s)$.
-
-%----------------------------------------------------------------------------------------
-%	SECTION 3
-%----------------------------------------------------------------------------------------
-
-\section{interaction between $\distinctWith$ and $\flts$}
-Note that it is not immediately obvious that 
-\begin{center}
-$\llbracket \distinctWith (\flts \textit{rs}) = \phi \rrbracket   \leq \llbracket \distinctWith \textit{rs} = \phi \rrbracket  $.
-\end{center}
-The intuition is that if we remove duplicates from the $\textit{LHS}$, at least the same amount of 
-duplicates will be removed from the list $\textit{rs}$ in the $\textit{RHS}$. 
-
-
 %-----------------------------------
 %	SECTION syntactic equivalence under simp
 %-----------------------------------
--- a/ChengsongTanPhdThesis/Chapters/Introduction.tex	Wed Jun 29 12:38:05 2022 +0100
+++ b/ChengsongTanPhdThesis/Chapters/Introduction.tex	Fri Jul 01 13:02:15 2022 +0100
@@ -21,6 +21,7 @@
 \newcommand{\ASEQ}[3]{\textit{ASEQ}_{#1} \, #2 \, #3}
 \newcommand{\bderssimp}[2]{#1 \backslash_{bsimps} #2}
 \newcommand{\rderssimp}[2]{#1 \backslash_{rsimp} #2}
+\def\rders{\textit{rders}}
 \newcommand{\bders}[2]{#1 \backslash #2}
 \newcommand{\bsimp}[1]{\textit{bsimp}(#1)}
 \newcommand{\rsimp}[1]{\textit{rsimp}\; #1}
@@ -52,13 +53,14 @@
 \def\internalise{\textit{internalise}}
 \def\lexer{\mathit{lexer}}
 \def\mkeps{\textit{mkeps}}
-\newcommand{\rder}[2]{#2 \backslash #1}
+\newcommand{\rder}[2]{#2 \backslash_r #1}
 
 \def\nonnested{\textit{nonnested}}
 \def\AZERO{\textit{AZERO}}
 \def\AONE{\textit{AONE}}
 \def\ACHAR{\textit{ACHAR}}
 
+\def\scfrewrites{\stackrel{*}{\rightsquigarrow_{scf}}}
 \def\frewrite{\rightsquigarrow_f}
 \def\hrewrite{\rightsquigarrow_h}
 \def\grewrite{\rightsquigarrow_g}
--- a/thys3/BlexerSimp.thy	Wed Jun 29 12:38:05 2022 +0100
+++ b/thys3/BlexerSimp.thy	Fri Jul 01 13:02:15 2022 +0100
@@ -276,9 +276,12 @@
   by simp
 
 lemma ss6_realistic:
-  shows "(rs1 @ rs2) s\<leadsto>* (rs1 @ dB6 rs2 (set (concat (map turnIntoTerms (map erase rs2)))))"
-  
-
+  shows "(rs1 @ rs2) s\<leadsto>* (rs1 @ dB6 rs2 (set (concat (map turnIntoTerms (map erase rs1)))))"
+  apply(induct rs2 arbitrary: rs1)
+   apply simp
+  apply(rename_tac cc cc')
+ 
+ 
   sorry
 
 
--- a/thys3/ClosedForms.thy	Wed Jun 29 12:38:05 2022 +0100
+++ b/thys3/ClosedForms.thy	Fri Jul 01 13:02:15 2022 +0100
@@ -887,14 +887,16 @@
   using rders_simp_same_simpders rsimp_idem by auto
 
 lemma repeated_altssimp:
-  shows "\<forall>r \<in> set rs. rsimp r = r \<Longrightarrow> rsimp (rsimp_ALTs (rdistinct (rflts rs) {})) =
+  shows "\<forall>r \<in> set rs. rsimp r = r \<Longrightarrow> 
+           rsimp (rsimp_ALTs (rdistinct (rflts rs) {})) =
            rsimp_ALTs (rdistinct (rflts rs) {})"
   by (metis map_idI rsimp.simps(2) rsimp_idem)
 
 
 
 lemma alts_closed_form: 
-  shows "rsimp (rders_simp (RALTS rs) s) = rsimp (RALTS (map (\<lambda>r. rders_simp r s) rs))"
+  shows "rsimp (rders_simp (RALTS rs) s) = 
+         rsimp (RALTS (map (\<lambda>r. rders_simp r s) rs))"
   apply(induct s rule: rev_induct)
    apply simp
   apply simp