# HG changeset patch # User Chengsong # Date 1656103763 -3600 # Node ID 0f00d440f48471822ca965ca69ee76a92831db2d # Parent 3ea82d8f0aa4a8813ca722ef03f4e5bde5670d98 more changes diff -r 3ea82d8f0aa4 -r 0f00d440f484 ChengsongTanPhdThesis/Chapters/Finite.tex --- a/ChengsongTanPhdThesis/Chapters/Finite.tex Fri Jun 24 11:41:05 2022 +0100 +++ b/ChengsongTanPhdThesis/Chapters/Finite.tex Fri Jun 24 21:49:23 2022 +0100 @@ -206,6 +206,7 @@ simplification and derivatives can be done by the size of their unlifted counterpart with the unlifted version of simplification and derivatives applied. \begin{lemma} + The following equalities hold: \begin{itemize} \item $\asize{\bsimp{a}} = \rsize{\rsimp{\rerase{a}}}$ @@ -228,11 +229,23 @@ %----------------------------------- % SECTION ? %----------------------------------- - \section{Roadmap to a Finiteness Proof} + \subsection{Finiteness Proof Using $\rrexp$s} Now that we have defined the $\rrexp$ datatype, and proven that its size changes w.r.t derivatives and simplifications mirrors precisely those of annotated regular expressions, we aim to bound the size of $r \backslash s$ for any $\rrexp$ $r$. -The way we do it is by two steps: + Once we have a bound like: + \[ + \llbracket r \backslash_{rsimp} s \rrbracket_r \leq N_r + \] + \noindent + we could easily extend that to + \[ + \llbracket a \backslash_{bsimps} s \rrbracket \leq N_r. + \] + + \subsection{Roadmap to a Bound for $\textit{Rrexp}$} + +The way we obtain the bound for $\rrexp$s is by two steps: \begin{itemize} \item First, we rewrite $r\backslash s$ into something else that is easier @@ -240,23 +253,24 @@ $r_1 \cdot r_2$ and $r^*$, where the derivative can grow and bloat in a wild way, but after simplification they will always be equal or smaller to a form consisting of an alternative list of regular expressions $f \; (g\; (\sum rs))$ with some functions applied to it, where each element will be distinct after the function application. - The functions $f$ and $g$ are usually from $\flts$ $\distinctBy$ and other appearing in $\bsimp{_}$. - This way we get a different but equivalent way of expressing : $r\backslash s = f \; (g\; (\sum rs))$, we call the - right hand side the "closed form" of $r\backslash s$. \item Then, for such a sum list of regular expressions $f\; (g\; (\sum rs))$, we can control its size by estimation, since $\distinctBy$ and $\flts$ are well-behaved and working together would only - reduce the size of a regular expression, not adding to it. The closed form $f\; (g\; (\sum rs)) $ is controlled - by the size of $rs'$, where every element in $rs'$ is distinct, and each element can be described by som e inductive sub-structures (for example when $r = r_1 \cdot r_2$ then $rs'$ will be solely comprised of $r_1 \backslash s'$ and $r_2 \backslash s''$, $s'$ and $s''$ being sub-strings of $s$), which will have a numeric uppder bound according to inductive hypothesis, which controls $r \backslash s$. + reduce the size of a regular expression, not adding to it. \end{itemize} -\section{Closed Forms} - +\section{Step One: Closed Forms} + We transform the function application $\rderssimp{r}{s}$ + into an equivalent form $f\; (g \; (\sum rs))$. + The functions $f$ and $g$ can be anything from $\flts$, $\distinctBy$ and other helper functions from $\bsimp{\_}$. + This way we get a different but equivalent way of expressing : $r\backslash s = f \; (g\; (\sum rs))$, we call the + right hand side the "closed form" of $r\backslash s$. \subsection{Basic Properties needed for Closed Forms} \subsubsection{$\textit{rdistinct}$'s Deduplicates Successfully} The $\textit{rdistinct}$ function, as its name suggests, will -remove duplicates according to the accumulator +remove duplicates in an \emph{r}$\textit{rexp}$ list, +according to the accumulator and leave only one of each different element in a list: \begin{lemma} The function $\textit{rdistinct}$ satisfies the following @@ -314,15 +328,15 @@ \begin{lemma} If $rs_1$'s elements are all unique, and not appearing in the accumulator $acc$, then it can be taken out and left untouched in the output: - \[\textit{rdistinct}\; rs_1 @ rsa\;\, acc - = rs_1@(\textit{rdistinct} rsa \; (acc \cup rs_1)\] + \[\textit{rdistinct}\; (rs_1 @ rsa)\;\, acc + = rs_1@(\textit{rdistinct} rsa \; (acc \cup rs_1))\] \end{lemma} \begin{proof} By an induction on $rs_1$, where $rsa$ and $acc$ are allowed to be arbitrary. \end{proof} -\subsubsection{The properties of $\backslash_r$, $\backslash_{rsimp}$, $\textit{Rflts}$ and $\textit{Rsimp}_{ALTS}$} -We give in this subsection some properties of how $\backslash_r$, $\backslash_{rsimp}$, $\textit{Rflts}$ and $\textit{Rsimp}_{ALTS} $ interact with each other and with $@$, the concatenation operator. +\subsubsection{The Properties of $\backslash_r$, $\backslash_{rsimp}$, $\textit{Rflts}$ and $\textit{Rsimp}_{ALTS}$} +We give in this subsection some properties of how $\backslash_r$, $\backslash_{rsimp}$, $\textit{Rflts}$ and $\textit{Rsimp}_{ALTS} $ interact with each other and with $@$, the concatenation operator. These will be helpful in later closed form proofs, when we want to transform the ways in which multiple functions involving those are composed together @@ -406,7 +420,10 @@ Fortunately this is provable by induction on the list $rs$ +\section{Estimating the Closed Forms' sizes} + The closed form $f\; (g\; (\sum rs)) $ is controlled + by the size of $rs'$, where every element in $rs'$ is distinct, and each element can be described by som e inductive sub-structures (for example when $r = r_1 \cdot r_2$ then $rs'$ will be solely comprised of $r_1 \backslash s'$ and $r_2 \backslash s''$, $s'$ and $s''$ being sub-strings of $s$), which will have a numeric uppder bound according to inductive hypothesis, which controls $r \backslash s$. %----------------------------------- % SECTION 2 %----------------------------------- diff -r 3ea82d8f0aa4 -r 0f00d440f484 thys2/BasicIdentities.thy --- a/thys2/BasicIdentities.thy Fri Jun 24 11:41:05 2022 +0100 +++ b/thys2/BasicIdentities.thy Fri Jun 24 21:49:23 2022 +0100 @@ -248,6 +248,9 @@ + + + lemma rSEQ_mono: shows "rsize (rsimp_SEQ r1 r2) \rsize ( RSEQ r1 r2)" apply auto @@ -1138,7 +1141,7 @@ shows "map rsimp (r # rs) = map rsimp (( rsimp r) # rs)" by (simp add: rsimp_idem) -lemma head_one_more_dersimp: +corollary head_one_more_dersimp: shows "map rsimp ((rder x (rders_simp r s) # rs)) = map rsimp ((rders_simp r (s@[x]) ) # rs)" using head_one_more_simp rders_simp_append rders_simp_one_char by presburger diff -r 3ea82d8f0aa4 -r 0f00d440f484 thys3/BlexerSimp.thy --- a/thys3/BlexerSimp.thy Fri Jun 24 11:41:05 2022 +0100 +++ b/thys3/BlexerSimp.thy Fri Jun 24 21:49:23 2022 +0100 @@ -266,10 +266,22 @@ shows " \erase a \ erase ` set rs1\ \ (rs1 @ a # rs2) s\* (rs1 @ rs2)" apply(subgoal_tac "\rs1a rs1b x. rs1 = rs1a @ [x] @ rs1b \ erase x = erase a") prefer 2 + apply (meson existing_preimage) + apply(erule exE)+ + apply simp + apply(subgoal_tac "(rs1a @ [x] @ rs1b @ [a] @ rs2) s\ (rs1a @ [x] @ rs1b @ rs2)") + apply (simp add: rs_in_rstar) + apply(subgoal_tac "L (erase a) \ L (erase x)") + using ss6 apply presburger + by simp +lemma ss6_realistic: + shows "(rs1 @ rs2) s\* (rs1 @ dB6 rs2 (set (concat (map turnIntoTerms (map erase rs2)))))" + sorry + lemma ss6_stronger_aux: shows "(rs1 @ rs2) s\* (rs1 @ dB6 rs2 (set (map erase rs1)))" apply(induct rs2 arbitrary: rs1) @@ -280,6 +292,9 @@ apply(subgoal_tac "(rs1 @ a # rs2) s\* (rs1 @ rs2)") using srewrites_trans apply blast using deletes_dB apply presburger + apply(case_tac "set (turnIntoTerms (erase a)) \ erase ` set rs1") + + apply simp apply(auto) prefer 2