# HG changeset patch # User Chengsong # Date 1652012791 -3600 # Node ID 69ad05398894995a25d80ef5798043633822f2b0 # Parent 5ce3bd8e5696797b926af805aa580111e2d77d87 thesis chapter 2 section 2.4 2.5 Isarfied ClosedForms.thy diff -r 5ce3bd8e5696 -r 69ad05398894 ChengsongTanPhdThesis/Chapters/Chapter1.tex --- a/ChengsongTanPhdThesis/Chapters/Chapter1.tex Sat May 07 13:38:44 2022 +0100 +++ b/ChengsongTanPhdThesis/Chapters/Chapter1.tex Sun May 08 13:26:31 2022 +0100 @@ -18,6 +18,7 @@ \newcommand{\ASEQ}[3]{\textit{ASEQ}_{#1} \, #2 \, #3} \newcommand{\bderssimp}[2]{#1 \backslash_{bsimp} #2} \newcommand{\rderssimp}[2]{#1 \backslash_{rsimp} #2} +\newcommand{\bders}[2]{#1 \backslash #2} \newcommand{\bsimp}[1]{\textit{bsimp}(#1)} \newcommand{\rsimp}[1]{\textit{rsimp}(#1)} \newcommand{\sflataux}[1]{\llparenthesis #1 \rrparenthesis'} @@ -28,7 +29,7 @@ \newcommand{\rdistinct}[2]{\textit{distinct} \; \textit{#1} \; #2} \newcommand\hflat[1]{\llparenthesis #1 \rrparenthesis_*} \newcommand\hflataux[1]{\llparenthesis #1 \rrparenthesis_*'} -\newcommand\createdByStar[1]{\textit{\textit{createdByStar}(#1)}} +\newcommand\createdByStar[1]{\textit{createdByStar}(#1)} \newcommand\myequiv{\mathrel{\stackrel{\makebox[0pt]{\mbox{\normalfont\tiny equiv}}}{=}}} diff -r 5ce3bd8e5696 -r 69ad05398894 ChengsongTanPhdThesis/Chapters/Chapter2.tex --- a/ChengsongTanPhdThesis/Chapters/Chapter2.tex Sat May 07 13:38:44 2022 +0100 +++ b/ChengsongTanPhdThesis/Chapters/Chapter2.tex Sun May 08 13:26:31 2022 +0100 @@ -368,19 +368,92 @@ We give a predicate for such "star-created" regular expressions: \begin{center} \begin{tabular}{lcr} - & & $\createdByStar{\RSEQ{ra}{\RSTAR{rb}}}$\\ + & & $\createdByStar{(\RSEQ{ra}{\RSTAR{rb}}) }$\\ $\createdByStar{r_1} \land \createdByStar{r_2} $ & $ \Longrightarrow$ & $\createdByStar{(r_1 + r_2)}$ \end{tabular} \end{center} + These definitions allows us the flexibility to talk about + regular expressions in their most convenient format, + for example, flattened out $\RALTS{[r_1, r_2, \ldots, r_n]} $ + instead of binary-nested: $((r_1 + r_2) + (r_3 + r_4)) + \ldots$. + These definitions help express that certain classes of syntatically + distinct regular expressions are actually the same under simplification. + This is not entirely true for annotated regular expressions: + %TODO: bsimp bders \neq bderssimp + \begin{center} + $(1+ (c\cdot \ASEQ{bs}{c^*}{c} ))$ + \end{center} + For bit-codes, the order in which simplification is applied + might cause a difference in the location they are placed. + If we want something like + \begin{center} + $\bderssimp{r}{s} \myequiv \bsimp{\bders{r}{s}}$ + \end{center} + Some "canonicalization" procedure is required, + which either pushes all the common bitcodes to nodes + as senior as possible: + \begin{center} + $_{bs}(_{bs_1 @ bs'}r_1 + _{bs_1 @ bs''}r_2) \rightarrow _{bs @ bs_1}(_{bs'}r_1 + _{bs''}r_2) $ + \end{center} + or does the reverse. However bitcodes are not of interest if we are talking about + the $\llbracket r \rrbracket$ size of a regex. + Therefore for the ease and simplicity of producing a + proof for a size bound, we are happy to restrict ourselves to + unannotated regular expressions, and obtain such equalities as + \begin{lemma} + $\rsimp{(r_1 + r_2)} = \rsimp{\RALTS{\hflataux{r_1} @ \hflataux{r_2}}}$ + \end{lemma} + + \begin{proof} + By using the rewriting relation $\rightsquigarrow$ + \end{proof} + %TODO: rsimp sflat +And from this we obtain a proof that a star's derivative will be the same +as if it had all its nested alternatives created during deriving being flattened out: + For example, + \begin{lemma} + $\createdByStar{r} \implies \rsimp{r} = \rsimp{\RALTS{\hflataux{r}}}$ + \end{lemma} + \begin{proof} + By structural induction on $r$, where the induction rules are these of $\createdByStar{_}$. + \end{proof} +% The simplification of a flattened out regular expression, provided it comes +%from the derivative of a star, is the same as the one nested. + + + + + + + + + One might wonder the actual bound rather than the loose bound we gave for the convenience of a easier proof. How much can the regex $r^* \backslash s$ grow? As hinted earlier, they can grow at a maximum speed -of exponential $w.r.t$ the number of characters. -But they will eventually level off when the string $s$ is long enough, + exponential $w.r.t$ the number of characters, +but will eventually level off when the string $s$ is long enough, as suggested by the finiteness bound proof. - +And unfortunately we have concrete examples +where such regexes grew exponentially large before levelling off: +$(a ^ * + (a + aa) ^ * + (a + aa + aaa) ^ * + \ldots + +(a+ aa + aaa+\ldots \underbrace{a \ldots a}_{\text{n a's}})^*$ will already have a maximum +size that is exponential on the number $n$. %TODO: give out a graph showing how the size of the regex grows and levels off at a size exponential to the original regex +While the tight upper bound will definitely be a lot lower than +the one we gave for the finiteness proof, +it will still be at least expoential, under our current simplification rules. + +This suggests stronger simplifications that keep the tight bound polynomial. + +%---------------------------------------------------------------------------------------- +% SECTION 5 +%---------------------------------------------------------------------------------------- +\section{a stronger version of simplification} +%TODO: search for isabelle proofs of algorithms that check equivalence + + diff -r 5ce3bd8e5696 -r 69ad05398894 ChengsongTanPhdThesis/main.tex --- a/ChengsongTanPhdThesis/main.tex Sat May 07 13:38:44 2022 +0100 +++ b/ChengsongTanPhdThesis/main.tex Sun May 08 13:26:31 2022 +0100 @@ -57,6 +57,8 @@ %\usepackage{algorithm} \usepackage{amsmath} +\usepackage{amsthm} +\usepackage{amssymb} %\usepackage{mathtools} \usepackage[noend]{algpseudocode} \usepackage{enumitem} diff -r 5ce3bd8e5696 -r 69ad05398894 thys3/ClosedForms.thy --- a/thys3/ClosedForms.thy Sat May 07 13:38:44 2022 +0100 +++ b/thys3/ClosedForms.thy Sun May 08 13:26:31 2022 +0100 @@ -1526,15 +1526,22 @@ lemma hfau_pushin: shows "created_by_star r \ hflat_aux (rder c r) = concat (map hflat_aux (map (rder c) (hflat_aux r)))" - apply(induct r rule: created_by_star.induct) - apply simp - apply(subgoal_tac "created_by_star (rder c r1)") - prefer 2 - apply(subgoal_tac "created_by_star (rder c r2)") - using cbs_ders_cbs apply blast - using cbs_ders_cbs apply auto[1] - apply simp - done + +proof(induct r rule: created_by_star.induct) + case (1 ra rb) + then show ?case by simp +next + case (2 r1 r2) + then have "created_by_star (rder c r1)" + using cbs_ders_cbs by blast + then have "created_by_star (rder c r2)" + using "2.hyps"(3) cbs_ders_cbs by auto + then show ?case + apply simp + by (simp add: "2.hyps"(2) "2.hyps"(4)) + qed + + lemma stupdate_induct1: shows " concat (map (hflat_aux \ (rder x \ (\s1. RSEQ (rders r0 s1) (RSTAR r0)))) Ss) = @@ -1606,9 +1613,16 @@ lemma hfau_rsimpeq2: shows "created_by_star r \ rsimp r = rsimp ( (RALTS (hflat_aux r)))" + apply(induct rule: created_by_star.induct) + apply simp + apply (metis rsimp.simps(6) rsimp_seq_equal1) + using cbs_hfau_rsimpeq1 hflat_aux.simps(1) by presburger + + +(* +lemma hfau_rsimpeq2_oldproof: shows "created_by_star r \ rsimp r = rsimp ( (RALTS (hflat_aux r)))" apply(induct r) apply simp+ - apply (metis rsimp_seq_equal1) prefer 2 apply simp @@ -1616,7 +1630,6 @@ apply simp apply(case_tac "list") apply simp - apply (metis idem_after_simp1) apply(case_tac "lista") prefer 2 @@ -1627,6 +1640,7 @@ using hflat_aux.simps(1) apply presburger apply simp using cbs_hfau_rsimpeq1 by fastforce +*) lemma star_closed_form1: shows "rsimp (rders (RSTAR r0) (c#s)) =