lexing: changeset 506:69ad05398894

--- a/ChengsongTanPhdThesis/Chapters/Chapter1.tex	Sat May 07 13:38:44 2022 +0100
+++ b/ChengsongTanPhdThesis/Chapters/Chapter1.tex	Sun May 08 13:26:31 2022 +0100
@@ -18,6 +18,7 @@
 \newcommand{\ASEQ}[3]{\textit{ASEQ}_{#1} \, #2 \, #3}
 \newcommand{\bderssimp}[2]{#1 \backslash_{bsimp} #2}
 \newcommand{\rderssimp}[2]{#1 \backslash_{rsimp} #2}
+\newcommand{\bders}[2]{#1 \backslash #2}
 \newcommand{\bsimp}[1]{\textit{bsimp}(#1)}
 \newcommand{\rsimp}[1]{\textit{rsimp}(#1)}
 \newcommand{\sflataux}[1]{\llparenthesis #1 \rrparenthesis'}
@@ -28,7 +29,7 @@
 \newcommand{\rdistinct}[2]{\textit{distinct} \; \textit{#1} \; #2}
 \newcommand\hflat[1]{\llparenthesis  #1 \rrparenthesis_*}
 \newcommand\hflataux[1]{\llparenthesis #1 \rrparenthesis_*'}
-\newcommand\createdByStar[1]{\textit{\textit{createdByStar}(#1)}}
+\newcommand\createdByStar[1]{\textit{createdByStar}(#1)}
 
 \newcommand\myequiv{\mathrel{\stackrel{\makebox[0pt]{\mbox{\normalfont\tiny equiv}}}{=}}}

--- a/ChengsongTanPhdThesis/Chapters/Chapter2.tex	Sat May 07 13:38:44 2022 +0100
+++ b/ChengsongTanPhdThesis/Chapters/Chapter2.tex	Sun May 08 13:26:31 2022 +0100
@@ -368,19 +368,92 @@
 We give a predicate for such "star-created" regular expressions:
 \begin{center}
 \begin{tabular}{lcr}
-         &    &       $\createdByStar{\RSEQ{ra}{\RSTAR{rb}}}$\\
+         &    &       $\createdByStar{(\RSEQ{ra}{\RSTAR{rb}}) }$\\
  $\createdByStar{r_1} \land \createdByStar{r_2} $ & $ \Longrightarrow$ & $\createdByStar{(r_1 + r_2)}$
  \end{tabular}
  \end{center}
  
+ These definitions allows us the flexibility to talk about 
+ regular expressions in their most convenient format,
+ for example, flattened out $\RALTS{[r_1, r_2, \ldots, r_n]} $
+ instead of binary-nested: $((r_1 + r_2) + (r_3 + r_4)) + \ldots$.
+ These definitions help express that certain classes of syntatically 
+ distinct regular expressions are actually the same under simplification.
+ This is not entirely true for annotated regular expressions: 
+ %TODO: bsimp bders \neq bderssimp
+ \begin{center}
+ $(1+ (c\cdot \ASEQ{bs}{c^*}{c} ))$
+ \end{center}
+ For bit-codes, the order in which simplification is applied
+ might cause a difference in the location they are placed.
+ If we want something like
+ \begin{center}
+ $\bderssimp{r}{s} \myequiv \bsimp{\bders{r}{s}}$
+ \end{center}
+ Some "canonicalization" procedure is required,
+ which either pushes all the common bitcodes to nodes
+  as senior as possible:
+  \begin{center}
+  $_{bs}(_{bs_1 @ bs'}r_1 + _{bs_1 @ bs''}r_2) \rightarrow _{bs @ bs_1}(_{bs'}r_1 + _{bs''}r_2) $
+  \end{center}
+ or does the reverse. However bitcodes are not of interest if we are talking about
+ the $\llbracket r \rrbracket$ size of a regex.
+ Therefore for the ease and simplicity of producing a
+ proof for a size bound, we are happy to restrict ourselves to 
+ unannotated regular expressions, and obtain such equalities as
+ \begin{lemma}
+ $\rsimp{(r_1 + r_2)} = \rsimp{\RALTS{\hflataux{r_1} @ \hflataux{r_2}}}$
+ \end{lemma}
+ 
+ \begin{proof}
+ By using the rewriting relation $\rightsquigarrow$
+ \end{proof}
+ %TODO: rsimp sflat
+And from this we obtain a proof that a star's derivative will be the same
+as if it had all its nested alternatives created during deriving being flattened out:
+ For example,
+ \begin{lemma}
+ $\createdByStar{r} \implies \rsimp{r} = \rsimp{\RALTS{\hflataux{r}}}$
+ \end{lemma}
+ \begin{proof}
+ By structural induction on $r$, where the induction rules are these of $\createdByStar{_}$.
+ \end{proof}
+% The simplification of a flattened out regular expression, provided it comes
+%from the derivative of a star, is the same as the one nested.
+ 
+ 
+ 
+ 
+ 
+ 
+ 
+ 
+ 
 One might wonder the actual bound rather than the loose bound we gave
 for the convenience of a easier proof.
 How much can the regex $r^* \backslash s$ grow? As hinted earlier,
  they can grow at a maximum speed
-of exponential $w.r.t$ the number of characters.
-But they will eventually level off when the string $s$ is long enough,
+  exponential $w.r.t$ the number of characters, 
+but will eventually level off when the string $s$ is long enough,
 as suggested by the finiteness bound proof.
-
+And unfortunately we have concrete examples
+where such regexes grew exponentially large before levelling off:
+$(a ^ * + (a + aa) ^ * + (a + aa + aaa) ^ * + \ldots + 
+(a+ aa + aaa+\ldots \underbrace{a \ldots a}_{\text{n a's}})^*$ will already have a maximum
+size that is  exponential on the number $n$.
 %TODO: give out a graph showing how the size of the regex grows and levels off at a size exponential to the original regex
 
 
+While the tight upper bound will definitely be a lot lower than 
+the one we gave for the finiteness proof, 
+it will still be at least expoential, under our current simplification rules.
+
+This suggests stronger simplifications that keep the tight bound polynomial.
+
+%----------------------------------------------------------------------------------------
+%	SECTION 5
+%----------------------------------------------------------------------------------------
+\section{a stronger version of simplification}
+%TODO: search for isabelle proofs of algorithms that check equivalence 
+
+

--- a/ChengsongTanPhdThesis/main.tex	Sat May 07 13:38:44 2022 +0100
+++ b/ChengsongTanPhdThesis/main.tex	Sun May 08 13:26:31 2022 +0100
@@ -57,6 +57,8 @@
 
 %\usepackage{algorithm}
 \usepackage{amsmath}
+\usepackage{amsthm}
+\usepackage{amssymb}
 %\usepackage{mathtools}
 \usepackage[noend]{algpseudocode}
 \usepackage{enumitem}

--- a/thys3/ClosedForms.thy	Sat May 07 13:38:44 2022 +0100
+++ b/thys3/ClosedForms.thy	Sun May 08 13:26:31 2022 +0100
@@ -1526,15 +1526,22 @@
 
 lemma hfau_pushin: 
   shows "created_by_star r \<Longrightarrow> hflat_aux (rder c r) = concat (map hflat_aux (map (rder c) (hflat_aux r)))"
-  apply(induct r rule: created_by_star.induct)
-   apply simp
-  apply(subgoal_tac "created_by_star (rder c r1)")
-  prefer 2
-  apply(subgoal_tac "created_by_star (rder c r2)")
-  using cbs_ders_cbs apply blast
-  using cbs_ders_cbs apply auto[1]
-  apply simp
-  done
+
+proof(induct r rule: created_by_star.induct)
+  case (1 ra rb)
+  then show ?case by simp
+next
+  case (2 r1 r2)
+  then have "created_by_star (rder c r1)"
+    using cbs_ders_cbs by blast
+  then have "created_by_star (rder c r2)"
+    using "2.hyps"(3) cbs_ders_cbs by auto
+  then show ?case
+    apply simp
+    by (simp add: "2.hyps"(2) "2.hyps"(4))
+  qed
+
+
 
 lemma stupdate_induct1:
   shows " concat (map (hflat_aux \<circ> (rder x \<circ> (\<lambda>s1. RSEQ (rders r0 s1) (RSTAR r0)))) Ss) =
@@ -1606,9 +1613,16 @@
 
 lemma hfau_rsimpeq2:
   shows "created_by_star r \<Longrightarrow> rsimp r = rsimp ( (RALTS (hflat_aux r)))"
+  apply(induct rule: created_by_star.induct)
+   apply simp
+  apply (metis rsimp.simps(6) rsimp_seq_equal1)
+  using cbs_hfau_rsimpeq1 hflat_aux.simps(1) by presburger
+
+  
+(*
+lemma hfau_rsimpeq2_oldproof:  shows "created_by_star r \<Longrightarrow> rsimp r = rsimp ( (RALTS (hflat_aux r)))"
   apply(induct r)
        apply simp+
-  
     apply (metis rsimp_seq_equal1)
   prefer 2
    apply simp
@@ -1616,7 +1630,6 @@
    apply simp
   apply(case_tac "list")
    apply simp
-  
   apply (metis idem_after_simp1)
   apply(case_tac "lista")
   prefer 2
@@ -1627,6 +1640,7 @@
   using hflat_aux.simps(1) apply presburger
   apply simp
   using cbs_hfau_rsimpeq1 by fastforce
+*)
 
 lemma star_closed_form1:
   shows "rsimp (rders (RSTAR r0) (c#s)) =

author	Chengsong
	Sun, 08 May 2022 13:26:31 +0100
changeset 506	69ad05398894
parent 505	5ce3bd8e5696
child 507	213220f54a6e

ChengsongTanPhdThesis/Chapters/Chapter1.tex		file \| annotate \| diff \| comparison \| revisions
ChengsongTanPhdThesis/Chapters/Chapter2.tex		file \| annotate \| diff \| comparison \| revisions
ChengsongTanPhdThesis/main.tex		file \| annotate \| diff \| comparison \| revisions
thys3/ClosedForms.thy		file \| annotate \| diff \| comparison \| revisions