# HG changeset patch # User Chengsong # Date 1646700640 0 # Node ID c6351a96bf80646ee458739a66075c937070b5b9 # Parent 09a57446696a8a1451c78cc8fb68bcd5a19e5994 writeupforclosedforms diff -r 09a57446696a -r c6351a96bf80 ChengsongPhdThesis/ChengsongPhDThesis.tex --- a/ChengsongPhdThesis/ChengsongPhDThesis.tex Mon Mar 07 12:27:27 2022 +0000 +++ b/ChengsongPhdThesis/ChengsongPhDThesis.tex Tue Mar 08 00:50:40 2022 +0000 @@ -378,10 +378,99 @@ \subsection{an improved version of bit-coded algorithm: with simp!} -\subsection{a correctness proof for bitcoded} +\subsection{a correctness proof for bitcoded algorithm} \subsection{finiteness proof } +\subsubsection{closed form} +We can give the derivative of regular expressions +with respect to string a closed form with respect to simplification: +\begin{itemize} +\item +closed form for sequences: +\begin{verbatim} +lemma seq_closed_form: shows +"rsimp (rders_simp (RSEQ r1 r2) s) = +rsimp ( RALTS ( (RSEQ (rders_simp r1 s) r2) # + (map (rders r2) (vsuf s r1)) + ) + )" +\end{verbatim} +where the recursive function $\textit{vsuf}$ is defined as +\begin{verbatim} +fun vsuf :: "char list -> rrexp -> char list list" where +"vsuf [] _ = []" +|"vsuf (c#cs) r1 = (if (rnullable r1) then (vsuf cs (rder c r1)) @ [c # cs] + else (vsuf cs (rder c r1)) + ) " +\end{verbatim} +\item +closed form for stars: +\begin{verbatim} +lemma star_closed_form: + shows "rders_simp (RSTAR r0) (c#s) = +rsimp ( RALTS ( +(map (\lambda s1. RSEQ (rders_simp r0 s1) (RSTAR r0) ) +(star_updates s r [[c]]) ) ))" +\end{verbatim} +where the recursive function $\textit{star}\_\textit{updates}$ is defined as +\begin{verbatim} +fun star_update :: "char -> rrexp -> char list list -> char list list" where +"star_update c r [] = []" +|"star_update c r (s # Ss) = (if (rnullable (rders_simp r s)) + then (s@[c]) # [c] # (star_update c r Ss) + else (s@[c]) # (star_update c r Ss) )" + +fun star_updates :: "char list -> rrexp -> char list list -> char list list" + where +"star_updates [] r Ss = Ss" +| "star_updates (c # cs) r Ss = star_updates cs r (star_update c r Ss)" + +\end{verbatim} + + +\end{itemize} +These closed form is a formalization of the intuition + that we can push in the derivatives +of compound regular expressions to its sub-expressions, and the resulting +expression is a linear combination of those sub-expressions' derivatives. +\subsubsection{Estimation of closed forms' size} +And thanks to $\textit{distinctBy}$ helping with deduplication, +the linear combination can be bounded by the set enumerating all +regular expressions up to a certain size : +\begin{verbatim} + +lemma star_closed_form_bounded_by_rdistinct_list_estimate: + shows "rsize (rsimp ( RALTS ( (map (\lambda s1. RSEQ (rders_simp r0 s1) (RSTAR r0) ) + (star_updates s r [[c]]) ) ))) <= + Suc (sum_list (map rsize (rdistinct (map (\lambda s1. RSEQ (rders_simp r0 s1) (RSTAR r0) ) + (star_updates s r [[c]]) ) {}) ) )" + + lemma distinct_list_rexp_up_to_certain_size_bouded_by_set_enumerating_up_to_that_size: + shows "\forallr\in set rs. (rsize r ) <= N ==> sum_list (map rsize (rdistinct rs {})) <= + (card (rexp_enum N))* N" + + lemma ind_hypo_on_ders_leads_to_stars_bounded: + shows "\foralls. rsize (rders_simp r0 s) <= N ==> + (sum_list (map rsize (rdistinct (map (\lambda s1. RSEQ (rders_simp r0 s1) (RSTAR r0) ) + (star_updates s r [[c]]) ) {}) ) ) <= +(card (rexp_enum (Suc (N + rsize (RSTAR r0))))) * (Suc (N + rsize (RSTAR r0))) +" +\end{verbatim} + +With the above 3 lemmas, we can argue that the inductive hypothesis +$r_0$'s derivatives is bounded above leads to $r_0^*$'s +derivatives being bounded above. +\begin{verbatim} + +lemma r0_bounded_star_bounded: + shows "\foralls. rsize (rders_simp r0 s) <= N ==> + \foralls. rsize (rders_simp (RSTAR r0) s) <= +(card (rexp_enum (Suc (N + rsize (RSTAR r0))))) * (Suc (N + rsize (RSTAR r0)))" +\end{verbatim} + + +And we have a similar argument for the sequence case. \subsection{stronger simplification} @@ -401,32 +490,7 @@ Deciding whether a string is in the language of the regex can be intuitively done by constructing an NFA\cite{Thompson_1968}: -and simulate the running of it: -\begin{figure} -\centering -\includegraphics[scale=0.5]{pics/regex_nfa_base.png} -\end{figure} - -\begin{figure} -\centering -\includegraphics[scale=0.5]{pics/regex_nfa_seq1.png} -\end{figure} - -\begin{figure} -\centering -\includegraphics[scale=0.5]{pics/regex_nfa_seq2.png} -\end{figure} - -\begin{figure} -\centering -\includegraphics[scale=0.5]{pics/regex_nfa_alt.png} -\end{figure} - - -\begin{figure} -\centering -\includegraphics[scale=0.5]{pics/regex_nfa_star.png} -\end{figure} +and simulate the running of it. Which should be simple enough that modern programmers have no problems with it at all? @@ -1898,6 +1962,9 @@ analysis approach by implementing them in the same language and then compare their performance. + +\section{discarded} +haha \bibliographystyle{plain} \bibliography{root,regex_time_complexity} diff -r 09a57446696a -r c6351a96bf80 thys2/ClosedForms.thy --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/thys2/ClosedForms.thy Tue Mar 08 00:50:40 2022 +0000 @@ -0,0 +1,1006 @@ + +theory ClosedForms + imports "Lexer" "PDerivs" +begin + + +datatype rrexp = + RZERO +| RONE +| RCHAR char +| RSEQ rrexp rrexp +| RALTS "rrexp list" +| RSTAR rrexp + +abbreviation + "RALT r1 r2 \ RALTS [r1, r2]" + + + +fun + rnullable :: "rrexp \ bool" +where + "rnullable (RZERO) = False" +| "rnullable (RONE ) = True" +| "rnullable (RCHAR c) = False" +| "rnullable (RALTS rs) = (\r \ set rs. rnullable r)" +| "rnullable (RSEQ r1 r2) = (rnullable r1 \ rnullable r2)" +| "rnullable (RSTAR r) = True" + + +fun + rder :: "char \ rrexp \ rrexp" +where + "rder c (RZERO) = RZERO" +| "rder c (RONE) = RZERO" +| "rder c (RCHAR d) = (if c = d then RONE else RZERO)" +| "rder c (RALTS rs) = RALTS (map (rder c) rs)" +| "rder c (RSEQ r1 r2) = + (if rnullable r1 + then RALT (RSEQ (rder c r1) r2) (rder c r2) + else RSEQ (rder c r1) r2)" +| "rder c (RSTAR r) = RSEQ (rder c r) (RSTAR r)" + + +fun + rders :: "rrexp \ string \ rrexp" +where + "rders r [] = r" +| "rders r (c#s) = rders (rder c r) s" + +fun rdistinct :: "'a list \'a set \ 'a list" + where + "rdistinct [] acc = []" +| "rdistinct (x#xs) acc = + (if x \ acc then rdistinct xs acc + else x # (rdistinct xs ({x} \ acc)))" + + + + + +fun rflts :: "rrexp list \ rrexp list" + where + "rflts [] = []" +| "rflts (RZERO # rs) = rflts rs" +| "rflts ((RALTS rs1) # rs) = rs1 @ rflts rs" +| "rflts (r1 # rs) = r1 # rflts rs" + + +fun rsimp_ALTs :: " rrexp list \ rrexp" + where + "rsimp_ALTs [] = RZERO" +| "rsimp_ALTs [r] = r" +| "rsimp_ALTs rs = RALTS rs" + +fun rsimp_SEQ :: " rrexp \ rrexp \ rrexp" + where + "rsimp_SEQ RZERO _ = RZERO" +| "rsimp_SEQ _ RZERO = RZERO" +| "rsimp_SEQ RONE r2 = r2" +| "rsimp_SEQ r1 r2 = RSEQ r1 r2" + + +fun rsimp :: "rrexp \ rrexp" + where + "rsimp (RSEQ r1 r2) = rsimp_SEQ (rsimp r1) (rsimp r2)" +| "rsimp (RALTS rs) = rsimp_ALTs (rdistinct (rflts (map rsimp rs)) {}) " +| "rsimp r = r" + + +fun + rders_simp :: "rrexp \ string \ rrexp" +where + "rders_simp r [] = r" +| "rders_simp r (c#s) = rders_simp (rsimp (rder c r)) s" + +fun rsize :: "rrexp \ nat" where + "rsize RZERO = 1" +| "rsize (RONE) = 1" +| "rsize (RCHAR c) = 1" +| "rsize (RALTS rs) = Suc (sum_list (map rsize rs))" +| "rsize (RSEQ r1 r2) = Suc (rsize r1 + rsize r2)" +| "rsize (RSTAR r) = Suc (rsize r)" + + +fun rlist_size :: "rrexp list \ nat" where +"rlist_size (r # rs) = rsize r + rlist_size rs" +| "rlist_size [] = 0" + +fun vsuf :: "char list \ rrexp \ char list list" where +"vsuf [] _ = []" +|"vsuf (c#cs) r1 = (if (rnullable r1) then (vsuf cs (rder c r1)) @ [c # cs] + else (vsuf cs (rder c r1)) + ) " + +lemma seq_closed_form: shows +"rsimp (rders_simp (RSEQ r1 r2) s) = +rsimp ( RALTS ( (RSEQ (rders_simp r1 s) r2) # + (map (rders r2) (vsuf s r1)) + ) + )" + apply(induct s) + apply simp + sorry + + +fun star_update :: "char \ rrexp \ char list list \ char list list" where +"star_update c r [] = []" +|"star_update c r (s # Ss) = (if (rnullable (rders_simp r s)) + then (s@[c]) # [c] # (star_update c r Ss) + else (s@[c]) # (star_update c r Ss) )" + +fun star_updates :: "char list \ rrexp \ char list list \ char list list" + where +"star_updates [] r Ss = Ss" +| "star_updates (c # cs) r Ss = star_updates cs r (star_update c r Ss)" + + +lemma star_closed_form: + shows "rders_simp (RSTAR r0) (c#s) = +rsimp ( RALTS ( (map (\s1. RSEQ (rders_simp r0 s1) (RSTAR r0) ) (star_updates s r [[c]]) ) ))" + apply(induct s) + apply simp + sorry + + +lemma star_closed_form_bounded_by_rdistinct_list_estimate: + shows "rsize (rsimp ( RALTS ( (map (\s1. RSEQ (rders_simp r0 s1) (RSTAR r0) ) + (star_updates s r [[c]]) ) ))) \ + Suc (sum_list (map rsize (rdistinct (map (\s1. RSEQ (rders_simp r0 s1) (RSTAR r0) ) + (star_updates s r [[c]]) ) {}) ) )" + + sorry + +lemma distinct_list_rexp_up_to_certain_size_bouded_by_set_enumerating_up_to_that_size: + shows "\r\ set rs. (rsize r ) \ N \ sum_list (map rsize (rdistinct rs {})) \ + (card (rexp_enum N))* N" + sorry + + +lemma ind_hypo_on_ders_leads_to_stars_bounded: + shows "\s. rsize (rders_simp r0 s) \ N \ + (sum_list (map rsize (rdistinct (map (\s1. RSEQ (rders_simp r0 s1) (RSTAR r0) ) + (star_updates s r [[c]]) ) {}) ) ) \ +(card (rexp_enum (Suc (N + rsize (RSTAR r0))))) * (Suc (N + rsize (RSTAR r0))) +" + sorry + +lemma r0_bounded_star_bounded: + shows "\s. rsize (rders_simp r0 s) \ N \ + \s. rsize (rders_simp (RSTAR r0) s) \ +(card (rexp_enum (Suc (N + rsize (RSTAR r0))))) * (Suc (N + rsize (RSTAR r0)))" + + sorry + + +(*some basic facts about rsimp*) +lemma hand_made_def_rlist_size: + shows "rlist_size rs = sum_list (map rsize rs)" +proof (induct rs) + case Nil show ?case by simp +next + case (Cons a rs) thus ?case + by simp +qed + +lemma rder_rsimp_ALTs_commute: + shows " (rder x (rsimp_ALTs rs)) = rsimp_ALTs (map (rder x) rs)" + apply(induct rs) + apply simp + apply(case_tac rs) + apply simp + apply auto + done + + +lemma rsimp_aalts_smaller: + shows "rsize (rsimp_ALTs rs) \ rsize (RALTS rs)" + apply(induct rs) + apply simp + apply simp + apply(case_tac "rs = []") + apply simp + apply(subgoal_tac "\rsp ap. rs = ap # rsp") + apply(erule exE)+ + apply simp + apply simp + by(meson neq_Nil_conv) + + + + + +lemma rSEQ_mono: + shows "rsize (rsimp_SEQ r1 r2) \rsize ( RSEQ r1 r2)" + apply auto + apply(induct r1) + apply auto + apply(case_tac "r2") + apply simp_all + apply(case_tac r2) + apply simp_all + apply(case_tac r2) + apply simp_all + apply(case_tac r2) + apply simp_all + apply(case_tac r2) + apply simp_all + done + +lemma ralts_cap_mono: + shows "rsize (RALTS rs) \ Suc ( sum_list (map rsize rs)) " + by simp + +lemma rflts_def_idiot: + shows "\ a \ RZERO; \rs1. a = RALTS rs1\ + \ rflts (a # rs) = a # rflts rs" + apply(case_tac a) + apply simp_all + done + + +lemma rflts_mono: + shows "sum_list (map rsize (rflts rs))\ sum_list (map rsize rs)" + apply(induct rs) + apply simp + apply(case_tac "a = RZERO") + apply simp + apply(case_tac "\rs1. a = RALTS rs1") + apply(erule exE) + apply simp + apply(subgoal_tac "rflts (a # rs) = a # (rflts rs)") + prefer 2 + using rflts_def_idiot apply blast + apply simp + done + +lemma rdistinct_smaller: shows "sum_list (map rsize (rdistinct rs ss)) \ +sum_list (map rsize rs )" + apply (induct rs arbitrary: ss) + apply simp + by (simp add: trans_le_add2) + +lemma rdistinct_phi_smaller: "sum_list (map rsize (rdistinct rs {})) \ sum_list (map rsize rs)" + by (simp add: rdistinct_smaller) + + +lemma rsimp_alts_mono : + shows "\x. (\xa. xa \ set x \ rsize (rsimp xa) \ rsize xa) \ +rsize (rsimp_ALTs (rdistinct (rflts (map rsimp x)) {})) \ Suc (sum_list (map rsize x))" + apply(subgoal_tac "rsize (rsimp_ALTs (rdistinct (rflts (map rsimp x)) {} )) + \ rsize (RALTS (rdistinct (rflts (map rsimp x)) {} ))") + prefer 2 + using rsimp_aalts_smaller apply auto[1] + apply(subgoal_tac "rsize (RALTS (rdistinct (rflts (map rsimp x)) {})) \Suc( sum_list (map rsize (rdistinct (rflts (map rsimp x)) {})))") + prefer 2 + using ralts_cap_mono apply blast + apply(subgoal_tac "sum_list (map rsize (rdistinct (rflts (map rsimp x)) {})) \ + sum_list (map rsize ( (rflts (map rsimp x))))") + prefer 2 + using rdistinct_smaller apply presburger + apply(subgoal_tac "sum_list (map rsize (rflts (map rsimp x))) \ + sum_list (map rsize (map rsimp x))") + prefer 2 + using rflts_mono apply blast + apply(subgoal_tac "sum_list (map rsize (map rsimp x)) \ sum_list (map rsize x)") + prefer 2 + + apply (simp add: sum_list_mono) + by linarith + + + + + +lemma rsimp_mono: + shows "rsize (rsimp r) \ rsize r" + apply(induct r) + apply simp_all + apply(subgoal_tac "rsize (rsimp_SEQ (rsimp r1) (rsimp r2)) \ rsize (RSEQ (rsimp r1) (rsimp r2))") + apply force + using rSEQ_mono + apply presburger + using rsimp_alts_mono by auto + +lemma idiot: + shows "rsimp_SEQ RONE r = r" + apply(case_tac r) + apply simp_all + done + +lemma no_alt_short_list_after_simp: + shows "RALTS rs = rsimp r \ rsimp_ALTs rs = RALTS rs" + sorry + +lemma no_further_dB_after_simp: + shows "RALTS rs = rsimp r \ rdistinct rs {} = rs" + sorry + + +lemma idiot2: + shows " \r1 \ RZERO; r1 \ RONE;r2 \ RZERO\ + \ rsimp_SEQ r1 r2 = RSEQ r1 r2" + apply(case_tac r1) + apply(case_tac r2) + apply simp_all + apply(case_tac r2) + apply simp_all + apply(case_tac r2) + apply simp_all + apply(case_tac r2) + apply simp_all + apply(case_tac r2) + apply simp_all + done + +lemma rders__onechar: + shows " (rders_simp r [c]) = (rsimp (rders r [c]))" + by simp + +lemma rders_append: + "rders c (s1 @ s2) = rders (rders c s1) s2" + apply(induct s1 arbitrary: c s2) + apply(simp_all) + done + +lemma rders_simp_append: + "rders_simp c (s1 @ s2) = rders_simp (rders_simp c s1) s2" + apply(induct s1 arbitrary: c s2) + apply(simp_all) + done + +lemma inside_simp_removal: + shows " rsimp (rder x (rsimp r)) = rsimp (rder x r)" + sorry + +lemma set_related_list: + shows "distinct rs \ length rs = card (set rs)" + by (simp add: distinct_card) +(*this section deals with the property of distinctBy: creates a list without duplicates*) +lemma rdistinct_never_added_twice: + shows "rdistinct (a # rs) {a} = rdistinct rs {a}" + by force + + +lemma rdistinct_does_the_job: + shows "distinct (rdistinct rs s)" + apply(induct rs arbitrary: s) + apply simp + apply simp + sorry + +lemma rders_simp_same_simpders: + shows "s \ [] \ rders_simp r s = rsimp (rders r s)" + apply(induct s rule: rev_induct) + apply simp + apply(case_tac "xs = []") + apply simp + apply(simp add: rders_append rders_simp_append) + using inside_simp_removal by blast + +lemma simp_helps_der_pierce: + shows " rsimp + (rder x + (rsimp_ALTs rs)) = + rsimp + (rsimp_ALTs + (map (rder x ) + rs + ) + )" + sorry + + +lemma rders_simp_one_char: + shows "rders_simp r [c] = rsimp (rder c r)" + apply auto + done + +lemma rsimp_idem: + shows "rsimp (rsimp r) = rsimp r" + sorry + +corollary rsimp_inner_idem1: + shows "rsimp r = RSEQ r1 r2 \ rsimp r1 = r1 \ rsimp r2 = r2" + + sorry + +corollary rsimp_inner_idem2: + shows "rsimp r = RALTS rs \ \r' \ (set rs). rsimp r' = r'" + sorry + +corollary rsimp_inner_idem3: + shows "rsimp r = RALTS rs \ map rsimp rs = rs" + by (meson map_idI rsimp_inner_idem2) + +corollary rsimp_inner_idem4: + shows "rsimp r = RALTS rs \ flts rs = rs" + sorry + + +lemma head_one_more_simp: + shows "map rsimp (r # rs) = map rsimp (( rsimp r) # rs)" + by (simp add: rsimp_idem) + +lemma head_one_more_dersimp: + shows "map rsimp ((rder x (rders_simp r s) # rs)) = map rsimp ((rders_simp r (s@[x]) ) # rs)" + using head_one_more_simp rders_simp_append rders_simp_one_char by presburger + + + + +lemma ders_simp_nullability: + shows "rnullable (rders r s) = rnullable (rders_simp r s)" + sorry + +lemma first_elem_seqder: + shows "\rnullable r1p \ map rsimp (rder x (RSEQ r1p r2) + # rs) = map rsimp ((RSEQ (rder x r1p) r2) # rs) " + by auto + +lemma first_elem_seqder1: + shows "\rnullable (rders_simp r xs) \ map rsimp ( (rder x (RSEQ (rders_simp r xs) r2)) # rs) = + map rsimp ( (RSEQ (rsimp (rder x (rders_simp r xs))) r2) # rs)" + by (simp add: rsimp_idem) + +lemma first_elem_seqdersimps: + shows "\rnullable (rders_simp r xs) \ map rsimp ( (rder x (RSEQ (rders_simp r xs) r2)) # rs) = + map rsimp ( (RSEQ (rders_simp r (xs @ [x])) r2) # rs)" + using first_elem_seqder1 rders_simp_append by auto + + + + + +lemma seq_update_seq_ders: + shows "rsimp (rder c ( rsimp (RALTS ((RSEQ (rders_simp r1 s) r2) # +(map (rders_simp r2) Ss))))) = + rsimp (RALTS ((RSEQ (rders_simp r1 (s @ [c])) r2) # +(map (rders_simp r2) (seq_update c (rders_simp r1 s) Ss)))) " + sorry + +lemma seq_ders_closed_form1: + shows "\Ss. rders_simp (RSEQ r1 r2) [c] = rsimp (RALTS ((RSEQ (rders_simp r1 [c]) r2) # +(map ( rders_simp r2 ) Ss)))" + apply(case_tac "rnullable r1") + apply(subgoal_tac " rders_simp (RSEQ r1 r2) [c] = +rsimp (RALTS ((RSEQ (rders_simp r1 [c]) r2) # (map (rders_simp r2) [[c]])))") + prefer 2 + apply (simp add: rsimp_idem) + apply(rule_tac x = "[[c]]" in exI) + apply simp + apply(subgoal_tac " rders_simp (RSEQ r1 r2) [c] = +rsimp (RALTS ((RSEQ (rders_simp r1 [c]) r2) # (map (rders_simp r2) [])))") + apply blast + apply(simp add: rsimp_idem) + sorry + + + + + + + + +lemma simp_flatten2: + shows "rsimp (RALTS (r # [RALTS rs])) = rsimp (RALTS (r # rs))" + sorry + + +lemma simp_flatten: + shows "rsimp (RALTS ((RALTS rsa) # rsb)) = rsimp (RALTS (rsa @ rsb))" + + sorry + + + +(*^^^^^^^^^nullable_seq_with_list1 related ^^^^^^^^^^^^^^^^*) + + + + + + + + + + + +lemma non_zero_size: + shows "rsize r \ Suc 0" + apply(induct r) + apply auto done + +corollary size_geq1: + shows "rsize r \ 1" + by (simp add: non_zero_size) + + +lemma rexp_size_induct: + shows "\N r x5 a list. + \ rsize r = Suc N; r = RALTS x5; + x5 = a # list\ \\i j. rsize a = i \ rsize (RALTS list) = j \ i + j = Suc N \ i \ N \ j \ N" + apply(rule_tac x = "rsize a" in exI) + apply(rule_tac x = "rsize (RALTS list)" in exI) + apply(subgoal_tac "rsize a \ 1") + prefer 2 + using One_nat_def non_zero_size apply presburger + apply(subgoal_tac "rsize (RALTS list) \ 1 ") + prefer 2 + using size_geq1 apply blast + apply simp + done + +definition SEQ_set where + "SEQ_set A n \ {RSEQ r1 r2 | r1 r2. r1 \ A \ r2 \ A \ rsize r1 + rsize r2 \ n}" + +definition SEQ_set_cartesian where +"SEQ_set_cartesian A n = {RSEQ r1 r2 | r1 r2. r1 \ A \ r2 \ A}" + +definition ALT_set where +"ALT_set A n \ {RALTS rs | rs. set rs \ A \ sum_list (map rsize rs) \ n}" + + +definition + "sizeNregex N \ {r. rsize r \ N}" + +lemma sizenregex_induct: + shows "sizeNregex (Suc n) = sizeNregex n \ {RZERO, RONE, RALTS []} \ {RCHAR c| c. True} \ +SEQ_set ( sizeNregex n) n \ ALT_set (sizeNregex n) n \ (RSTAR ` (sizeNregex n))" + sorry + + +lemma chars_finite: + shows "finite (RCHAR ` (UNIV::(char set)))" + apply(simp) + done + +thm full_SetCompr_eq + +lemma size1finite: + shows "finite (sizeNregex (Suc 0))" + apply(subst sizenregex_induct) + apply(subst finite_Un)+ + apply(subgoal_tac "sizeNregex 0 = {}") + apply(rule conjI)+ + apply (metis Collect_empty_eq finite.emptyI non_zero_size not_less_eq_eq sizeNregex_def) + apply simp + apply (simp add: full_SetCompr_eq) + apply (simp add: SEQ_set_def) + apply (simp add: ALT_set_def) + apply(simp add: full_SetCompr_eq) + using non_zero_size not_less_eq_eq sizeNregex_def by fastforce + +lemma seq_included_in_cart: + shows "SEQ_set A n \ SEQ_set_cartesian A n" + using SEQ_set_cartesian_def SEQ_set_def by fastforce + +lemma finite_seq: + shows " finite (sizeNregex n) \ finite (SEQ_set (sizeNregex n) n)" + apply(rule finite_subset) + sorry + + +lemma finite_size_n: + shows "finite (sizeNregex n)" + apply(induct n) + apply (metis Collect_empty_eq finite.emptyI non_zero_size not_less_eq_eq sizeNregex_def) + apply(subst sizenregex_induct) + apply(subst finite_Un)+ + apply(rule conjI)+ + apply simp + apply simp + apply (simp add: full_SetCompr_eq) + + sorry + + + + + + + + + + + + + + + + + + + + + +lemma star_update_case1: + shows "rnullable (rders_simp r s) \ star_update c r (s # Ss) = (s @ [c]) # [c] # (star_update c r Ss)" + + by force + +lemma star_update_case2: + shows "\rnullable (rders_simp r s) \ star_update c r (s # Ss) = (s @ [c]) # (star_update c r Ss)" + by simp + +lemma bubble_break: shows "rflts [r, RZERO] = rflts [r]" + apply(case_tac r) + apply simp+ + done + +lemma rsimp_alts_idem_aux1: + shows "rsimp_ALTs (rdistinct (rflts [rsimp a]) {}) = rsimp (RALTS [a])" + by force + + + +lemma rsimp_alts_idem_aux2: + shows "rsimp a = rsimp (RALTS [a])" + apply(simp) + apply(case_tac "rsimp a") + apply simp+ + apply (metis no_alt_short_list_after_simp no_further_dB_after_simp) + by simp + +lemma rsimp_alts_idem: + shows "rsimp (rsimp_ALTs (a # as)) = rsimp (rsimp_ALTs (a # [(rsimp (rsimp_ALTs as))] ))" + apply(induct as) + apply(subgoal_tac "rsimp (rsimp_ALTs [a, rsimp (rsimp_ALTs [])]) = rsimp (rsimp_ALTs [a, RZERO])") + prefer 2 + apply simp + using bubble_break rsimp_alts_idem_aux2 apply auto[1] + apply(case_tac as) + apply(subgoal_tac "rsimp_ALTs( aa # as) = aa") + prefer 2 + apply simp + using head_one_more_simp apply fastforce + apply(subgoal_tac "rsimp_ALTs (aa # as) = RALTS (aa # as)") + prefer 2 + + using rsimp_ALTs.simps(3) apply presburger + + apply(simp only:) + apply(subgoal_tac "rsimp_ALTs (a # aa # aaa # list) = RALTS (a # aa # aaa # list)") + prefer 2 + using rsimp_ALTs.simps(3) apply presburger + apply(simp only:) + apply(subgoal_tac "rsimp_ALTs [a, rsimp (RALTS (aa # aaa # list))] = RALTS (a # [rsimp (RALTS (aa # aaa # list))])") + prefer 2 + + using rsimp_ALTs.simps(3) apply presburger + apply(simp only:) + using simp_flatten2 + apply(subgoal_tac " rsimp (RALT a (rsimp (RALTS (aa # aaa # list)))) = rsimp (RALT a ((RALTS (aa # aaa # list)))) ") + prefer 2 + + apply (metis head_one_more_simp list.simps(9) rsimp.simps(2)) + apply (simp only:) + done + + +lemma rsimp_alts_idem2: + shows "rsimp (rsimp_ALTs (a # as)) = rsimp (rsimp_ALTs ((rsimp a) # [(rsimp (rsimp_ALTs as))] ))" + using head_one_more_simp rsimp_alts_idem by auto + + +lemma evolution_step1: + shows "rsimp + (rsimp_ALTs + (rder x (rsimp_SEQ (rders_simp r a) (RSTAR r)) # map (\s1. rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r))) Ss)) = + rsimp + (rsimp_ALTs + (rder x (rsimp_SEQ (rders_simp r a) (RSTAR r)) # [(rsimp (rsimp_ALTs (map (\s1. rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r))) Ss)))])) " + using rsimp_alts_idem by auto + +lemma evolution_step2: + assumes " rsimp (rsimp_ALTs (map (\s1. rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r))) Ss)) = + rsimp (rsimp_ALTs (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))" + shows "rsimp + (rsimp_ALTs + (rder x (rsimp_SEQ (rders_simp r a) (RSTAR r)) # map (\s1. rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r))) Ss)) = + rsimp + (rsimp_ALTs + (rder x (rsimp_SEQ (rders_simp r a) (RSTAR r)) # [ rsimp (rsimp_ALTs (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))])) " + by (simp add: assms rsimp_alts_idem) + +lemma rsimp_seq_aux1: + shows "r = RONE \ r2 = RSTAR r0 \ rsimp_SEQ r r2 = r2" + apply simp + done + +lemma multiple_alts_simp_flatten: + shows "rsimp (RALT (RALT r1 r2) (rsimp_ALTs rs)) = rsimp (RALTS (r1 # r2 # rs))" + by (metis Cons_eq_appendI append_self_conv2 rsimp_ALTs.simps(2) rsimp_ALTs.simps(3) rsimp_alts_idem simp_flatten) + + +lemma evo3_main_aux1: + shows "rsimp + (RALT (RALT (RSEQ (rsimp (rders_simp r (a @ [x]))) (RSTAR r)) (RSEQ (rders_simp r [x]) (RSTAR r))) + (rsimp_ALTs (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))) = + rsimp + (RALTS + (RSEQ (rders_simp r (a @ [x])) (RSTAR r) # + RSEQ (rders_simp r [x]) (RSTAR r) # map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))" + apply(subgoal_tac "rsimp + (RALT (RALT (RSEQ (rsimp (rders_simp r (a @ [x]))) (RSTAR r)) (RSEQ (rders_simp r [x]) (RSTAR r))) + (rsimp_ALTs (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))) = +rsimp + (RALT (RALT (RSEQ ( (rders_simp r (a @ [x]))) (RSTAR r)) (RSEQ (rders_simp r [x]) (RSTAR r))) + (rsimp_ALTs (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))) ") + prefer 2 + apply (simp add: rsimp_idem) + apply (simp only:) + apply(subst multiple_alts_simp_flatten) + by simp + + +lemma evo3_main_nullable: + shows " +\a Ss. + \rsimp (rsimp_ALTs (map (\s1. rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r))) Ss)) = + rsimp (rsimp_ALTs (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss))); + rders_simp r a \ RONE; rders_simp r a \ RZERO; rnullable (rders_simp r a)\ + \ rsimp + (rsimp_ALTs + [rder x (RSEQ (rders_simp r a) (RSTAR r)), + rsimp (rsimp_ALTs (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))]) = + rsimp (rsimp_ALTs (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r (a # Ss))))" + apply(subgoal_tac "rder x (RSEQ (rders_simp r a) (RSTAR r)) + = RALT (RSEQ (rder x (rders_simp r a)) (RSTAR r)) (RSEQ (rder x r) (RSTAR r))") + prefer 2 + apply simp + apply(simp only:) + apply(subgoal_tac "star_update x r (a # Ss) = (a @ [x]) # [x] # (star_update x r Ss)") + prefer 2 + using star_update_case1 apply presburger + apply(simp only:) + apply(subst List.list.map(2))+ + apply(subgoal_tac "rsimp + (rsimp_ALTs + [RALT (RSEQ (rder x (rders_simp r a)) (RSTAR r)) (RSEQ (rder x r) (RSTAR r)), + rsimp (rsimp_ALTs (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))]) = +rsimp + (RALTS + [RALT (RSEQ (rder x (rders_simp r a)) (RSTAR r)) (RSEQ (rder x r) (RSTAR r)), + rsimp (rsimp_ALTs (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))])") + prefer 2 + using rsimp_ALTs.simps(3) apply presburger + apply(simp only:) + apply(subgoal_tac " rsimp + (rsimp_ALTs + (rsimp_SEQ (rders_simp r (a @ [x])) (RSTAR r) # + rsimp_SEQ (rders_simp r [x]) (RSTAR r) # map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss))) += + rsimp + (RALTS + (rsimp_SEQ (rders_simp r (a @ [x])) (RSTAR r) # + rsimp_SEQ (rders_simp r [x]) (RSTAR r) # map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))") + + prefer 2 + using rsimp_ALTs.simps(3) apply presburger + apply (simp only:) + apply(subgoal_tac " rsimp + (RALT (RALT (RSEQ (rder x (rders_simp r a)) (RSTAR r)) (RSEQ ( (rder x r)) (RSTAR r))) + (rsimp (rsimp_ALTs (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss))))) = + rsimp + (RALT (RALT (RSEQ (rsimp (rder x (rders_simp r a))) (RSTAR r)) (RSEQ (rsimp (rder x r)) (RSTAR r))) + (rsimp (rsimp_ALTs (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))))") + prefer 2 + apply (simp add: rsimp_idem) + apply(simp only:) + apply(subgoal_tac " rsimp + (RALT (RALT (RSEQ (rsimp (rder x (rders_simp r a))) (RSTAR r)) (RSEQ (rsimp (rder x r)) (RSTAR r))) + (rsimp (rsimp_ALTs (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss))))) = + rsimp + (RALT (RALT (RSEQ (rsimp (rders_simp r (a @ [x]))) (RSTAR r)) (RSEQ (rders_simp r [x]) (RSTAR r))) + (rsimp (rsimp_ALTs (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))))") + prefer 2 + using rders_simp_append rders_simp_one_char rsimp_idem apply presburger + apply(simp only:) + apply(subgoal_tac " rsimp + (RALTS + (rsimp_SEQ (rders_simp r (a @ [x])) (RSTAR r) # + rsimp_SEQ (rders_simp r [x]) (RSTAR r) # map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss))) = + rsimp + (RALTS + (RSEQ (rders_simp r (a @ [x])) (RSTAR r) # + RSEQ (rders_simp r [x]) (RSTAR r) # map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))") + prefer 2 + apply (smt (z3) idiot2 list.simps(9) rrexp.distinct(9) rsimp.simps(1) rsimp.simps(2) rsimp.simps(3) rsimp.simps(4) rsimp.simps(6) rsimp_idem) + apply(simp only:) + apply(subgoal_tac " rsimp + (RALT (RALT (RSEQ (rsimp (rders_simp r (a @ [x]))) (RSTAR r)) (RSEQ (rders_simp r [x]) (RSTAR r))) + (rsimp (rsimp_ALTs (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss))))) = + rsimp + (RALT (RALT (RSEQ (rsimp (rders_simp r (a @ [x]))) (RSTAR r)) (RSEQ (rders_simp r [x]) (RSTAR r))) + ( (rsimp_ALTs (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss))))) ") + prefer 2 + using rsimp_idem apply force + apply(simp only:) + using evo3_main_aux1 by blast + + +lemma evo3_main_not1: + shows " \rnullable (rders_simp r a) \ rder x (RSEQ (rders_simp r a) (RSTAR r)) = RSEQ (rder x (rders_simp r a)) (RSTAR r)" + by fastforce + + +lemma evo3_main_not2: + shows "\rnullable (rders_simp r a) \ rsimp + (rsimp_ALTs + (rder x (RSEQ (rders_simp r a) (RSTAR r)) # rs)) = rsimp + (rsimp_ALTs + ((RSEQ (rders_simp r (a @ [x])) (RSTAR r)) # rs))" + by (simp add: rders_simp_append rsimp_alts_idem2 rsimp_idem) + +lemma evo3_main_not3: + shows "rsimp + (rsimp_ALTs + (rsimp_SEQ r1 (RSTAR r) # rs)) = + rsimp (rsimp_ALTs + (RSEQ r1 (RSTAR r) # rs))" + by (metis idiot2 rrexp.distinct(9) rsimp.simps(1) rsimp.simps(3) rsimp.simps(4) rsimp.simps(6) rsimp_alts_idem rsimp_alts_idem2) + + +lemma evo3_main_notnullable: + shows "\a Ss. + \rsimp (rsimp_ALTs (map (\s1. rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r))) Ss)) = + rsimp (rsimp_ALTs (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss))); + rders_simp r a \ RONE; rders_simp r a \ RZERO; \rnullable (rders_simp r a)\ + \ rsimp + (rsimp_ALTs + [rder x (RSEQ (rders_simp r a) (RSTAR r)), + rsimp (rsimp_ALTs (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))]) = + rsimp (rsimp_ALTs (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r (a # Ss))))" + apply(subst star_update_case2) + apply simp + apply(subst List.list.map(2)) + apply(subst evo3_main_not2) + apply simp + apply(subst evo3_main_not3) + using rsimp_alts_idem by presburger + + +lemma evo3_aux2: + shows "rders_simp r a = RONE \ rsimp_SEQ (rders_simp (rders_simp r a) [x]) (RSTAR r) = RZERO" + by simp +lemma evo3_aux3: + shows "rsimp (rsimp_ALTs (RZERO # rs)) = rsimp (rsimp_ALTs rs)" + by (metis list.simps(8) list.simps(9) rdistinct.simps(1) rflts.simps(1) rflts.simps(2) rsimp.simps(2) rsimp_ALTs.simps(1) rsimp_ALTs.simps(2) rsimp_ALTs.simps(3) rsimp_alts_idem) + +lemma evo3_aux4: + shows " rsimp + (rsimp_ALTs + [RSEQ (rder x r) (RSTAR r), + rsimp (rsimp_ALTs rs)]) = + rsimp + (rsimp_ALTs + (rsimp_SEQ (rders_simp r [x]) (RSTAR r) # rs))" + by (metis rders_simp_one_char rsimp.simps(1) rsimp.simps(6) rsimp_alts_idem rsimp_alts_idem2) + +lemma evo3_aux5: + shows "rders_simp r a \ RONE \ rders_simp r a \ RZERO \ rsimp_SEQ (rders_simp r a) (RSTAR r) = RSEQ (rders_simp r a) (RSTAR r)" + using idiot2 by blast + + +lemma evolution_step3: + shows" \a Ss. + rsimp (rsimp_ALTs (map (\s1. rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r))) Ss)) = + rsimp (rsimp_ALTs (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss))) \ + rsimp + (rsimp_ALTs + [rder x (rsimp_SEQ (rders_simp r a) (RSTAR r)), + rsimp (rsimp_ALTs (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)))]) = + rsimp (rsimp_ALTs (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r (a # Ss))))" + apply(case_tac "rders_simp r a = RONE") + apply(subst rsimp_seq_aux1) + apply simp + apply(subst rder.simps(6)) + apply(subgoal_tac "rnullable (rders_simp r a)") + prefer 2 + using rnullable.simps(2) apply presburger + apply(subst star_update_case1) + apply simp + + apply(subst List.list.map)+ + apply(subst rders_simp_append) + apply(subst evo3_aux2) + apply simp + apply(subst evo3_aux3) + apply(subst evo3_aux4) + apply simp + apply(case_tac "rders_simp r a = RZERO") + + apply (simp add: rsimp_alts_idem2) + apply(subgoal_tac "rders_simp r (a @ [x]) = RZERO") + prefer 2 + using rder.simps(1) rders_simp_append rders_simp_one_char rsimp.simps(3) apply presburger + using rflts.simps(2) rsimp.simps(3) rsimp_SEQ.simps(1) apply presburger + apply(subst evo3_aux5) + apply simp + apply(case_tac "rnullable (rders_simp r a) ") + using evo3_main_nullable apply blast + using evo3_main_notnullable apply blast + done + +(* +proof (prove) +goal (1 subgoal): + 1. map f (a # s) = f a # map f s +Auto solve_direct: the current goal can be solved directly with + HOL.nitpick_simp(115): map ?f (?x21.0 # ?x22.0) = ?f ?x21.0 # map ?f ?x22.0 + List.list.map(2): map ?f (?x21.0 # ?x22.0) = ?f ?x21.0 # map ?f ?x22.0 + List.list.simps(9): map ?f (?x21.0 # ?x22.0) = ?f ?x21.0 # map ?f ?x22.0 +*) +lemma starseq_list_evolution: + fixes r :: rrexp and Ss :: "char list list" and x :: char + shows "rsimp (rsimp_ALTs (map (\s1. rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r))) Ss) ) = + rsimp (rsimp_ALTs (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) (star_update x r Ss)) )" + apply(induct Ss) + apply simp + apply(subst List.list.map(2)) + apply(subst evolution_step2) + apply simp + + + sorry + + +lemma star_seqs_produce_star_seqs: + shows "rsimp (rsimp_ALTs (map (rder x \ (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r))) Ss)) + = rsimp (rsimp_ALTs (map ( (\s1. rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r)))) Ss))" + by (meson comp_apply) + +lemma map_der_lambda_composition: + shows "map (rder x) (map (\s. f s) Ss) = map (\s. (rder x (f s))) Ss" + by force + +lemma ralts_vs_rsimpalts: + shows "rsimp (RALTS rs) = rsimp (rsimp_ALTs rs)" + by (metis evo3_aux3 rsimp_ALTs.simps(2) rsimp_ALTs.simps(3) simp_flatten2) + + +lemma linearity_of_list_of_star_or_starseqs: + fixes r::rrexp and Ss::"char list list" and x::char + shows "\Ssa. rsimp (rder x (rsimp_ALTs (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ss))) = + rsimp (RALTS ( (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ssa)))" + apply(subst rder_rsimp_ALTs_commute) + apply(subst map_der_lambda_composition) + using starseq_list_evolution + apply(rule_tac x = "star_update x r Ss" in exI) + apply(subst ralts_vs_rsimpalts) + by simp + + + +(*certified correctness---does not depend on any previous sorry*) +lemma star_list_push_der: shows " \xs \ [] \ \Ss. rders_simp (RSTAR r) xs = rsimp (RALTS (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ss)); + xs @ [x] \ []; xs \ []\ \ + \Ss. rders_simp (RSTAR r ) (xs @ [x]) = + rsimp (RALTS (map (\s1. (rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r))) ) Ss) )" + apply(subgoal_tac "\Ss. rders_simp (RSTAR r) xs = rsimp (RALTS (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ss))") + prefer 2 + apply blast + apply(erule exE) + apply(subgoal_tac "rders_simp (RSTAR r) (xs @ [x]) = rsimp (rder x (rsimp (RALTS (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ss))))") + prefer 2 + using rders_simp_append + using rders_simp_one_char apply presburger + apply(rule_tac x= "Ss" in exI) + apply(subgoal_tac " rsimp (rder x (rsimp (RALTS (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ss)))) = + rsimp (rsimp (rder x (RALTS (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ss))))") + prefer 2 + using inside_simp_removal rsimp_idem apply presburger + apply(subgoal_tac "rsimp (rsimp (rder x (RALTS (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ss)))) = + rsimp (rsimp (RALTS (map (rder x) (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ss))))") + prefer 2 + using rder.simps(4) apply presburger + apply(subgoal_tac "rsimp (rsimp (RALTS (map (rder x) (map (\s1. rsimp_SEQ (rders_simp r s1) (RSTAR r)) Ss)))) = + rsimp (rsimp (RALTS (map (\s1. (rder x (rsimp_SEQ (rders_simp r s1) (RSTAR r)))) Ss)))") + apply (metis rsimp_idem) + by (metis map_der_lambda_composition) + + + +end