# HG changeset patch # User Chengsong # Date 1685085017 -3600 # Node ID 70c10dc41606a7ee92794f2e462f3b5b5627881e # Parent 56057198e4f5a4c9929da1513420c390af33fd38# Parent 304a12cdda6ffb20b66e77193d32e362d0532dca changes from Christian diff -r 56057198e4f5 -r 70c10dc41606 Literature/asperti-pointed.pdf Binary file Literature/asperti-pointed.pdf has changed diff -r 56057198e4f5 -r 70c10dc41606 progs/scala/re-annotated2.sc --- a/progs/scala/re-annotated2.sc Fri May 26 08:09:30 2023 +0100 +++ b/progs/scala/re-annotated2.sc Fri May 26 08:10:17 2023 +0100 @@ -211,7 +211,7 @@ case (_, AZERO) => AZERO case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s) // needed in order to keep the size down - case (AALTS(bs, rs), r2) => AALTS(bs1 ++ bs, rs.map(ASEQ(Nil, _, r2))) + //case (AALTS(bs, rs), r2) => AALTS(bs1 ++ bs, rs.map(ASEQ(Nil, _, r2))) case (r1s, r2s) => ASEQ(bs1, r1s, r2s) } // distinctBy deletes copies of the same "erased" regex @@ -223,6 +223,11 @@ case r => r } +def bders_simp(r: ARexp, s: List[Char]) : ARexp = s match { + case Nil => r + case c::cs => bders_simp(bsimp(bder(c, r)), cs) +} + def blex_simp(r: ARexp, s: List[Char]) : Bits = s match { case Nil => if (bnullable(r)) bmkeps(r) else throw new Exception("Not matched") @@ -368,3 +373,16 @@ val n = 200 println(s"lexing fib program ($n times, size ${prog2.length * n})") println(time_needed(1, blexing_simp(WHILE_REGS, prog2 * n))) + + + + + +val reg2 = STAR("a" | "aa") + +println(bsize(bders_simp(internalise(reg2), ("a" * 0).toList))) +println(bsize(bders_simp(internalise(reg2), ("a" * 1).toList))) +println(bsize(bders_simp(internalise(reg2), ("a" * 2).toList))) +println(bsize(bders_simp(internalise(reg2), ("a" * 3).toList))) +println(bsize(bders_simp(internalise(reg2), ("a" * 4).toList))) +println(bsize(bders_simp(internalise(reg2), ("a" * 50000).toList))) diff -r 56057198e4f5 -r 70c10dc41606 thys/Exercises.thy --- a/thys/Exercises.thy Fri May 26 08:09:30 2023 +0100 +++ b/thys/Exercises.thy Fri May 26 08:10:17 2023 +0100 @@ -30,7 +30,10 @@ zeroable r1 \ zeroable r2 \ (atmostempty r1 \ atmostempty r2)" | "atmostempty (STAR r) = atmostempty r" - +lemma "atmostempty r \ (zeroable r \ nullable r)" + apply(induct r) + apply(auto) + done fun somechars :: "rexp \ bool" @@ -40,8 +43,7 @@ | "somechars (CH c) \ True" | "somechars (ALT r1 r2) \ somechars r1 \ somechars r2" | "somechars (SEQ r1 r2) \ - (\ zeroable r1 \ somechars r2) \ (\ zeroable r2 \ somechars r1) \ - (somechars r1 \ nullable r2) \ (somechars r2 \ nullable r1)" + (\ zeroable r1 \ somechars r2) \ (\ zeroable r2 \ somechars r1)" | "somechars (STAR r) \ somechars r" lemma somechars_correctness: diff -r 56057198e4f5 -r 70c10dc41606 thys3/BasicIdentities.thy --- a/thys3/BasicIdentities.thy Fri May 26 08:09:30 2023 +0100 +++ b/thys3/BasicIdentities.thy Fri May 26 08:10:17 2023 +0100 @@ -1,7 +1,201 @@ theory BasicIdentities - imports "RfltsRdistinctProps" + imports "Lexer" begin +datatype rrexp = + RZERO +| RONE +| RCHAR char +| RSEQ rrexp rrexp +| RALTS "rrexp list" +| RSTAR rrexp +| RNTIMES rrexp nat + +abbreviation + "RALT r1 r2 \ RALTS [r1, r2]" + + +fun + rnullable :: "rrexp \ bool" +where + "rnullable (RZERO) = False" +| "rnullable (RONE) = True" +| "rnullable (RCHAR c) = False" +| "rnullable (RALTS rs) = (\r \ set rs. rnullable r)" +| "rnullable (RSEQ r1 r2) = (rnullable r1 \ rnullable r2)" +| "rnullable (RSTAR r) = True" +| "rnullable (RNTIMES r n) = (if n = 0 then True else rnullable r)" + +fun + rder :: "char \ rrexp \ rrexp" +where + "rder c (RZERO) = RZERO" +| "rder c (RONE) = RZERO" +| "rder c (RCHAR d) = (if c = d then RONE else RZERO)" +| "rder c (RALTS rs) = RALTS (map (rder c) rs)" +| "rder c (RSEQ r1 r2) = + (if rnullable r1 + then RALT (RSEQ (rder c r1) r2) (rder c r2) + else RSEQ (rder c r1) r2)" +| "rder c (RSTAR r) = RSEQ (rder c r) (RSTAR r)" +| "rder c (RNTIMES r n) = (if n = 0 then RZERO else RSEQ (rder c r) (RNTIMES r (n - 1)))" + +fun + rders :: "rrexp \ string \ rrexp" +where + "rders r [] = r" +| "rders r (c#s) = rders (rder c r) s" + +fun rdistinct :: "'a list \'a set \ 'a list" + where + "rdistinct [] acc = []" +| "rdistinct (x#xs) acc = + (if x \ acc then rdistinct xs acc + else x # (rdistinct xs ({x} \ acc)))" + +lemma rdistinct1: + assumes "a \ acc" + shows "a \ set (rdistinct rs acc)" + using assms + apply(induct rs arbitrary: acc a) + apply(auto) + done + + +lemma rdistinct_does_the_job: + shows "distinct (rdistinct rs s)" + apply(induct rs s rule: rdistinct.induct) + apply(auto simp add: rdistinct1) + done + + + +lemma rdistinct_concat: + assumes "set rs \ rset" + shows "rdistinct (rs @ rsa) rset = rdistinct rsa rset" + using assms + apply(induct rs) + apply simp+ + done + +lemma distinct_not_exist: + assumes "a \ set rs" + shows "rdistinct rs rset = rdistinct rs (insert a rset)" + using assms + apply(induct rs arbitrary: rset) + apply(auto) + done + +lemma rdistinct_on_distinct: + shows "distinct rs \ rdistinct rs {} = rs" + apply(induct rs) + apply simp + using distinct_not_exist by fastforce + +lemma distinct_rdistinct_append: + assumes "distinct rs1" "\r \ set rs1. r \ acc" + shows "rdistinct (rs1 @ rsa) acc = rs1 @ (rdistinct rsa (acc \ set rs1))" + using assms + apply(induct rs1 arbitrary: rsa acc) + apply(auto)[1] + apply(auto)[1] + apply(drule_tac x="rsa" in meta_spec) + apply(drule_tac x="{a} \ acc" in meta_spec) + apply(simp) + apply(drule meta_mp) + apply(auto)[1] + apply(simp) + done + + +lemma rdistinct_set_equality1: + shows "set (rdistinct rs acc) = set rs - acc" + apply(induct rs acc rule: rdistinct.induct) + apply(auto) + done + + +lemma rdistinct_set_equality: + shows "set (rdistinct rs {}) = set rs" + by (simp add: rdistinct_set_equality1) + + +fun rflts :: "rrexp list \ rrexp list" + where + "rflts [] = []" +| "rflts (RZERO # rs) = rflts rs" +| "rflts ((RALTS rs1) # rs) = rs1 @ rflts rs" +| "rflts (r1 # rs) = r1 # rflts rs" + + +lemma rflts_def_idiot: + shows "\ a \ RZERO; \rs1. a = RALTS rs1\ \ rflts (a # rs) = a # rflts rs" + apply(case_tac a) + apply simp_all + done + +lemma rflts_def_idiot2: + shows "\a \ RZERO; \rs1. a = RALTS rs1; a \ set rs\ \ a \ set (rflts rs)" + apply(induct rs rule: rflts.induct) + apply(auto) + done + +lemma flts_append: + shows "rflts (rs1 @ rs2) = rflts rs1 @ rflts rs2" + apply(induct rs1) + apply simp + apply(case_tac a) + apply simp+ + done + + +fun rsimp_ALTs :: " rrexp list \ rrexp" + where + "rsimp_ALTs [] = RZERO" +| "rsimp_ALTs [r] = r" +| "rsimp_ALTs rs = RALTS rs" + +lemma rsimpalts_conscons: + shows "rsimp_ALTs (r1 # rsa @ r2 # rsb) = RALTS (r1 # rsa @ r2 # rsb)" + by (metis Nil_is_append_conv list.exhaust rsimp_ALTs.simps(3)) + +lemma rsimp_alts_equal: + shows "rsimp_ALTs (rsa @ a # rsb @ a # rsc) = RALTS (rsa @ a # rsb @ a # rsc) " + by (metis append_Cons append_Nil neq_Nil_conv rsimpalts_conscons) + + +fun rsimp_SEQ :: " rrexp \ rrexp \ rrexp" + where + "rsimp_SEQ RZERO _ = RZERO" +| "rsimp_SEQ _ RZERO = RZERO" +| "rsimp_SEQ RONE r2 = r2" +| "rsimp_SEQ r1 r2 = RSEQ r1 r2" + + +fun rsimp :: "rrexp \ rrexp" + where + "rsimp (RSEQ r1 r2) = rsimp_SEQ (rsimp r1) (rsimp r2)" +| "rsimp (RALTS rs) = rsimp_ALTs (rdistinct (rflts (map rsimp rs)) {}) " +| "rsimp r = r" + + +fun + rders_simp :: "rrexp \ string \ rrexp" +where + "rders_simp r [] = r" +| "rders_simp r (c#s) = rders_simp (rsimp (rder c r)) s" + +fun rsize :: "rrexp \ nat" where + "rsize RZERO = 1" +| "rsize (RONE) = 1" +| "rsize (RCHAR c) = 1" +| "rsize (RALTS rs) = Suc (sum_list (map rsize rs))" +| "rsize (RSEQ r1 r2) = Suc (rsize r1 + rsize r2)" +| "rsize (RSTAR r) = Suc (rsize r)" +| "rsize (RNTIMES r n) = Suc (rsize r) + n" + +abbreviation rsizes where + "rsizes rs \ sum_list (map rsize rs)" lemma rder_rsimp_ALTs_commute: @@ -14,7 +208,6 @@ done - lemma rsimp_aalts_smaller: shows "rsize (rsimp_ALTs rs) \ rsize (RALTS rs)" apply(induct rs) @@ -46,7 +239,9 @@ apply(case_tac r2) apply simp_all apply(case_tac r2) - apply simp_all + apply simp_all + apply(case_tac r2) + apply simp_all done lemma ralts_cap_mono: @@ -138,7 +333,9 @@ apply(case_tac r2) apply simp_all apply(case_tac r2) - apply simp_all + apply simp_all +apply(case_tac r2) + apply simp_all done lemma rders__onechar: @@ -165,8 +362,26 @@ +fun nonalt :: "rrexp \ bool" + where + "nonalt (RALTS rs) = False" +| "nonalt r = True" +fun good :: "rrexp \ bool" where + "good RZERO = False" +| "good (RONE) = True" +| "good (RCHAR c) = True" +| "good (RALTS []) = False" +| "good (RALTS [r]) = False" +| "good (RALTS (r1 # r2 # rs)) = ((distinct ( (r1 # r2 # rs))) \(\r' \ set (r1 # r2 # rs). good r' \ nonalt r'))" +| "good (RSEQ RZERO _) = False" +| "good (RSEQ RONE _) = False" +| "good (RSEQ _ RZERO) = False" +| "good (RSEQ r1 r2) = (good r1 \ good r2)" +| "good (RSTAR r) = True" +| "good (RNTIMES r n) = True" + lemma k0a: shows "rflts [RALTS rs] = rs" apply(simp) @@ -215,16 +430,15 @@ apply (metis flts1 good.simps(5) good.simps(6) k0a neq_Nil_conv) apply (metis flts1 good.simps(5) good.simps(6) k0a neq_Nil_conv) apply fastforce - apply(simp) - done - + apply(simp) + by simp lemma flts3: assumes "\r \ set rs. good r \ r = RZERO" shows "\r \ set (rflts rs). good r" using assms - apply(induct rs rule: rflts.induct) + apply(induct rs arbitrary: rule: rflts.induct) apply(simp_all) by (metis UnE flts2 k0a) @@ -249,7 +463,9 @@ apply(case_tac r2) apply(simp_all) apply(case_tac r2) - apply(simp_all) + apply(simp_all) +apply(case_tac r2) + apply(simp_all) done lemma rsize0: @@ -259,14 +475,31 @@ done +fun nonnested :: "rrexp \ bool" + where + "nonnested (RALTS []) = True" +| "nonnested (RALTS ((RALTS rs1) # rs2)) = False" +| "nonnested (RALTS (r # rs2)) = nonnested (RALTS rs2)" +| "nonnested r = True" + + + +lemma k00: + shows "rflts (rs1 @ rs2) = rflts rs1 @ rflts rs2" + apply(induct rs1 arbitrary: rs2) + apply(auto) + by (metis append.assoc k0) - - - - +lemma k0b: + assumes "nonalt r" "r \ RZERO" + shows "rflts [r] = [r]" + using assms + apply(case_tac r) + apply(simp_all) + done lemma nn1qq: assumes "nonnested (RALTS rs)" @@ -283,7 +516,7 @@ apply(induct rs ) apply(auto) apply (metis list.set_intros(1) nn1qq nonalt.elims(3)) - apply (metis nonalt.elims(2) nonnested.simps(3) nonnested.simps(4) nonnested.simps(5) nonnested.simps(6) nonnested.simps(7)) + apply (metis nonalt.elims(2) nonnested.simps(3) nonnested.simps(4) nonnested.simps(5) nonnested.simps(6) nonnested.simps(7) nonnested.simps(8)) using bbbbs1 apply fastforce by (metis bbbbs1 list.set_intros(2) nn1qq) @@ -326,8 +559,8 @@ apply(case_tac "\bs. rsimp r1 = RONE") apply(auto)[1] using idiot apply fastforce - using idiot2 nonnested.simps(11) apply presburger - by (metis (mono_tags, lifting) Diff_empty image_iff list.set_map nn1bb nn1c rdistinct_set_equality1) + apply (simp add: idiot2) + by (metis (mono_tags, lifting) image_iff list.set_map nn1bb nn1c rdistinct_set_equality) lemma nonalt_flts_rd: shows "\xa \ set (rdistinct (rflts (map rsimp rs)) {})\ @@ -335,6 +568,18 @@ by (metis Diff_empty ex_map_conv nn1b nn1c rdistinct_set_equality1) +lemma rsimpalts_implies1: + shows " rsimp_ALTs (a # rdistinct rs {a}) = RZERO \ a = RZERO" + using rsimp_ALTs.elims by auto + + +lemma rsimpalts_implies2: + shows "rsimp_ALTs (a # rdistinct rs rset) = RZERO \ rdistinct rs rset = []" + by (metis append_butlast_last_id rrexp.distinct(7) rsimpalts_conscons) + +lemma rsimpalts_implies21: + shows "rsimp_ALTs (a # rdistinct rs {a}) = RZERO \ rdistinct rs {a} = []" + using rsimpalts_implies2 by blast lemma bsimp_ASEQ2: @@ -359,15 +604,14 @@ (*says anything coming out of simp+flts+db will be good*) lemma good2_obv_simplified: shows " \\y. rsize y < Suc (rsizes rs) \ good (rsimp y) \ rsimp y = RZERO; - xa \ set (rdistinct (rflts (map rsimp rs)) {})\ \ good xa" + xa \ set (rdistinct (rflts (map rsimp rs)) {}); good (rsimp xa) \ rsimp xa = RZERO\ \ good xa" apply(subgoal_tac " \xa' \ set (map rsimp rs). good xa' \ xa' = RZERO") prefer 2 apply (simp add: elem_smaller_than_set) by (metis Diff_empty flts3 rdistinct_set_equality1) - - - +thm Diff_empty flts3 rdistinct_set_equality1 + lemma good1: shows "good (rsimp a) \ rsimp a = RZERO" apply(induct a taking: rsize rule: measure_induct) @@ -396,28 +640,65 @@ apply blast apply fastforce using less_add_Suc2 apply blast - using less_iff_Suc_add by blast + using less_iff_Suc_add apply blast + using good.simps(45) rsimp.simps(7) by presburger + + + +fun + RL :: "rrexp \ string set" +where + "RL (RZERO) = {}" +| "RL (RONE) = {[]}" +| "RL (RCHAR c) = {[c]}" +| "RL (RSEQ r1 r2) = (RL r1) ;; (RL r2)" +| "RL (RALTS rs) = (\ (set (map RL rs)))" +| "RL (RSTAR r) = (RL r)\" +| "RL (RNTIMES r n) = (RL r) ^^ n" + +lemma pow_rempty_iff: + shows "[] \ (RL r) ^^ n \ (if n = 0 then True else [] \ (RL r))" + by (induct n) (auto simp add: Sequ_def) lemma RL_rnullable: shows "rnullable r = ([] \ RL r)" apply(induct r) - apply(auto simp add: Sequ_def) + apply(auto simp add: Sequ_def pow_rempty_iff) done +lemma concI_if_Nil1: "[] \ A \ xs : B \ xs \ A ;; B" +by (metis append_Nil concI) + + +lemma empty_pow_add: + fixes A::"string set" + assumes "[] \ A" "s \ A ^^ n" + shows "s \ A ^^ (n + m)" + using assms + apply(induct m arbitrary: n) + apply(auto simp add: Sequ_def) + done + +(* +lemma der_pow: + shows "Der c (A ^^ n) = (if n = 0 then {} else (Der c A) ;; (A ^^ (n - 1)))" + apply(induct n arbitrary: A) + apply(auto) + by (smt (verit, best) Suc_pred concE concI concI_if_Nil2 conc_pow_comm lang_pow.simps(2)) +*) + lemma RL_rder: shows "RL (rder c r) = Der c (RL r)" apply(induct r) - apply(auto simp add: Sequ_def Der_def) + apply(auto simp add: Sequ_def Der_def)[5] apply (metis append_Cons) using RL_rnullable apply blast apply (metis append_eq_Cons_conv) apply (metis append_Cons) - apply (metis RL_rnullable append_eq_Cons_conv) - apply (metis Star.step append_Cons) - using Star_decomp by auto - - - + apply (metis RL_rnullable append_eq_Cons_conv) + apply simp + apply(simp) + done lemma RL_rsimp_RSEQ: shows "RL (rsimp_SEQ r1 r2) = (RL r1 ;; RL r2)" @@ -425,8 +706,6 @@ apply(simp_all) done - - lemma RL_rsimp_RALTS: shows "RL (rsimp_ALTs rs) = (\ (set (map RL rs)))" apply(induct rs rule: rsimp_ALTs.induct) @@ -452,19 +731,16 @@ using RL_rsimp_RALTS RL_rsimp_rdistinct RL_rsimp_rflts apply auto[1] by (smt (verit, del_insts) RL_rsimp_RALTS RL_rsimp_rdistinct RL_rsimp_rflts UN_E image_iff list.set_map) - - -lemma der_simp_nullability: - shows "rnullable r = rnullable (rsimp r)" - using RL_rnullable RL_rsimp by auto - lemma qqq1: shows "RZERO \ set (rflts (map rsimp rs))" by (metis ex_map_conv flts3 good.simps(1) good1) - +fun nonazero :: "rrexp \ bool" + where + "nonazero RZERO = False" +| "nonazero r = True" lemma flts_single1: @@ -570,21 +846,15 @@ by (metis good1 goodalts_nonalt rrexp.simps(12)) -corollary head_one_more_simp: +lemma head_one_more_simp: shows "map rsimp (r # rs) = map rsimp (( rsimp r) # rs)" by (simp add: rsimp_idem) - - -lemma basic_regex_property1: - shows "rnullable r \ rsimp r \ RZERO" - apply(induct r rule: rsimp.induct) - apply(auto) - apply (metis idiot idiot2 rrexp.distinct(5)) - by (metis der_simp_nullability rnullable.simps(1) rnullable.simps(4) rsimp.simps(2)) - - +lemma der_simp_nullability: + shows "rnullable r = rnullable (rsimp r)" + using RL_rnullable RL_rsimp by auto + lemma no_alt_short_list_after_simp: shows "RALTS rs = rsimp r \ rsimp_ALTs rs = RALTS rs" @@ -605,22 +875,325 @@ apply(case_tac "rsimp aa") apply simp+ apply (metis no_alt_short_list_after_simp no_further_dB_after_simp) - by simp + apply(simp) + apply(simp) + done + +lemma identity_wwo0: + shows "rsimp (rsimp_ALTs (RZERO # rs)) = rsimp (rsimp_ALTs rs)" + apply (metis idem_after_simp1 list.exhaust list.simps(8) list.simps(9) rflts.simps(2) rsimp.simps(2) rsimp.simps(3) rsimp_ALTs.simps(1) rsimp_ALTs.simps(2) rsimp_ALTs.simps(3)) + done + +lemma distinct_removes_last: + shows "\a \ set as\ + \ rdistinct as rset = rdistinct (as @ [a]) rset" +and "rdistinct (ab # as @ [ab]) rset1 = rdistinct (ab # as) rset1" + apply(induct as arbitrary: rset ab rset1 a) + apply simp + apply simp + apply(case_tac "aa \ rset") + apply(case_tac "a = aa") + apply (metis append_Cons) + apply simp + apply(case_tac "a \ set as") + apply (metis append_Cons rdistinct.simps(2) set_ConsD) + apply(case_tac "a = aa") + prefer 2 + apply simp + apply (metis append_Cons) + apply(case_tac "ab \ rset1") + prefer 2 + apply(subgoal_tac "rdistinct (ab # (a # as) @ [ab]) rset1 = + ab # (rdistinct ((a # as) @ [ab]) (insert ab rset1))") + prefer 2 + apply force + apply(simp only:) + apply(subgoal_tac "rdistinct (ab # a # as) rset1 = ab # (rdistinct (a # as) (insert ab rset1))") + apply(simp only:) + apply(subgoal_tac "rdistinct ((a # as) @ [ab]) (insert ab rset1) = rdistinct (a # as) (insert ab rset1)") + apply blast + apply(case_tac "a \ insert ab rset1") + apply simp + apply (metis insertI1) + apply simp + apply (meson insertI1) + apply simp + apply(subgoal_tac "rdistinct ((a # as) @ [ab]) rset1 = rdistinct (a # as) rset1") + apply simp + by (metis append_Cons insert_iff insert_is_Un rdistinct.simps(2)) + + +lemma distinct_removes_middle: + shows "\a \ set as\ + \ rdistinct (as @ as2) rset = rdistinct (as @ [a] @ as2) rset" +and "rdistinct (ab # as @ [ab] @ as3) rset1 = rdistinct (ab # as @ as3) rset1" + apply(induct as arbitrary: rset rset1 ab as2 as3 a) + apply simp + apply simp + apply(case_tac "a \ rset") + apply simp + apply metis + apply simp + apply (metis insertI1) + apply(case_tac "a = ab") + apply simp + apply(case_tac "ab \ rset") + apply simp + apply presburger + apply (meson insertI1) + apply(case_tac "a \ rset") + apply (metis (no_types, opaque_lifting) Un_insert_left append_Cons insert_iff rdistinct.simps(2) sup_bot_left) + apply(case_tac "ab \ rset") + apply simp + apply (meson insert_iff) + apply simp + by (metis insertI1) + + +lemma distinct_removes_middle3: + shows "\a \ set as\ + \ rdistinct (as @ a #as2) rset = rdistinct (as @ as2) rset" + using distinct_removes_middle(1) by fastforce + + +lemma distinct_removes_list: + shows "\ \r \ set rs. r \ set as\ \ rdistinct (as @ rs) {} = rdistinct as {}" + apply(induct rs) + apply simp+ + apply(subgoal_tac "rdistinct (as @ a # rs) {} = rdistinct (as @ rs) {}") + prefer 2 + apply (metis append_Cons append_Nil distinct_removes_middle(1)) + by presburger + + +lemma spawn_simp_rsimpalts: + shows "rsimp (rsimp_ALTs rs) = rsimp (rsimp_ALTs (map rsimp rs))" + apply(cases rs) + apply simp + apply(case_tac list) + apply simp + apply(subst rsimp_idem[symmetric]) + apply simp + apply(subgoal_tac "rsimp_ALTs rs = RALTS rs") + apply(simp only:) + apply(subgoal_tac "rsimp_ALTs (map rsimp rs) = RALTS (map rsimp rs)") + apply(simp only:) + prefer 2 + apply simp + prefer 2 + using rsimp_ALTs.simps(3) apply presburger + apply auto + apply(subst rsimp_idem)+ + by (metis comp_apply rsimp_idem) + + +lemma simp_singlealt_flatten: + shows "rsimp (RALTS [RALTS rsa]) = rsimp (RALTS (rsa @ []))" + apply(induct rsa) + apply simp + apply simp + by (metis idem_after_simp1 list.simps(9) rsimp.simps(2)) + + +lemma good1_rsimpalts: + shows "rsimp r = RALTS rs \ rsimp_ALTs rs = RALTS rs" + by (metis no_alt_short_list_after_simp) + + + + +lemma good1_flatten: + shows "\ rsimp r = (RALTS rs1)\ + \ rflts (rsimp_ALTs rs1 # map rsimp rsb) = rflts (rs1 @ map rsimp rsb)" + apply(subst good1_rsimpalts) + apply simp+ + apply(subgoal_tac "rflts (rs1 @ map rsimp rsb) = rs1 @ rflts (map rsimp rsb)") + apply simp + using flts_append rsimp_inner_idem4 by presburger + + +lemma flatten_rsimpalts: + shows "rflts (rsimp_ALTs (rdistinct (rflts (map rsimp rsa)) {}) # map rsimp rsb) = + rflts ( (rdistinct (rflts (map rsimp rsa)) {}) @ map rsimp rsb)" + apply(case_tac "map rsimp rsa") + apply simp + apply(case_tac "list") + apply simp + apply(case_tac a) + apply simp+ + apply(rename_tac rs1) + apply (metis good1_flatten map_eq_Cons_D no_further_dB_after_simp) + + apply simp + + apply(subgoal_tac "\r \ set( rflts (map rsimp rsa)). good r") + apply(case_tac "rdistinct (rflts (map rsimp rsa)) {}") + apply simp + apply auto[1] + apply simp + apply(simp) + apply(case_tac "lista") + apply simp_all + + apply (metis append_Cons append_Nil good1_flatten rflts.simps(2) rsimp.simps(2) rsimp_ALTs.elims) + by (metis (no_types, opaque_lifting) append_Cons append_Nil good1_flatten rflts.simps(2) rsimp.simps(2) rsimp_ALTs.elims) + +lemma last_elem_out: + shows "\x \ set xs; x \ rset \ \ rdistinct (xs @ [x]) rset = rdistinct xs rset @ [x]" + apply(induct xs arbitrary: rset) + apply simp+ + done +lemma rdistinct_concat_general: + shows "rdistinct (rs1 @ rs2) {} = (rdistinct rs1 {}) @ (rdistinct rs2 (set rs1))" + apply(induct rs1 arbitrary: rs2 rule: rev_induct) + apply simp + apply(drule_tac x = "x # rs2" in meta_spec) + apply simp + apply(case_tac "x \ set xs") + apply simp + + apply (simp add: distinct_removes_middle3 insert_absorb) + apply simp + by (simp add: last_elem_out) -(*equalities with rsimp *) -lemma identity_wwo0: - shows "rsimp (rsimp_ALTs (RZERO # rs)) = rsimp (rsimp_ALTs rs)" - by (metis idem_after_simp1 list.exhaust list.simps(8) list.simps(9) rflts.simps(2) rsimp.simps(2) rsimp.simps(3) rsimp_ALTs.simps(1) rsimp_ALTs.simps(2) rsimp_ALTs.simps(3)) + + + +lemma distinct_once_enough: + shows "rdistinct (rs @ rsa) {} = rdistinct (rdistinct rs {} @ rsa) {}" + apply(subgoal_tac "distinct (rdistinct rs {})") + apply(subgoal_tac +" rdistinct (rdistinct rs {} @ rsa) {} = rdistinct rs {} @ (rdistinct rsa (set rs))") + apply(simp only:) + using rdistinct_concat_general apply blast + apply (simp add: distinct_rdistinct_append rdistinct_set_equality1) + by (simp add: rdistinct_does_the_job) + + +lemma simp_flatten: + shows "rsimp (RALTS ((RALTS rsa) # rsb)) = rsimp (RALTS (rsa @ rsb))" + apply simp + apply(subst flatten_rsimpalts) + apply(simp add: flts_append) + by (metis Diff_empty distinct_once_enough flts_append nonalt0_fltseq nonalt_flts_rd qqq1 rdistinct_set_equality1) + +lemma basic_rsimp_SEQ_property1: + shows "rsimp_SEQ RONE r = r" + by (simp add: idiot) + + + +lemma basic_rsimp_SEQ_property3: + shows "rsimp_SEQ r RZERO = RZERO" + using rsimp_SEQ.elims by blast + + + +fun vsuf :: "char list \ rrexp \ char list list" where +"vsuf [] _ = []" +|"vsuf (c#cs) r1 = (if (rnullable r1) then (vsuf cs (rder c r1)) @ [c # cs] + else (vsuf cs (rder c r1)) + ) " +fun star_update :: "char \ rrexp \ char list list \ char list list" where +"star_update c r [] = []" +|"star_update c r (s # Ss) = (if (rnullable (rders r s)) + then (s@[c]) # [c] # (star_update c r Ss) + else (s@[c]) # (star_update c r Ss) )" + + +fun star_updates :: "char list \ rrexp \ char list list \ char list list" + where +"star_updates [] r Ss = Ss" +| "star_updates (c # cs) r Ss = star_updates cs r (star_update c r Ss)" + +lemma stupdates_append: shows +"star_updates (s @ [c]) r Ss = star_update c r (star_updates s r Ss)" + apply(induct s arbitrary: Ss) + apply simp + apply simp + done + +lemma flts_removes0: + shows " rflts (rs @ [RZERO]) = + rflts rs" + apply(induct rs) + apply simp + by (metis append_Cons rflts.simps(2) rflts.simps(3) rflts_def_idiot) + + +lemma rflts_spills_last: + shows "rflts (rs1 @ [RALTS rs]) = rflts rs1 @ rs" + apply (induct rs1 rule: rflts.induct) + apply(auto) + done + +lemma flts_keeps1: + shows "rflts (rs @ [RONE]) = rflts rs @ [RONE]" + apply (induct rs rule: rflts.induct) + apply(auto) + done + +lemma flts_keeps_others: + shows "\a \ RZERO; \rs1. a = RALTS rs1\ \rflts (rs @ [a]) = rflts rs @ [a]" + apply(induct rs rule: rflts.induct) + apply(auto) + by (meson k0b nonalt.elims(3)) + +lemma spilled_alts_contained: + shows "\a = RALTS rs ; a \ set rs1\ \ \r \ set rs. r \ set (rflts rs1)" + apply(induct rs1) + apply simp + apply(case_tac "a = aa") + apply simp + apply(subgoal_tac " a \ set rs1") + prefer 2 + apply (meson set_ConsD) + apply(case_tac aa) + using rflts.simps(2) apply presburger + apply fastforce + apply fastforce + apply fastforce + apply fastforce + apply fastforce + by simp + + +lemma distinct_removes_duplicate_flts: + shows " a \ set rsa + \ rdistinct (rflts (map rsimp rsa @ [rsimp a])) {} = + rdistinct (rflts (map rsimp rsa)) {}" + apply(subgoal_tac "rsimp a \ set (map rsimp rsa)") + prefer 2 + apply simp + apply(induct "rsimp a") + apply simp + using flts_removes0 apply presburger + apply(subgoal_tac " rdistinct (rflts (map rsimp rsa @ [rsimp a])) {} = + rdistinct (rflts (map rsimp rsa @ [RONE])) {}") + apply (simp only:) + apply(subst flts_keeps1) + apply (metis distinct_removes_last(1) flts_append in_set_conv_decomp rflts.simps(4)) + apply presburger + apply(subgoal_tac " rdistinct (rflts (map rsimp rsa @ [rsimp a])) {} = + rdistinct ((rflts (map rsimp rsa)) @ [RCHAR x]) {}") + apply (simp only:) + prefer 2 + apply (metis flts_append rflts.simps(1) rflts.simps(5)) + apply (metis distinct_removes_last(1) rflts_def_idiot2 rrexp.distinct(25) rrexp.distinct(3)) + apply (metis distinct_removes_last(1) flts_append rflts.simps(1) rflts.simps(6) rflts_def_idiot2 rrexp.distinct(31) rrexp.distinct(5)) + apply (metis distinct_removes_list rflts_spills_last spilled_alts_contained) + apply (metis distinct_removes_last(1) flts_append good.simps(1) good.simps(44) rflts.simps(1) rflts.simps(7) rflts_def_idiot2 rrexp.distinct(37)) + by (metis distinct_removes_last(1) flts_append rflts.simps(1) rflts.simps(8) rflts_def_idiot2 rrexp.distinct(11) rrexp.distinct(39)) (*some basic facts about rsimp*) diff -r 56057198e4f5 -r 70c10dc41606 thys3/Blexer.thy --- a/thys3/Blexer.thy Fri May 26 08:09:30 2023 +0100 +++ b/thys3/Blexer.thy Fri May 26 08:10:17 2023 +0100 @@ -109,7 +109,7 @@ by (smt append_Nil2 decode'_code old.prod.case) -section {* Annotated Regular Expressions *} +section \Annotated Regular Expressions\ datatype arexp = AZERO diff -r 56057198e4f5 -r 70c10dc41606 thys3/BlexerSimp.thy --- a/thys3/BlexerSimp.thy Fri May 26 08:09:30 2023 +0100 +++ b/thys3/BlexerSimp.thy Fri May 26 08:10:17 2023 +0100 @@ -2,6 +2,35 @@ imports Blexer begin +fun distinctWith :: "'a list \ ('a \ 'a \ bool) \ 'a set \ 'a list" + where + "distinctWith [] eq acc = []" +| "distinctWith (x # xs) eq acc = + (if (\ y \ acc. eq x y) then distinctWith xs eq acc + else x # (distinctWith xs eq ({x} \ acc)))" + + +fun eq1 ("_ ~1 _" [80, 80] 80) where + "AZERO ~1 AZERO = True" +| "(AONE bs1) ~1 (AONE bs2) = True" +| "(ACHAR bs1 c) ~1 (ACHAR bs2 d) = (if c = d then True else False)" +| "(ASEQ bs1 ra1 ra2) ~1 (ASEQ bs2 rb1 rb2) = (ra1 ~1 rb1 \ ra2 ~1 rb2)" +| "(AALTs bs1 []) ~1 (AALTs bs2 []) = True" +| "(AALTs bs1 (r1 # rs1)) ~1 (AALTs bs2 (r2 # rs2)) = (r1 ~1 r2 \ (AALTs bs1 rs1) ~1 (AALTs bs2 rs2))" +| "(ASTAR bs1 r1) ~1 (ASTAR bs2 r2) = r1 ~1 r2" +| "(ANTIMES bs1 r1 n1) ~1 (ANTIMES bs2 r2 n2) = (r1 ~1 r2 \ n1 = n2)" +| "_ ~1 _ = False" + + + +lemma eq1_L: + assumes "x ~1 y" + shows "L (erase x) = L (erase y)" + using assms + apply(induct rule: eq1.induct) + apply(auto elim: eq1.elims) + apply presburger + done fun flts :: "arexp list \ arexp list" where @@ -42,11 +71,42 @@ | "bsimp_AALTs bs1 [r] = fuse bs1 r" | "bsimp_AALTs bs1 rs = AALTs bs1 rs" + + + +fun bsimp :: "arexp \ arexp" + where + "bsimp (ASEQ bs1 r1 r2) = bsimp_ASEQ bs1 (bsimp r1) (bsimp r2)" +| "bsimp (AALTs bs1 rs) = bsimp_AALTs bs1 (distinctWith (flts (map bsimp rs)) eq1 {}) " +| "bsimp r = r" + + +fun + bders_simp :: "arexp \ string \ arexp" +where + "bders_simp r [] = r" +| "bders_simp r (c # s) = bders_simp (bsimp (bder c r)) s" + +definition blexer_simp where + "blexer_simp r s \ if bnullable (bders_simp (intern r) s) then + decode (bmkeps (bders_simp (intern r) s)) r else None" + + + +lemma bders_simp_append: + shows "bders_simp r (s1 @ s2) = bders_simp (bders_simp r s1) s2" + apply(induct s1 arbitrary: r s2) + apply(simp_all) + done + lemma bmkeps_fuse: assumes "bnullable r" shows "bmkeps (fuse bs r) = bs @ bmkeps r" using assms - by (induct r rule: bnullable.induct) (auto) + apply(induct r rule: bnullable.induct) + apply(auto) + apply (metis append.assoc bmkeps_retrieve bmkeps_retrieve_AALTs bnullable.simps(4)) + done lemma bmkepss_fuse: assumes "bnullables rs" @@ -65,84 +125,6 @@ -fun ABIncludedByC :: "'a \ 'b \ 'c \ ('a \ 'b \ 'c) \ ('c \ 'c \ bool) \ bool" where - "ABIncludedByC a b c f subseteqPred = subseteqPred (f a b) c" - -fun furtherSEQ :: "rexp \ rexp \ rexp list" and - turnIntoTerms :: "rexp \ rexp list " - where - "furtherSEQ ONE r2 = turnIntoTerms r2 " -| "furtherSEQ r11 r2 = [SEQ r11 r2]" -| "turnIntoTerms (SEQ ONE r2) = turnIntoTerms r2" -| "turnIntoTerms (SEQ r1 r2) = concat (map (\r11. furtherSEQ r11 r2) (turnIntoTerms r1))" -| "turnIntoTerms r = [r]" - -abbreviation "tint \ turnIntoTerms" - -fun seqFold :: "rexp \ rexp list \ rexp" where - "seqFold acc [] = acc" -| "seqFold ONE (r # rs1) = seqFold r rs1" -| "seqFold acc (r # rs1) = seqFold (SEQ acc r) rs1" - - -fun attachCtx :: "arexp \ rexp list \ rexp set" where - "attachCtx r ctx = set (turnIntoTerms (seqFold (erase r) ctx))" - - -fun rs1_subseteq_rs2 :: "rexp set \ rexp set \ bool" where - "rs1_subseteq_rs2 rs1 rs2 = (rs1 \ rs2)" - - -fun notZero :: "arexp \ bool" where - "notZero AZERO = True" -| "notZero _ = False" - - -fun tset :: "arexp list \ rexp set" where - "tset rs = set (concat (map turnIntoTerms (map erase rs)))" - - -fun prune6 :: "rexp set \ arexp \ rexp list \ arexp" where - "prune6 acc a ctx = (if (ABIncludedByC a ctx acc attachCtx rs1_subseteq_rs2) then AZERO else - (case a of (ASEQ bs r1 r2) \ bsimp_ASEQ bs (prune6 acc r1 (erase r2 # ctx)) r2 - | AALTs bs rs0 \ bsimp_AALTs bs (filter notZero (map (\r.(prune6 acc r ctx)) rs0)) - | r \ r - ) - ) - " - -abbreviation - "p6 acc r \ prune6 (tset acc) r Nil" - - -fun dB6 :: "arexp list \ rexp set \ arexp list" where - "dB6 [] acc = []" -| "dB6 (a # as) acc = (if (erase a \ acc) then (dB6 as acc) - else (let pruned = prune6 acc a [] in - (case pruned of - AZERO \ dB6 as acc - |xPrime \ xPrime # (dB6 as ( (set (turnIntoTerms (erase pruned))) \ acc) ) ) )) " - - -fun bsimpStrong6 :: "arexp \ arexp" - where - "bsimpStrong6 (ASEQ bs1 r1 r2) = bsimp_ASEQ bs1 (bsimpStrong6 r1) (bsimpStrong6 r2)" -| "bsimpStrong6 (AALTs bs1 rs) = bsimp_AALTs bs1 (dB6 (flts (map bsimpStrong6 rs)) {}) " -| "bsimpStrong6 r = r" - - -fun - bdersStrong6 :: "arexp \ string \ arexp" -where - "bdersStrong6 r [] = r" -| "bdersStrong6 r (c # s) = bdersStrong6 (bsimpStrong6 (bder c r)) s" - -definition blexerStrong where - "blexerStrong r s \ if bnullable (bdersStrong6 (intern r) s) then - decode (bmkeps (bdersStrong6 (intern r) s)) r else None" - - - inductive rrewrite:: "arexp \ arexp \ bool" ("_ \ _" [99, 99] 99) and @@ -162,9 +144,7 @@ | ss4: "(AZERO # rs) s\ rs" | ss5: "(AALTs bs1 rs1 # rsb) s\ ((map (fuse bs1) rs1) @ rsb)" | ss6: "L (erase a2) \ L (erase a1) \ (rsa@[a1]@rsb@[a2]@rsc) s\ (rsa@[a1]@rsb@rsc)" -| ss7: " (as @ [a] @ as1) s\ (as @ [p6 as a] @ as1)" -thm tset.simps inductive rrewrites:: "arexp \ arexp \ bool" ("_ \* _" [100, 100] 100) @@ -203,6 +183,8 @@ apply(auto) done + + lemma contextrewrites0: "rs1 s\* rs2 \ AALTs bs rs1 \* AALTs bs rs2" apply(induct rs1 rs2 rule: srewrites.inducts) @@ -227,24 +209,17 @@ apply(auto) using srewrite1 by blast -lemma srewrites_prepend: - shows "rs1 s\* rs2 \ (r # rs1) s\* (r # rs2)" - by (metis append_Cons append_Nil srewrites1) - lemma srewrite2: shows "r1 \ r2 \ True" and "rs1 s\ rs2 \ (rs1 @ rs) s\* (rs2 @ rs)" - apply(induct arbitrary: rs rule: rrewrite_srewrite.inducts) - apply simp+ - using srewrites_prepend apply force - apply (simp add: rs_in_rstar ss3) - using ss4 apply force - using rs_in_rstar ss5 apply auto[1] - using rs_in_rstar ss6 apply auto[1] - using rs_in_rstar ss7 by force - - - + apply(induct rule: rrewrite_srewrite.inducts) + apply(auto) + apply (metis append_Cons append_Nil srewrites1) + apply(meson srewrites.simps ss3) + apply (meson srewrites.simps ss4) + apply (meson srewrites.simps ss5) + by (metis append_Cons append_Nil srewrites.simps ss6) + lemma srewrites3: shows "rs1 s\* rs2 \ (rs1 @ rs) s\* (rs2 @ rs)" @@ -252,6 +227,15 @@ apply(auto) by (meson srewrite2(2) srewrites_trans) +(* +lemma srewrites4: + assumes "rs3 s\* rs4" "rs1 s\* rs2" + shows "(rs1 @ rs3) s\* (rs2 @ rs4)" + using assms + apply(induct rs3 rs4 arbitrary: rs1 rs2 rule: srewrites.induct) + apply (simp add: srewrites3) + using srewrite1 by blast +*) lemma srewrites6: assumes "r1 \* r2" @@ -267,178 +251,27 @@ using assms by (smt (verit, best) append_Cons append_Nil srewrites1 srewrites3 srewrites6 srewrites_trans) -(*harmless sorry*) -lemma existing_preimage : - shows "f a \ f ` set rs1 \ \rs1a rs1b x. rs1 = rs1a @ [x] @ rs1b \ f x = f a " - apply(induct rs1) - apply simp - apply(case_tac "f a = f aa") - - sorry - - -lemma deletes_dB: - shows " \erase a \ erase ` set rs1\ \ (rs1 @ a # rs2) s\* (rs1 @ rs2)" - apply(subgoal_tac "\rs1a rs1b x. rs1 = rs1a @ [x] @ rs1b \ erase x = erase a") +lemma ss6_stronger_aux: + shows "(rs1 @ rs2) s\* (rs1 @ distinctWith rs2 eq1 (set rs1))" + apply(induct rs2 arbitrary: rs1) + apply(auto) prefer 2 - apply (meson existing_preimage) + apply(drule_tac x="rs1 @ [a]" in meta_spec) + apply(simp) + apply(drule_tac x="rs1" in meta_spec) + apply(subgoal_tac "(rs1 @ a # rs2) s\* (rs1 @ rs2)") + using srewrites_trans apply blast + apply(subgoal_tac "\rs1a rs1b. rs1 = rs1a @ [x] @ rs1b") + prefer 2 + apply (simp add: split_list) apply(erule exE)+ - apply simp - apply(subgoal_tac "(rs1a @ [x] @ rs1b @ [a] @ rs2) s\ (rs1a @ [x] @ rs1b @ rs2)") - apply (simp add: rs_in_rstar) - apply(subgoal_tac "L (erase a) \ L (erase x)") - using ss6 apply presburger - by simp - - - -lemma ss6_realistic: - shows "(rs1 @ rs2) s\* (rs1 @ dB6 rs2 (tset rs1))" - apply(induct rs2 arbitrary: rs1) - apply simp - apply(rename_tac cc' cc) - apply(subgoal_tac "(cc @ a # cc') s\* (cc @ a # dB6 cc' (tset (cc @ [a])))") - prefer 2 - apply (metis append.assoc append.left_neutral append_Cons) - apply(subgoal_tac "(cc @ a # dB6 cc' (tset (cc @ [a]))) s\* (cc @ (p6 cc a) # dB6 cc' (tset (cc @ [a])))") - sorry - - - + apply(simp) + using eq1_L rs_in_rstar ss6 by force lemma ss6_stronger: - shows "rs1 s\* dB6 rs1 {}" - using ss6 - by (metis append_Nil concat.simps(1) list.set(1) list.simps(8) ss6_realistic tset.simps) - - -lemma tint_fuse: - shows "tint (erase a) = tint (erase (fuse bs a))" - by (simp add: erase_fuse) - -lemma tint_fuse2: - shows " set (tint (erase a)) = - set (tint (erase (fuse bs a)))" - using tint_fuse by auto - -lemma fused_bits_at_head: - shows "fuse bs a = ASEQ bs1 a1 a2 \ \bs2. bs1 = bs @ bs2" - -(* by (smt (verit) arexp.distinct(13) arexp.distinct(19) arexp.distinct(25) arexp.distinct(27) arexp.distinct(5) arexp.inject(3) fuse.elims) -harmless sorry -*) - - - sorry - -thm seqFold.simps - -lemma seqFold_concats: - shows "r \ ONE \ seqFold r [r1] = SEQ r r1" - apply(case_tac r) - apply simp+ -done - - -lemma tint_seqFold_eq: shows -"set (tint (seqFold (erase x42) [erase x43])) = - set (tint (SEQ (erase x42) (erase x43)))" - apply(case_tac "erase x42 = ONE") - apply simp - using seqFold_concats - apply simp - done - -fun top :: "arexp \ bit list" where - "top AZERO = []" -| "top (AONE bs) = bs" -| "top (ASEQ bs r1 r2) = bs" -| "top (ACHAR v va) = v" -| "top (AALTs v va) = v" -| "top (ASTAR v va) = v " - - - - -lemma p6fa_aux: - shows " fuse bs - (bsimp_AALTs bs\<^sub>0 as) = - - (bsimp_AALTs (bs @ bs\<^sub>0) as)" - by (metis bsimp_AALTs.simps(1) bsimp_AALTs.simps(2) bsimp_AALTs.simps(3) fuse.simps(1) fuse.simps(4) fuse_append neq_Nil_conv) - + shows "rs1 s\* distinctWith rs1 eq1 {}" + by (metis append_Nil list.set(1) ss6_stronger_aux) -lemma p6pfuse_alts: - shows -" \bs\<^sub>0 as\<^sub>0. - \\a bs. set (tint (erase a)) = set (tint (erase (fuse bs a))); a = AALTs bs\<^sub>0 as\<^sub>0\ - \ \ set (tint (erase a)) \ (\x\set as. set (tint (erase x))) \ - fuse bs - (case a of AZERO \ AZERO | AONE x \ AONE x | ACHAR x xa \ ACHAR x xa - | ASEQ bs r1 r2 \ bsimp_ASEQ bs (prune6 (\x\set as. set (tint (erase x))) r1 [erase r2]) r2 - | AALTs bs rs0 \ bsimp_AALTs bs (filter notZero (map (\r. prune6 (\x\set as. set (tint (erase x))) r []) rs0)) | ASTAR x xa \ ASTAR x xa) - = - (case fuse bs a of AZERO \ AZERO | AONE x \ AONE x | ACHAR x xa \ ACHAR x xa - | ASEQ bs r1 r2 \ bsimp_ASEQ bs (prune6 (\x\set as. set (tint (erase x))) r1 [erase r2]) r2 - | AALTs bs rs0 \ bsimp_AALTs bs (filter notZero (map (\r. prune6 (\x\set as. set (tint (erase x))) r []) rs0)) | ASTAR x xa \ ASTAR x xa)" - apply simp - using p6fa_aux by presburger - - - - -lemma prune6_preserves_fuse: - shows "fuse bs (p6 as a) = p6 as (fuse bs a)" - using tint_fuse2 - apply simp - apply(case_tac a) - apply simp - apply simp - apply simp - - using fused_bits_at_head - - apply simp - using tint_seqFold_eq - apply simp - apply (smt (z3) bsimp_ASEQ.simps(1) bsimp_ASEQ0 bsimp_ASEQ1 bsimp_ASEQ2 fuse.simps(1) fuse.simps(5) fuse_append) - using p6pfuse_alts apply presburger - by simp - - -(* -The top-level bitlist stays the same: -\<^sub>b\<^sub>sa ------pruning-----> \<^sub>b\<^sub>s\<^sub>' b \ \bs3. bs' = bs @ bs3 -*) -lemma top_bitcodes_preserved_p6: - shows "top r = bs \ p6 as r = AZERO \ (\bs3. top (p6 as r) = bs @ bs3)" - - - apply(induct r arbitrary: as) - -(*this sorry requires more care *) - - sorry - - - -corollary prune6_preserves_fuse_srewrite: - shows "(as @ map (fuse bs) [a] @ as2) s\* (as @ map (fuse bs) [p6 as a] @ as2)" - apply(subgoal_tac "map (fuse bs) [a] = [fuse bs a]") - apply(subgoal_tac "(as @ [fuse bs a] @ as2) s\* (as @ [ (p6 as (fuse bs a))] @ as2)") - using prune6_preserves_fuse apply auto[1] - using rs_in_rstar ss7 apply presburger - by simp - -lemma prune6_invariant_fuse: - shows "p6 as a = p6 (map (fuse bs) as) a" - apply simp - using erase_fuse by presburger - - -lemma p6pfs_cor: - shows "(map (fuse bs) as @ map (fuse bs) [a] @ map (fuse bs) as1) s\* (map (fuse bs) as @ map (fuse bs) [p6 as a] @ map (fuse bs) as1)" - by (metis prune6_invariant_fuse prune6_preserves_fuse_srewrite) lemma rewrite_preserves_fuse: shows "r2 \ r3 \ fuse bs r2 \ fuse bs r3" @@ -468,13 +301,7 @@ then show ?case apply(simp only: map_append) by (smt (verit, best) erase_fuse list.simps(8) list.simps(9) rrewrite_srewrite.ss6 srewrites.simps) -next - case (ss7 as a as1) - then show ?case - apply(simp only: map_append) - using p6pfs_cor by presburger qed (auto intro: rrewrite_srewrite.intros) - lemma rewrites_fuse: @@ -507,12 +334,17 @@ shows "ASEQ bs1 r1 r2 \* AZERO" using assms bs1 star_seq by blast - +(* +lemma continuous_rewrite2: + assumes "r1 \* AONE bs" + shows "ASEQ bs1 r1 r2 \* (fuse (bs1 @ bs) r2)" + using assms by (meson bs3 rrewrites.simps star_seq) +*) lemma bsimp_aalts_simpcases: - shows "AONE bs \* bsimpStrong6 (AONE bs)" - and "AZERO \* bsimpStrong6 AZERO" - and "ACHAR bs c \* bsimpStrong6 (ACHAR bs c)" + shows "AONE bs \* bsimp (AONE bs)" + and "AZERO \* bsimp AZERO" + and "ACHAR bs c \* bsimp (ACHAR bs c)" by (simp_all) lemma bsimp_AALTs_rewrites: @@ -535,20 +367,17 @@ using rs1 srewrites7 apply presburger using srewrites7 apply force apply (simp add: srewrites7) + apply(simp add: srewrites7) by (simp add: srewrites7) + lemma bnullable0: shows "r1 \ r2 \ bnullable r1 = bnullable r2" and "rs1 s\ rs2 \ bnullables rs1 = bnullables rs2" - apply(induct rule: rrewrite_srewrite.inducts) - apply simp - apply simp - apply (simp add: bnullable_fuse) - using bnullable.simps(5) apply presburger - apply simp - apply simp - sorry - + apply(induct rule: rrewrite_srewrite.inducts) + apply(auto simp add: bnullable_fuse) + apply (meson UnCI bnullable_fuse imageI) + using bnullable_correctness nullable_correctness by blast lemma rewritesnullable: @@ -557,7 +386,7 @@ using assms apply(induction r1 r2 rule: rrewrites.induct) apply simp - using bnullable0(1) by presburger + using bnullable0(1) by auto lemma rewrite_bmkeps_aux: shows "r1 \ r2 \ (bnullable r1 \ bnullable r2 \ bmkeps r1 = bmkeps r2)" @@ -575,19 +404,22 @@ next case (ss5 bs1 rs1 rsb) then show ?case - by (simp add: bmkepss1 bmkepss2 bmkepss_fuse bnullable_fuse) + apply (simp add: bmkepss1 bmkepss2 bmkepss_fuse bnullable_fuse) + apply(case_tac rs1) + apply(auto simp add: bnullable_fuse) + apply (metis bmkeps_retrieve bmkeps_retrieve_AALTs bnullable.simps(4)) + apply (metis bmkeps_retrieve bmkeps_retrieve_AALTs bnullable.simps(4)) + apply (metis bmkeps_retrieve bmkeps_retrieve_AALTs bnullable.simps(4)) + by (metis bmkeps_retrieve bmkeps_retrieve_AALTs bnullable.simps(4)) next case (ss6 a1 a2 rsa rsb rsc) then show ?case by (smt (verit, best) Nil_is_append_conv bmkepss1 bmkepss2 bnullable_correctness in_set_conv_decomp list.distinct(1) list.set_intros(1) nullable_correctness set_ConsD subsetD) next - prefer 10 - case (ss7 as a as1) + case (bs10 rs1 rs2 bs) then show ?case - -(*this sorry requires more effort*) - sorry -qed(auto) + by (metis bmkeps_retrieve bmkeps_retrieve_AALTs bnullable.simps(4)) +qed (auto) lemma rewrites_bmkeps: assumes "r1 \* r2" "bnullable r1" @@ -609,6 +441,50 @@ qed +lemma rewrites_to_bsimp: + shows "r \* bsimp r" +proof (induction r rule: bsimp.induct) + case (1 bs1 r1 r2) + have IH1: "r1 \* bsimp r1" by fact + have IH2: "r2 \* bsimp r2" by fact + { assume as: "bsimp r1 = AZERO \ bsimp r2 = AZERO" + with IH1 IH2 have "r1 \* AZERO \ r2 \* AZERO" by auto + then have "ASEQ bs1 r1 r2 \* AZERO" + by (metis bs2 continuous_rewrite rrewrites.simps star_seq2) + then have "ASEQ bs1 r1 r2 \* bsimp (ASEQ bs1 r1 r2)" using as by auto + } + moreover + { assume "\bs. bsimp r1 = AONE bs" + then obtain bs where as: "bsimp r1 = AONE bs" by blast + with IH1 have "r1 \* AONE bs" by simp + then have "ASEQ bs1 r1 r2 \* fuse (bs1 @ bs) r2" using bs3 star_seq by blast + with IH2 have "ASEQ bs1 r1 r2 \* fuse (bs1 @ bs) (bsimp r2)" + using rewrites_fuse by (meson rrewrites_trans) + then have "ASEQ bs1 r1 r2 \* bsimp (ASEQ bs1 (AONE bs) r2)" by simp + then have "ASEQ bs1 r1 r2 \* bsimp (ASEQ bs1 r1 r2)" by (simp add: as) + } + moreover + { assume as1: "bsimp r1 \ AZERO" "bsimp r2 \ AZERO" and as2: "(\bs. bsimp r1 = AONE bs)" + then have "bsimp_ASEQ bs1 (bsimp r1) (bsimp r2) = ASEQ bs1 (bsimp r1) (bsimp r2)" + by (simp add: bsimp_ASEQ1) + then have "ASEQ bs1 r1 r2 \* bsimp_ASEQ bs1 (bsimp r1) (bsimp r2)" using as1 as2 IH1 IH2 + by (metis rrewrites_trans star_seq star_seq2) + then have "ASEQ bs1 r1 r2 \* bsimp (ASEQ bs1 r1 r2)" by simp + } + ultimately show "ASEQ bs1 r1 r2 \* bsimp (ASEQ bs1 r1 r2)" by blast +next + case (2 bs1 rs) + have IH: "\x. x \ set rs \ x \* bsimp x" by fact + then have "rs s\* (map bsimp rs)" by (simp add: trivialbsimp_srewrites) + also have "... s\* flts (map bsimp rs)" by (simp add: fltsfrewrites) + also have "... s\* distinctWith (flts (map bsimp rs)) eq1 {}" by (simp add: ss6_stronger) + finally have "AALTs bs1 rs \* AALTs bs1 (distinctWith (flts (map bsimp rs)) eq1 {})" + using contextrewrites0 by auto + also have "... \* bsimp_AALTs bs1 (distinctWith (flts (map bsimp rs)) eq1 {})" + by (simp add: bsimp_AALTs_rewrites) + finally show "AALTs bs1 rs \* bsimp (AALTs bs1 rs)" by simp +qed (simp_all) + lemma to_zero_in_alt: shows "AALT bs (ASEQ [] AZERO r) r2 \ AALT bs AZERO r2" @@ -626,12 +502,6 @@ shows "(map f [a]) = [f a]" by (simp) -lemma "set (tint (erase a)) \ (\x\set as. set (tint (erase x))) \ - set (tint (erase (bder c a))) \ (\x\set (map (bder c) as). set (tint (erase x)))" - - sorry - - lemma rewrite_preserves_bder: shows "r1 \ r2 \ (bder c r1) \* (bder c r2)" and "rs1 s\ rs2 \ map (bder c) rs1 s\* map (bder c) rs2" @@ -707,11 +577,6 @@ apply(rule_tac rrewrite_srewrite.ss6) using Der_def der_correctness apply auto[1] by blast -next - case (ss7 as a as1) - then show ?case - apply simp - sorry qed lemma rewrites_preserves_bder: @@ -720,105 +585,49 @@ using assms apply(induction r1 r2 rule: rrewrites.induct) apply(simp_all add: rewrite_preserves_bder rrewrites_trans) - done - +done -lemma bders_simp_appendStrong : - shows "bdersStrong6 r (s1 @ s2) = bdersStrong6 (bdersStrong6 r s1) s2" - apply(induct s1 arbitrary: s2 rule: rev_induct) - apply simp - apply auto - done +lemma central: + shows "bders r s \* bders_simp r s" +proof(induct s arbitrary: r rule: rev_induct) + case Nil + then show "bders r [] \* bders_simp r []" by simp +next + case (snoc x xs) + have IH: "\r. bders r xs \* bders_simp r xs" by fact + have "bders r (xs @ [x]) = bders (bders r xs) [x]" by (simp add: bders_append) + also have "... \* bders (bders_simp r xs) [x]" using IH + by (simp add: rewrites_preserves_bder) + also have "... \* bders_simp (bders_simp r xs) [x]" using IH + by (simp add: rewrites_to_bsimp) + finally show "bders r (xs @ [x]) \* bders_simp r (xs @ [x])" + by (simp add: bders_simp_append) +qed + +lemma main_aux: + assumes "bnullable (bders r s)" + shows "bmkeps (bders r s) = bmkeps (bders_simp r s)" +proof - + have "bders r s \* bders_simp r s" by (rule central) + then + show "bmkeps (bders r s) = bmkeps (bders_simp r s)" using assms + by (rule rewrites_bmkeps) +qed -lemma rewrites_to_bsimpStrong: - shows "r \* bsimpStrong6 r" -proof (induction r rule: bsimpStrong6.induct) - case (1 bs1 r1 r2) - have IH1: "r1 \* bsimpStrong6 r1" by fact - have IH2: "r2 \* bsimpStrong6 r2" by fact - { assume as: "bsimpStrong6 r1 = AZERO \ bsimpStrong6 r2 = AZERO" - with IH1 IH2 have "r1 \* AZERO \ r2 \* AZERO" by auto - then have "ASEQ bs1 r1 r2 \* AZERO" - by (metis bs2 continuous_rewrite rrewrites.simps star_seq2) - then have "ASEQ bs1 r1 r2 \* bsimpStrong6 (ASEQ bs1 r1 r2)" using as by auto - } - moreover - { assume "\bs. bsimpStrong6 r1 = AONE bs" - then obtain bs where as: "bsimpStrong6 r1 = AONE bs" by blast - with IH1 have "r1 \* AONE bs" by simp - then have "ASEQ bs1 r1 r2 \* fuse (bs1 @ bs) r2" using bs3 star_seq by blast - with IH2 have "ASEQ bs1 r1 r2 \* fuse (bs1 @ bs) (bsimpStrong6 r2)" - using rewrites_fuse by (meson rrewrites_trans) - then have "ASEQ bs1 r1 r2 \* bsimpStrong6 (ASEQ bs1 (AONE bs) r2)" by simp - then have "ASEQ bs1 r1 r2 \* bsimpStrong6 (ASEQ bs1 r1 r2)" by (simp add: as) - } - moreover - { assume as1: "bsimpStrong6 r1 \ AZERO" "bsimpStrong6 r2 \ AZERO" and as2: "(\bs. bsimpStrong6 r1 = AONE bs)" - then have "bsimp_ASEQ bs1 (bsimpStrong6 r1) (bsimpStrong6 r2) = ASEQ bs1 (bsimpStrong6 r1) (bsimpStrong6 r2)" - by (simp add: bsimp_ASEQ1) - then have "ASEQ bs1 r1 r2 \* bsimp_ASEQ bs1 (bsimpStrong6 r1) (bsimpStrong6 r2)" using as1 as2 IH1 IH2 - by (metis rrewrites_trans star_seq star_seq2) - then have "ASEQ bs1 r1 r2 \* bsimpStrong6 (ASEQ bs1 r1 r2)" by simp - } - ultimately show "ASEQ bs1 r1 r2 \* bsimpStrong6 (ASEQ bs1 r1 r2)" - by blast -next - case (2 bs1 rs) - have IH: "\x. x \ set rs \ x \* bsimpStrong6 x" by fact - then have "rs s\* (map bsimpStrong6 rs)" by (simp add: trivialbsimp_srewrites) - also have "... s\* flts (map bsimpStrong6 rs)" by (simp add: fltsfrewrites) - also have "... s\* dB6 (flts (map bsimpStrong6 rs)) {}" by (simp add: ss6_stronger) - finally have "AALTs bs1 rs \* AALTs bs1 (dB6 (flts (map bsimpStrong6 rs)) {})" - using contextrewrites0 by auto - also have "... \* bsimp_AALTs bs1 (dB6 (flts (map bsimpStrong6 rs)) {})" - by (simp add: bsimp_AALTs_rewrites) - finally show "AALTs bs1 rs \* bsimpStrong6 (AALTs bs1 rs)" - using bsimpStrong6.simps(2) by presburger -qed (simp_all) +theorem main_blexer_simp: + shows "blexer r s = blexer_simp r s" + unfolding blexer_def blexer_simp_def + by (metis central main_aux rewritesnullable) + +theorem blexersimp_correctness: + shows "lexer r s = blexer_simp r s" + using blexer_correctness main_blexer_simp by simp - - -lemma centralStrong: - shows "bders r s \* bdersStrong6 r s" -proof(induct s arbitrary: r rule: rev_induct) - case Nil - then show "bders r [] \* bdersStrong6 r []" by simp -next - case (snoc x xs) - have IH: "\r. bders r xs \* bdersStrong6 r xs" by fact - have "bders r (xs @ [x]) = bders (bders r xs) [x]" by (simp add: bders_append) - also have "... \* bders (bdersStrong6 r xs) [x]" using IH - by (simp add: rewrites_preserves_bder) - also have "... \* bdersStrong6 (bdersStrong6 r xs) [x]" using IH - by (simp add: rewrites_to_bsimpStrong) - finally show "bders r (xs @ [x]) \* bdersStrong6 r (xs @ [x])" - by (simp add: bders_simp_appendStrong) -qed - -lemma mainStrong: - assumes "bnullable (bders r s)" - shows "bmkeps (bders r s) = bmkeps (bdersStrong6 r s)" -proof - - have "bders r s \* bdersStrong6 r s" - using centralStrong by force - then - show "bmkeps (bders r s) = bmkeps (bdersStrong6 r s)" - using assms rewrites_bmkeps by blast -qed - - - - -theorem blexerStrong_correct : - shows "blexerStrong r s = blexer r s" - unfolding blexerStrong_def blexer_def - by (metis centralStrong mainStrong rewritesnullable) - - +unused_thms end diff -r 56057198e4f5 -r 70c10dc41606 thys3/ClosedForms.thy --- a/thys3/ClosedForms.thy Fri May 26 08:09:30 2023 +0100 +++ b/thys3/ClosedForms.thy Fri May 26 08:10:17 2023 +0100 @@ -1,9 +1,20 @@ theory ClosedForms - imports "HarderProps" + imports "BasicIdentities" begin +lemma flts_middle0: + shows "rflts (rsa @ RZERO # rsb) = rflts (rsa @ rsb)" + apply(induct rsa) + apply simp + by (metis append_Cons rflts.simps(2) rflts.simps(3) rflts_def_idiot) + +lemma simp_flatten_aux0: + shows "rsimp (RALTS rs) = rsimp (RALTS (map rsimp rs))" + by (metis append_Nil head_one_more_simp identity_wwo0 list.simps(8) rdistinct.simps(1) rflts.simps(1) rsimp.simps(2) rsimp_ALTs.simps(1) rsimp_ALTs.simps(3) simp_flatten spawn_simp_rsimpalts) + + inductive hrewrite:: "rrexp \ rrexp \ bool" ("_ h\ _" [99, 99] 99) where @@ -56,8 +67,44 @@ by (meson hreal_trans hrewrites_seq_context hrewrites_seq_context2) +lemma simp_removes_duplicate1: + shows " a \ set rsa \ rsimp (RALTS (rsa @ [a])) = rsimp (RALTS (rsa))" +and " rsimp (RALTS (a1 # rsa @ [a1])) = rsimp (RALTS (a1 # rsa))" + apply(induct rsa arbitrary: a1) + apply simp + apply simp + prefer 2 + apply(case_tac "a = aa") + apply simp + apply simp + apply (metis Cons_eq_appendI Cons_eq_map_conv distinct_removes_duplicate_flts list.set_intros(2)) + apply (metis append_Cons append_Nil distinct_removes_duplicate_flts list.set_intros(1) list.simps(8) list.simps(9)) + by (metis (mono_tags, lifting) append_Cons distinct_removes_duplicate_flts list.set_intros(1) list.simps(8) list.simps(9) map_append rsimp.simps(2)) + +lemma simp_removes_duplicate2: + shows "a \ set rsa \ rsimp (RALTS (rsa @ [a] @ rsb)) = rsimp (RALTS (rsa @ rsb))" + apply(induct rsb arbitrary: rsa) + apply simp + using distinct_removes_duplicate_flts apply auto[1] + by (metis append.assoc head_one_more_simp rsimp.simps(2) simp_flatten simp_removes_duplicate1(1)) +lemma simp_removes_duplicate3: + shows "a \ set rsa \ rsimp (RALTS (rsa @ a # rsb)) = rsimp (RALTS (rsa @ rsb))" + using simp_removes_duplicate2 by auto +(* +lemma distinct_removes_middle4: + shows "a \ set rsa \ rdistinct (rsa @ [a] @ rsb) rset = rdistinct (rsa @ rsb) rset" + using distinct_removes_middle(1) by fastforce +*) + +(* +lemma distinct_removes_middle_list: + shows "\a \ set x. a \ set rsa \ rdistinct (rsa @ x @ rsb) rset = rdistinct (rsa @ rsb) rset" + apply(induct x) + apply simp + by (simp add: distinct_removes_middle3) +*) inductive frewrite:: "rrexp list \ rrexp list \ bool" ("_ \f _" [10, 10] 10) where @@ -212,6 +259,11 @@ using grewrites_append apply blast by (meson greal_trans grewrites.simps grewrites_concat) +fun alt_set:: "rrexp \ rrexp set" + where + "alt_set (RALTS rs) = set rs \ \ (alt_set ` (set rs))" +| "alt_set r = {r}" + lemma grewrite_cases_middle: shows "rs1 \g rs2 \ @@ -226,6 +278,216 @@ by blast +lemma good_singleton: + shows "good a \ nonalt a \ rflts [a] = [a]" + using good.simps(1) k0b by blast + + + + + + + +lemma all_that_same_elem: + shows "\ a \ rset; rdistinct rs {a} = []\ + \ rdistinct (rs @ rsb) rset = rdistinct rsb rset" + apply(induct rs) + apply simp + apply(subgoal_tac "aa = a") + apply simp + by (metis empty_iff insert_iff list.discI rdistinct.simps(2)) + +lemma distinct_early_app1: + shows "rset1 \ rset \ rdistinct rs rset = rdistinct (rdistinct rs rset1) rset" + apply(induct rs arbitrary: rset rset1) + apply simp + apply simp + apply(case_tac "a \ rset1") + apply simp + apply(case_tac "a \ rset") + apply simp+ + + apply blast + apply(case_tac "a \ rset1") + apply simp+ + apply(case_tac "a \ rset") + apply simp + apply (metis insert_subsetI) + apply simp + by (meson insert_mono) + + +lemma distinct_early_app: + shows " rdistinct (rs @ rsb) rset = rdistinct (rdistinct rs {} @ rsb) rset" + apply(induct rsb) + apply simp + using distinct_early_app1 apply blast + by (metis distinct_early_app1 distinct_once_enough empty_subsetI) + + +lemma distinct_eq_interesting1: + shows "a \ rset \ rdistinct (rs @ rsb) rset = rdistinct (rdistinct (a # rs) {} @ rsb) rset" + apply(subgoal_tac "rdistinct (rdistinct (a # rs) {} @ rsb) rset = rdistinct (rdistinct rs {} @ rsb) rset") + apply(simp only:) + using distinct_early_app apply blast + by (metis append_Cons distinct_early_app rdistinct.simps(2)) + + + +lemma good_flatten_aux_aux1: + shows "\ size rs \2; +\r \ set rs. good r \ r \ RZERO \ nonalt r; \r \ set rsb. good r \ r \ RZERO \ nonalt r \ + \ rdistinct (rs @ rsb) rset = + rdistinct (rflts [rsimp_ALTs (rdistinct rs {})] @ rsb) rset" + apply(induct rs arbitrary: rset) + apply simp + apply(case_tac "a \ rset") + apply simp + apply(case_tac "rdistinct rs {a}") + apply simp + apply(subst good_singleton) + apply force + apply simp + apply (meson all_that_same_elem) + apply(subgoal_tac "rflts [rsimp_ALTs (a # rdistinct rs {a})] = a # rdistinct rs {a} ") + prefer 2 + using k0a rsimp_ALTs.simps(3) apply presburger + apply(simp only:) + apply(subgoal_tac "rdistinct (rs @ rsb) rset = rdistinct ((rdistinct (a # rs) {}) @ rsb) rset ") + apply (metis insert_absorb insert_is_Un insert_not_empty rdistinct.simps(2)) + apply (meson distinct_eq_interesting1) + apply simp + apply(case_tac "rdistinct rs {a}") + prefer 2 + apply(subgoal_tac "rsimp_ALTs (a # rdistinct rs {a}) = RALTS (a # rdistinct rs {a})") + apply(simp only:) + apply(subgoal_tac "a # rdistinct (rs @ rsb) (insert a rset) = + rdistinct (rflts [RALTS (a # rdistinct rs {a})] @ rsb) rset") + apply simp + apply (metis append_Cons distinct_early_app empty_iff insert_is_Un k0a rdistinct.simps(2)) + using rsimp_ALTs.simps(3) apply presburger + by (metis Un_insert_left append_Cons distinct_early_app empty_iff good_singleton rdistinct.simps(2) rsimp_ALTs.simps(2) sup_bot_left) + + + + + +lemma good_flatten_aux_aux: + shows "\\a aa lista list. rs = a # list \ list = aa # lista; +\r \ set rs. good r \ r \ RZERO \ nonalt r; \r \ set rsb. good r \ r \ RZERO \ nonalt r \ + \ rdistinct (rs @ rsb) rset = + rdistinct (rflts [rsimp_ALTs (rdistinct rs {})] @ rsb) rset" + apply(erule exE)+ + apply(subgoal_tac "size rs \ 2") + apply (metis good_flatten_aux_aux1) + by (simp add: Suc_leI length_Cons less_add_Suc1) + + + +lemma good_flatten_aux: + shows " \\r\set rs. good r \ r = RZERO; \r\set rsa . good r \ r = RZERO; + \r\set rsb. good r \ r = RZERO; + rsimp (RALTS (rsa @ rs @ rsb)) = rsimp_ALTs (rdistinct (rflts (rsa @ rs @ rsb)) {}); + rsimp (RALTS (rsa @ [RALTS rs] @ rsb)) = + rsimp_ALTs (rdistinct (rflts (rsa @ [rsimp (RALTS rs)] @ rsb)) {}); + map rsimp rsa = rsa; map rsimp rsb = rsb; map rsimp rs = rs; + rdistinct (rflts rsa @ rflts rs @ rflts rsb) {} = + rdistinct (rflts rsa) {} @ rdistinct (rflts rs @ rflts rsb) (set (rflts rsa)); + rdistinct (rflts rsa @ rflts [rsimp (RALTS rs)] @ rflts rsb) {} = + rdistinct (rflts rsa) {} @ rdistinct (rflts [rsimp (RALTS rs)] @ rflts rsb) (set (rflts rsa))\ + \ rdistinct (rflts rs @ rflts rsb) rset = + rdistinct (rflts [rsimp (RALTS rs)] @ rflts rsb) rset" + apply simp + apply(case_tac "rflts rs ") + apply simp + apply(case_tac "list") + apply simp + apply(case_tac "a \ rset") + apply simp + apply (metis append.left_neutral append_Cons equals0D k0b list.set_intros(1) nonalt_flts_rd qqq1 rdistinct.simps(2)) + apply simp + apply (metis Un_insert_left append_Cons append_Nil ex_in_conv flts_single1 insertI1 list.simps(15) nonalt_flts_rd nonazero.elims(3) qqq1 rdistinct.simps(2) sup_bot_left) + apply(subgoal_tac "\r \ set (rflts rs). good r \ r \ RZERO \ nonalt r") + prefer 2 + apply (metis Diff_empty flts3 nonalt_flts_rd qqq1 rdistinct_set_equality1) + apply(subgoal_tac "\r \ set (rflts rsb). good r \ r \ RZERO \ nonalt r") + prefer 2 + apply (metis Diff_empty flts3 good.simps(1) nonalt_flts_rd rdistinct_set_equality1) + by (smt (verit, ccfv_threshold) good_flatten_aux_aux) + + + + +lemma good_flatten_middle: + shows "\\r \ set rs. good r \ r = RZERO; \r \ set rsa. good r \ r = RZERO; \r \ set rsb. good r \ r = RZERO\ \ +rsimp (RALTS (rsa @ rs @ rsb)) = rsimp (RALTS (rsa @ [RALTS rs] @ rsb))" + apply(subgoal_tac "rsimp (RALTS (rsa @ rs @ rsb)) = rsimp_ALTs (rdistinct (rflts (map rsimp rsa @ +map rsimp rs @ map rsimp rsb)) {})") + prefer 2 + apply simp + apply(simp only:) + apply(subgoal_tac "rsimp (RALTS (rsa @ [RALTS rs] @ rsb)) = rsimp_ALTs (rdistinct (rflts (map rsimp rsa @ +[rsimp (RALTS rs)] @ map rsimp rsb)) {})") + prefer 2 + apply simp + apply(simp only:) + apply(subgoal_tac "map rsimp rsa = rsa") + prefer 2 + apply (metis map_idI rsimp.simps(3) test) + apply(simp only:) + apply(subgoal_tac "map rsimp rsb = rsb") + prefer 2 + apply (metis map_idI rsimp.simps(3) test) + apply(simp only:) + apply(subst k00)+ + apply(subgoal_tac "map rsimp rs = rs") + apply(simp only:) + prefer 2 + apply (metis map_idI rsimp.simps(3) test) + apply(subgoal_tac "rdistinct (rflts rsa @ rflts rs @ rflts rsb) {} = +rdistinct (rflts rsa) {} @ rdistinct (rflts rs @ rflts rsb) (set (rflts rsa))") + apply(simp only:) + prefer 2 + using rdistinct_concat_general apply blast + apply(subgoal_tac "rdistinct (rflts rsa @ rflts [rsimp (RALTS rs)] @ rflts rsb) {} = +rdistinct (rflts rsa) {} @ rdistinct (rflts [rsimp (RALTS rs)] @ rflts rsb) (set (rflts rsa))") + apply(simp only:) + prefer 2 + using rdistinct_concat_general apply blast + apply(subgoal_tac "rdistinct (rflts rs @ rflts rsb) (set (rflts rsa)) = + rdistinct (rflts [rsimp (RALTS rs)] @ rflts rsb) (set (rflts rsa))") + apply presburger + using good_flatten_aux by blast + + +lemma simp_flatten3: + shows "rsimp (RALTS (rsa @ [RALTS rs] @ rsb)) = rsimp (RALTS (rsa @ rs @ rsb))" + apply(subgoal_tac "rsimp (RALTS (rsa @ [RALTS rs] @ rsb)) = + rsimp (RALTS (map rsimp rsa @ [rsimp (RALTS rs)] @ map rsimp rsb)) ") + prefer 2 + apply (metis append.left_neutral append_Cons list.simps(9) map_append simp_flatten_aux0) + apply (simp only:) + apply(subgoal_tac "rsimp (RALTS (rsa @ rs @ rsb)) = +rsimp (RALTS (map rsimp rsa @ map rsimp rs @ map rsimp rsb))") + prefer 2 + apply (metis map_append simp_flatten_aux0) + apply(simp only:) + apply(subgoal_tac "rsimp (RALTS (map rsimp rsa @ map rsimp rs @ map rsimp rsb)) = + rsimp (RALTS (map rsimp rsa @ [RALTS (map rsimp rs)] @ map rsimp rsb))") + + apply (metis (no_types, lifting) head_one_more_simp map_append simp_flatten_aux0) + apply(subgoal_tac "\r \ set (map rsimp rsa). good r \ r = RZERO") + apply(subgoal_tac "\r \ set (map rsimp rs). good r \ r = RZERO") + apply(subgoal_tac "\r \ set (map rsimp rsb). good r \ r = RZERO") + + using good_flatten_middle apply presburger + + apply (simp add: good1) + apply (simp add: good1) + apply (simp add: good1) + + done + @@ -277,8 +539,10 @@ apply (simp add: frewrites_alt) apply (simp add: frewrites_cons) apply (simp add: frewrites_append) - by (simp add: frewrites_cons) - + apply (simp add: frewrites_cons) + apply (auto simp add: frewrites_cons) + using frewrite.intros(1) many_steps_later by blast + lemma gstar0: shows "rsa @ (rdistinct rs (set rsa)) \g* rsa @ (rdistinct rs (insert RZERO (set rsa)))" @@ -380,7 +644,8 @@ apply(induct r) apply simp+ apply (metis list.set_intros(1)) - by blast + apply blast + by simp @@ -604,8 +869,39 @@ by (metis append.right_neutral grewrite.intros(2) grewrite_simpalts rsimp_ALTs.simps(2) simp_der_flts) +lemma basic_regex_property1: + shows "rnullable r \ rsimp r \ RZERO" + apply(induct r rule: rsimp.induct) + apply(auto) + apply (metis idiot idiot2 rrexp.distinct(5)) + by (metis der_simp_nullability rnullable.simps(1) rnullable.simps(4) rsimp.simps(2)) +lemma inside_simp_seq_nullable: + shows +"\r1 r2. + \rsimp (rder x (rsimp r1)) = rsimp (rder x r1); rsimp (rder x (rsimp r2)) = rsimp (rder x r2); + rnullable r1\ + \ rsimp (rder x (rsimp_SEQ (rsimp r1) (rsimp r2))) = + rsimp_ALTs (rdistinct (rflts [rsimp_SEQ (rsimp (rder x r1)) (rsimp r2), rsimp (rder x r2)]) {})" + apply(case_tac "rsimp r1 = RONE") + apply(simp) + apply(subst basic_rsimp_SEQ_property1) + apply (simp add: idem_after_simp1) + apply(case_tac "rsimp r1 = RZERO") + + using basic_regex_property1 apply blast + apply(case_tac "rsimp r2 = RZERO") + + apply (simp add: basic_rsimp_SEQ_property3) + apply(subst idiot2) + apply simp+ + apply(subgoal_tac "rnullable (rsimp r1)") + apply simp + using rsimp_idem apply presburger + using der_simp_nullability by presburger + + lemma grewrite_ralts: shows "rs \g rs' \ RALTS rs h\* RALTS rs'" @@ -754,7 +1050,7 @@ apply (meson flts_gstar greal_trans grewrites_ralts_rsimpalts gstar_rdistinct) apply (simp add: grewrites_ralts hrewrites_list) - by simp + by simp_all lemma interleave_aux1: shows " RALT (RSEQ RZERO r1) r h\* r" @@ -815,30 +1111,6 @@ -lemma inside_simp_seq_nullable: - shows -"\r1 r2. - \rsimp (rder x (rsimp r1)) = rsimp (rder x r1); rsimp (rder x (rsimp r2)) = rsimp (rder x r2); - rnullable r1\ - \ rsimp (rder x (rsimp_SEQ (rsimp r1) (rsimp r2))) = - rsimp_ALTs (rdistinct (rflts [rsimp_SEQ (rsimp (rder x r1)) (rsimp r2), rsimp (rder x r2)]) {})" - apply(case_tac "rsimp r1 = RONE") - apply(simp) - apply(subst basic_rsimp_SEQ_property1) - apply (simp add: idem_after_simp1) - apply(case_tac "rsimp r1 = RZERO") - using basic_regex_property1 apply blast - apply(case_tac "rsimp r2 = RZERO") - apply (simp add: basic_rsimp_SEQ_property3) - apply(subst idiot2) - apply simp+ - apply(subgoal_tac "rnullable (rsimp r1)") - apply simp - using rsimp_idem apply presburger - using der_simp_nullability by presburger - - - lemma inside_simp_removal: shows " rsimp (rder x (rsimp r)) = rsimp (rder x r)" apply(induct r) @@ -852,7 +1124,7 @@ apply(subgoal_tac "rder x (RALTS xa) h\* rder x (rsimp (RALTS xa))") using hrewrites_simpeq apply presburger using interleave_star1 simp_hrewrites apply presburger - by simp + by simp_all @@ -887,16 +1159,14 @@ using rders_simp_same_simpders rsimp_idem by auto lemma repeated_altssimp: - shows "\r \ set rs. rsimp r = r \ - rsimp (rsimp_ALTs (rdistinct (rflts rs) {})) = + shows "\r \ set rs. rsimp r = r \ rsimp (rsimp_ALTs (rdistinct (rflts rs) {})) = rsimp_ALTs (rdistinct (rflts rs) {})" by (metis map_idI rsimp.simps(2) rsimp_idem) lemma alts_closed_form: - shows "rsimp (rders_simp (RALTS rs) s) = - rsimp (RALTS (map (\r. rders_simp r s) rs))" + shows "rsimp (rders_simp (RALTS rs) s) = rsimp (RALTS (map (\r. rders_simp r s) rs))" apply(induct s rule: rev_induct) apply simp apply simp @@ -981,11 +1251,15 @@ apply simp+ using created_by_seq.cases apply blast - - apply (meson created_by_seq.cases rrexp.distinct(19) rrexp.distinct(21)) - apply (metis created_by_seq.simps rder.simps(5)) - apply (smt (verit, ccfv_threshold) created_by_seq.simps list.set_intros(1) list.simps(8) list.simps(9) rder.simps(4) rrexp.distinct(25) rrexp.inject(3)) - using created_by_seq.intros(1) by force + apply(auto) + apply (meson created_by_seq.cases rrexp.distinct(23) rrexp.distinct(25)) + using created_by_seq.simps apply blast + apply (meson created_by_seq.simps) + using created_by_seq.intros(1) apply blast + apply (metis (no_types, lifting) created_by_seq.simps k0a list.set_intros(1) list.simps(8) list.simps(9) rrexp.distinct(31)) + apply (simp add: created_by_seq.intros(1)) + using created_by_seq.simps apply blast + by (simp add: created_by_seq.intros(1)) lemma createdbyseq_left_creatable: shows "created_by_seq (RALT r1 r2) \ created_by_seq r1" @@ -1030,16 +1304,6 @@ by fastforce -fun vsuf :: "char list \ rrexp \ char list list" where -"vsuf [] _ = []" -|"vsuf (c#cs) r1 = (if (rnullable r1) then (vsuf cs (rder c r1)) @ [c # cs] - else (vsuf cs (rder c r1)) - ) " - - - - - lemma vsuf_prop1: shows "vsuf (xs @ [x]) r = (if (rnullable (rders r xs)) then [x] # (map (\s. s @ [x]) (vsuf xs r) ) @@ -1071,75 +1335,44 @@ apply simp by (simp add: rders_append) -thm sfau_idem_der -lemma oneCharvsuf: - shows "breakHead [rder x (RSEQ r1 r2)] = RSEQ (rder x r1) r2 # map (rders r2) (vsuf [x] r1)" - by simp - - -lemma vsuf_compose2: - shows "(map (rders r2) (vsuf [x] (rders r1 xs))) @ map (rder x) (map (rders r2) (vsuf xs r1)) = - map (rders r2) (vsuf (xs @ [x]) r1)" -proof(induct xs arbitrary: r1) - case Nil - then show ?case - by simp -next - case (Cons a xs) - have "rnullable (rders r1 xs) \ map (rders r2) (vsuf [x] (rders r1 (a # xs))) @ map (rder x) (map (rders r2) (vsuf (a # xs) r1)) = - map (rders r2) (vsuf ((a # xs) @ [x]) r1)" - proof - assume nullableCond: "rnullable (rders r1 xs)" - have "rnullable r1 \ rder x (rders (rder a r2) xs) = rders (rder a r2) (xs @ [x])" - by (simp add: rders_append) - show " map (rders r2) (vsuf [x] (rders r1 (a # xs))) @ map (rder x) (map (rders r2) (vsuf (a # xs) r1)) = - map (rders r2) (vsuf ((a # xs) @ [x]) r1)" - using \rnullable r1 \ rder x (rders (rder a r2) xs) = rders (rder a r2) (xs @ [x])\ local.Cons by auto - qed - then have "\ rnullable (rders r1 xs) \ map (rders r2) (vsuf [x] (rders r1 (a # xs))) @ map (rder x) (map (rders r2) (vsuf (a # xs) r1)) = - map (rders r2) (vsuf ((a # xs) @ [x]) r1)" - apply simp - by (smt (verit, ccfv_threshold) append_Cons append_Nil list.map_comp list.simps(8) list.simps(9) local.Cons rders.simps(1) rders.simps(2) rders_append vsuf.simps(1) vsuf.simps(2)) - then show ?case - using \rnullable (rders r1 xs) \ map (rders r2) (vsuf [x] (rders r1 (a # xs))) @ map (rder x) (map (rders r2) (vsuf (a # xs) r1)) = map (rders r2) (vsuf ((a # xs) @ [x]) r1)\ by blast -qed lemma seq_sfau0: shows "sflat_aux (rders (RSEQ r1 r2) s) = (RSEQ (rders r1 s) r2) # (map (rders r2) (vsuf s r1)) " -proof(induct s rule: rev_induct) - case Nil - then show ?case - by simp -next - case (snoc x xs) - then have LHS1:"sflat_aux (rders (RSEQ r1 r2) (xs @ [x])) = sflat_aux (rder x (rders (RSEQ r1 r2) xs)) " - by (simp add: rders_append) - then have LHS1A: "... = breakHead (map (rder x) (sflat_aux (rders (RSEQ r1 r2) xs)))" - using recursively_derseq sfau_idem_der by auto - then have LHS1B: "... = breakHead (map (rder x) (RSEQ (rders r1 xs) r2 # map (rders r2) (vsuf xs r1)))" - using snoc by auto - then have LHS1C: "... = breakHead (rder x (RSEQ (rders r1 xs) r2) # map (rder x) (map (rders r2) (vsuf xs r1)))" - by simp - then have LHS1D: "... = breakHead [rder x (RSEQ (rders r1 xs) r2)] @ map (rder x) (map (rders r2) (vsuf xs r1))" - by simp - then have LHS1E: "... = RSEQ (rder x (rders r1 xs)) r2 # (map (rders r2) (vsuf [x] (rders r1 xs))) @ map (rder x) (map (rders r2) (vsuf xs r1))" - by force - then have LHS1F: "... = RSEQ (rder x (rders r1 xs)) r2 # (map (rders r2) (vsuf (xs @ [x]) r1))" - using vsuf_compose2 by blast - then have LHS1G: "... = RSEQ (rders r1 (xs @ [x])) r2 # (map (rders r2) (vsuf (xs @ [x]) r1))" - using rders.simps(1) rders.simps(2) rders_append by presburger - then show ?case - using LHS1 LHS1A LHS1C LHS1D LHS1E LHS1F snoc by presburger -qed + apply(induct s rule: rev_induct) + apply simp + apply(subst rders_append)+ + apply(subgoal_tac "created_by_seq (rders (RSEQ r1 r2) xs)") + prefer 2 + using recursively_derseq1 apply blast + apply simp + apply(subst sfau_idem_der) + + apply blast + apply(case_tac "rnullable (rders r1 xs)") + apply simp + apply(subst vsuf_prop1) + apply simp + apply (simp add: rders_append) + apply simp + using vsuf_compose1 by blast + + + +thm sflat.elims + + + + + lemma sflat_rsimpeq: shows "created_by_seq r1 \ sflat_aux r1 = rs \ rsimp r1 = rsimp (RALTS rs)" apply(induct r1 arbitrary: rs rule: created_by_seq.induct) @@ -1225,8 +1458,7 @@ lemma seq_closed_form_variant: assumes "s \ []" shows "rders_simp (RSEQ r1 r2) s = - rsimp (RALTS (RSEQ (rders_simp r1 s) r2 # - (map (rders_simp r2) (vsuf s r1))))" + rsimp (RALTS (RSEQ (rders_simp r1 s) r2 # (map (rders_simp r2) (vsuf s r1))))" using assms q seq_closed_form by force @@ -1243,20 +1475,9 @@ "created_by_star (RSEQ ra (RSTAR rb))" | "\created_by_star r1; created_by_star r2\ \ created_by_star (RALT r1 r2)" - - -fun star_update :: "char \ rrexp \ char list list \ char list list" where -"star_update c r [] = []" -|"star_update c r (s # Ss) = (if (rnullable (rders r s)) - then (s@[c]) # [c] # (star_update c r Ss) - else (s@[c]) # (star_update c r Ss) )" - - -fun star_updates :: "char list \ rrexp \ char list list \ char list list" - where -"star_updates [] r Ss = Ss" -| "star_updates (c # cs) r Ss = star_updates cs r (star_update c r Ss)" - +fun hElem :: "rrexp \ rrexp list" where + "hElem (RALT r1 r2) = (hElem r1 ) @ (hElem r2)" +| "hElem r = [r]" lemma cbs_ders_cbs: @@ -1277,38 +1498,20 @@ - lemma hfau_pushin: - shows "created_by_star r \ hflat_aux (rder c r) = concat (map hflat_aux (map (rder c) (hflat_aux r)))" -proof(induct r rule: created_by_star.induct) - case (1 ra rb) - then show ?case by simp -next - case (2 r1 r2) - then have "created_by_star (rder c r1)" - using cbs_ders_cbs by blast - then have "created_by_star (rder c r2)" - using "2.hyps"(3) cbs_ders_cbs by auto - then show ?case - by (simp add: "2.hyps"(2) "2.hyps"(4)) - qed - -(*AALTS [a\x . b.c, b\x .c, c \x]*) -(*AALTS [a\x . b.c, AALTS [b\x .c, c\x]]*) - -lemma stupdates_append: shows -"star_updates (s @ [c]) r Ss = star_update c r (star_updates s r Ss)" - apply(induct s arbitrary: Ss) + shows "created_by_star r \ hflat_aux (rder c r) = concat (map hElem (map (rder c) (hflat_aux r)))" + apply(induct r rule: created_by_star.induct) apply simp + apply(subgoal_tac "created_by_star (rder c r1)") + prefer 2 + apply(subgoal_tac "created_by_star (rder c r2)") + using cbs_ders_cbs apply blast + using cbs_ders_cbs apply auto[1] apply simp done - - - - lemma stupdate_induct1: - shows " concat (map (hflat_aux \ (rder x \ (\s1. RSEQ (rders r0 s1) (RSTAR r0)))) Ss) = + shows " concat (map (hElem \ (rder x \ (\s1. RSEQ (rders r0 s1) (RSTAR r0)))) Ss) = map (\s1. RSEQ (rders r0 s1) (RSTAR r0)) (star_update x r0 Ss)" apply(induct Ss) apply simp+ @@ -1317,11 +1520,8 @@ lemma stupdates_join_general: - shows "concat - (map hflat_aux (map (rder x) - (map (\s1. RSEQ (rders r0 s1) (RSTAR r0)) (star_updates xs r0 Ss))) - ) = - map (\s1. RSEQ (rders r0 s1) (RSTAR r0)) (star_updates (xs @ [x]) r0 Ss)" + shows "concat (map hElem (map (rder x) (map (\s1. RSEQ (rders r0 s1) (RSTAR r0)) (star_updates xs r0 Ss)))) = + map (\s1. RSEQ (rders r0 s1) (RSTAR r0)) (star_updates (xs @ [x]) r0 Ss)" apply(induct xs arbitrary: Ss) apply (simp) prefer 2 @@ -1341,14 +1541,16 @@ apply (simp add: star_ders_cbs) apply(subst hfau_pushin) apply simp - apply(subgoal_tac "concat (map hflat_aux (map (rder x) (hflat_aux (rders (RSEQ (rder c r0) (RSTAR r0)) xs)))) = - concat (map hflat_aux (map (rder x) ( map (\s1. RSEQ (rders r0 s1) (RSTAR r0)) (star_updates xs r0 [[c]])))) ") + apply(subgoal_tac "concat (map hElem (map (rder x) (hflat_aux (rders (RSEQ (rder c r0) (RSTAR r0)) xs)))) = + concat (map hElem (map (rder x) ( map (\s1. RSEQ (rders r0 s1) (RSTAR r0)) (star_updates xs r0 [[c]])))) ") apply(simp only:) prefer 2 apply presburger apply(subst stupdates_append[symmetric]) using stupdates_join_general by blast + + lemma starders_hfau_also1: shows "hflat_aux (rders (RSTAR r) (c # xs)) = map (\s1. RSEQ (rders r s1) (RSTAR r)) (star_updates xs r [[c]])" using star_hfau_induct by force @@ -1366,7 +1568,7 @@ apply simp apply (metis (no_types, lifting) append_Cons append_eq_append_conv2 gmany_steps_later greal_trans grewrite.intros(2) grewrites_append self_append_conv) apply simp - by simp + by simp_all @@ -1380,10 +1582,29 @@ lemma hfau_rsimpeq2: shows "created_by_star r \ rsimp r = rsimp ( (RALTS (hflat_aux r)))" - apply(induct rule: created_by_star.induct) + apply(induct r) + apply simp+ + + apply (metis rsimp_seq_equal1) + prefer 2 + apply simp + apply(case_tac x) + apply simp + apply(case_tac "list") apply simp - apply (metis rsimp.simps(6) rsimp_seq_equal1) - using cbs_hfau_rsimpeq1 hflat_aux.simps(1) by presburger + + apply (metis idem_after_simp1) + apply(case_tac "lista") + prefer 2 + apply (metis hflat_aux.simps(8) idem_after_simp1 list.simps(8) list.simps(9) rsimp.simps(2)) + apply(subgoal_tac "rsimp (RALT a aa) = rsimp (RALTS (hflat_aux (RALT a aa)))") + apply simp + apply(subgoal_tac "rsimp (RALT a aa) = rsimp (RALTS (hflat_aux a @ hflat_aux aa))") + using hflat_aux.simps(1) apply presburger + apply simp + using cbs_hfau_rsimpeq1 apply(fastforce) + by simp + lemma star_closed_form1: shows "rsimp (rders (RSTAR r0) (c#s)) = @@ -1427,8 +1648,11 @@ using hrewrites_simpeq srewritescf_alt1 apply fastforce using star_closed_form6_hrewrites by blast + + + lemma stupdate_nonempty: - shows "\s \ set Ss. s \ [] \ \s \ set (star_update c r Ss). s \ []" + shows "\s \ set Ss. s \ [] \ \s \ set (star_update c r Ss). s \ []" apply(induct Ss) apply simp apply(case_tac "rnullable (rders r a)") @@ -1454,12 +1678,518 @@ lemma star_closed_form: shows "rders_simp (RSTAR r0) (c#s) = rsimp ( RALTS ( (map (\s1. RSEQ (rders_simp r0 s1) (RSTAR r0) ) (star_updates s r0 [[c]]) ) ))" - apply(induct s) + apply(case_tac s) apply simp apply (metis idem_after_simp1 rsimp.simps(1) rsimp.simps(6) rsimp_idem) using star_closed_form4 star_closed_form5 star_closed_form6 star_closed_form8 by presburger -unused_thms + + +fun nupdate :: "char \ rrexp \ (string * nat) option list \ (string * nat) option list" where + "nupdate c r [] = []" +| "nupdate c r (Some (s, Suc n) # Ss) = (if (rnullable (rders r s)) + then Some (s@[c], Suc n) # Some ([c], n) # (nupdate c r Ss) + else Some ((s@[c]), Suc n) # (nupdate c r Ss) + )" +| "nupdate c r (Some (s, 0) # Ss) = (if (rnullable (rders r s)) + then Some (s@[c], 0) # None # (nupdate c r Ss) + else Some ((s@[c]), 0) # (nupdate c r Ss) + ) " +| "nupdate c r (None # Ss) = (None # nupdate c r Ss)" + + +fun nupdates :: "char list \ rrexp \ (string * nat) option list \ (string * nat) option list" + where + "nupdates [] r Ss = Ss" +| "nupdates (c # cs) r Ss = nupdates cs r (nupdate c r Ss)" + +fun ntset :: "rrexp \ nat \ string \ (string * nat) option list" where + "ntset r (Suc n) (c # cs) = nupdates cs r [Some ([c], n)]" +| "ntset r 0 _ = [None]" +| "ntset r _ [] = []" + +inductive created_by_ntimes :: "rrexp \ bool" where + "created_by_ntimes RZERO" +| "created_by_ntimes (RSEQ ra (RNTIMES rb n))" +| "\created_by_ntimes r1; created_by_ntimes r2\ \ created_by_ntimes (RALT r1 r2)" +| "\created_by_ntimes r \ \ created_by_ntimes (RALT r RZERO)" + +fun highest_power_aux :: "(string * nat) option list \ nat \ nat" where + "highest_power_aux [] n = n" +| "highest_power_aux (None # rs) n = highest_power_aux rs n" +| "highest_power_aux (Some (s, n) # rs) m = highest_power_aux rs (max n m)" + +fun hpower :: "(string * nat) option list \ nat" where + "hpower rs = highest_power_aux rs 0" + + +lemma nupdate_mono: + shows " (highest_power_aux (nupdate c r optlist) m) \ (highest_power_aux optlist m)" + apply(induct optlist arbitrary: m) + apply simp + apply(case_tac a) + apply simp + apply(case_tac aa) + apply(case_tac b) + apply simp+ + done + +lemma nupdate_mono1: + shows "hpower (nupdate c r optlist) \ hpower optlist" + by (simp add: nupdate_mono) + + + +lemma cbn_ders_cbn: + shows "created_by_ntimes r \ created_by_ntimes (rder c r)" + apply(induct r rule: created_by_ntimes.induct) + apply simp + + using created_by_ntimes.intros(1) created_by_ntimes.intros(2) created_by_ntimes.intros(3) apply presburger + + apply (metis created_by_ntimes.simps rder.simps(5) rder.simps(7)) + using created_by_star.intros(1) created_by_star.intros(2) apply auto[1] + using created_by_ntimes.intros(1) created_by_ntimes.intros(3) apply auto[1] + by (metis (mono_tags, lifting) created_by_ntimes.simps list.simps(8) list.simps(9) rder.simps(1) rder.simps(4)) + +lemma ntimes_ders_cbn: + shows "created_by_ntimes (rders (RSEQ r' (RNTIMES r n)) s)" + apply(induct s rule: rev_induct) + apply simp + apply (simp add: created_by_ntimes.intros(2)) + apply(subst rders_append) + using cbn_ders_cbn by auto + +lemma always0: + shows "rders RZERO s = RZERO" + apply(induct s) + by simp+ + +lemma ntimes_ders_cbn1: + shows "created_by_ntimes (rders (RNTIMES r n) (c#s))" + apply(case_tac n) + apply simp + using always0 created_by_ntimes.intros(1) apply auto[1] + by (simp add: ntimes_ders_cbn) + + +lemma ntimes_hfau_pushin: + shows "created_by_ntimes r \ hflat_aux (rder c r) = concat (map hflat_aux (map (rder c) (hflat_aux r)))" + apply(induct r rule: created_by_ntimes.induct) + apply simp+ + done + + +abbreviation + "opterm r SN \ case SN of + Some (s, n) \ RSEQ (rders r s) (RNTIMES r n) + | None \ RZERO + + +" + +fun nonempty_string :: "(string * nat) option \ bool" where + "nonempty_string None = True" +| "nonempty_string (Some ([], n)) = False" +| "nonempty_string (Some (c#s, n)) = True" + + +lemma nupdate_nonempty: + shows "\\opt \ set Ss. nonempty_string opt \ \ \opt \ set (nupdate c r Ss). nonempty_string opt" + apply(induct c r Ss rule: nupdate.induct) + apply(auto) + apply (metis Nil_is_append_conv neq_Nil_conv nonempty_string.simps(3)) + by (metis Nil_is_append_conv neq_Nil_conv nonempty_string.simps(3)) + + + +lemma nupdates_nonempty: + shows "\\opt \ set Ss. nonempty_string opt \ \ \opt \ set (nupdates s r Ss). nonempty_string opt" + apply(induct s arbitrary: Ss) + apply simp + apply simp + using nupdate_nonempty by presburger + +lemma nullability1: shows "rnullable (rders r s) = rnullable (rders_simp r s)" + by (metis der_simp_nullability rders.simps(1) rders_simp.simps(1) rders_simp_same_simpders) + +lemma nupdate_induct1: + shows + "concat (map (hflat_aux \ (rder c \ (opterm r))) sl ) = + map (opterm r) (nupdate c r sl)" + apply(induct sl) + apply simp + apply(simp add: rders_append) + apply(case_tac "a") + apply simp+ + apply(case_tac "aa") + apply(case_tac "b") + apply(case_tac "rnullable (rders r ab)") + apply(subgoal_tac "rnullable (rders_simp r ab)") + apply simp + using rders.simps(1) rders.simps(2) rders_append apply presburger + using nullability1 apply blast + apply simp + using rders.simps(1) rders.simps(2) rders_append apply presburger + apply simp + using rders.simps(1) rders.simps(2) rders_append by presburger + + +lemma nupdates_join_general: + shows "concat (map hflat_aux (map (rder x) (map (opterm r) (nupdates xs r Ss)) )) = + map (opterm r) (nupdates (xs @ [x]) r Ss)" + apply(induct xs arbitrary: Ss) + apply (simp) + prefer 2 + apply auto[1] + using nupdate_induct1 by blast + + +lemma nupdates_join_general1: + shows "concat (map (hflat_aux \ (rder x) \ (opterm r)) (nupdates xs r Ss)) = + map (opterm r) (nupdates (xs @ [x]) r Ss)" + by (metis list.map_comp nupdates_join_general) + +lemma nupdates_append: shows +"nupdates (s @ [c]) r Ss = nupdate c r (nupdates s r Ss)" + apply(induct s arbitrary: Ss) + apply simp + apply simp + done + +lemma nupdates_mono: + shows "highest_power_aux (nupdates s r optlist) m \ highest_power_aux optlist m" + apply(induct s rule: rev_induct) + apply simp + apply(subst nupdates_append) + by (meson le_trans nupdate_mono) + +lemma nupdates_mono1: + shows "hpower (nupdates s r optlist) \ hpower optlist" + by (simp add: nupdates_mono) + + +(*"\r \ set (nupdates s r [Some ([c], n)]). r = None \( \s' m. r = Some (s', m) \ m \ n)"*) +lemma nupdates_mono2: + shows "hpower (nupdates s r [Some ([c], n)]) \ n" + by (metis highest_power_aux.simps(1) highest_power_aux.simps(3) hpower.simps max_nat.right_neutral nupdates_mono1) + +lemma hpow_arg_mono: + shows "m \ n \ highest_power_aux rs m \ highest_power_aux rs n" + apply(induct rs arbitrary: m n) + apply simp + apply(case_tac a) + apply simp + apply(case_tac aa) + apply simp + done + + +lemma hpow_increase: + shows "highest_power_aux (a # rs') m \ highest_power_aux rs' m" + apply(case_tac a) + apply simp + apply simp + apply(case_tac aa) + apply(case_tac b) + apply simp+ + apply(case_tac "Suc nat > m") + using hpow_arg_mono max.cobounded2 apply blast + using hpow_arg_mono max.cobounded2 by blast + +lemma hpow_append: + shows "highest_power_aux (rsa @ rsb) m = highest_power_aux rsb (highest_power_aux rsa m)" + apply (induct rsa arbitrary: rsb m) + apply simp + apply simp + apply(case_tac a) + apply simp + apply(case_tac aa) + apply simp + done + +lemma hpow_aux_mono: + shows "highest_power_aux (rsa @ rsb) m \ highest_power_aux rsb m" + apply(induct rsa arbitrary: rsb rule: rev_induct) + apply simp + apply simp + using hpow_increase order.trans by blast + + + + +lemma hpow_mono: + shows "hpower (rsa @ rsb) \ n \ hpower rsb \ n" + apply(induct rsb arbitrary: rsa) + apply simp + apply(subgoal_tac "hpower rsb \ n") + apply simp + apply (metis dual_order.trans hpow_aux_mono) + by (metis hpow_append hpow_increase hpower.simps nat_le_iff_add trans_le_add1) + + +lemma hpower_rs_elems_aux: + shows "highest_power_aux rs k \ n \ \r\set rs. r = None \ (\s' m. r = Some (s', m) \ m \ n)" +apply(induct rs k arbitrary: n rule: highest_power_aux.induct) + apply(auto) + by (metis dual_order.trans highest_power_aux.simps(1) hpow_append hpow_aux_mono linorder_le_cases max.absorb1 max.absorb2) + +lemma hpower_rs_elems: + shows "hpower rs \ n \ \r \ set rs. r = None \( \s' m. r = Some (s', m) \ m \ n)" + by (simp add: hpower_rs_elems_aux) + +lemma nupdates_elems_leqn: + shows "\r \ set (nupdates s r [Some ([c], n)]). r = None \( \s' m. r = Some (s', m) \ m \ n)" + by (meson hpower_rs_elems nupdates_mono2) + +lemma ntimes_hfau_induct: + shows "hflat_aux (rders (RSEQ (rder c r) (RNTIMES r n)) s) = + map (opterm r) (nupdates s r [Some ([c], n)])" + apply(induct s rule: rev_induct) + apply simp + apply(subst rders_append)+ + apply simp + apply(subst nupdates_append) + apply(subgoal_tac "created_by_ntimes (rders (RSEQ (rder c r) (RNTIMES r n)) xs)") + prefer 2 + apply (simp add: ntimes_ders_cbn) + apply(subst ntimes_hfau_pushin) + apply simp + apply(subgoal_tac "concat (map hflat_aux (map (rder x) (hflat_aux (rders (RSEQ (rder c r) (RNTIMES r n)) xs)))) = + concat (map hflat_aux (map (rder x) ( map (opterm r) (nupdates xs r [Some ([c], n)])))) ") + apply(simp only:) + prefer 2 + apply presburger + apply(subst nupdates_append[symmetric]) + using nupdates_join_general by blast + + +(*nupdates s r [Some ([c], n)]*) +lemma ntimes_ders_hfau_also1: + shows "hflat_aux (rders (RNTIMES r (Suc n)) (c # xs)) = map (opterm r) (nupdates xs r [Some ([c], n)])" + using ntimes_hfau_induct by force + + + +lemma hfau_rsimpeq2_ntimes: + shows "created_by_ntimes r \ rsimp r = rsimp ( (RALTS (hflat_aux r)))" + apply(induct r) + apply simp+ + + apply (metis rsimp_seq_equal1) + prefer 2 + apply simp + apply(case_tac x) + apply simp + apply(case_tac "list") + apply simp + + apply (metis idem_after_simp1) + apply(case_tac "lista") + prefer 2 + apply (metis hflat_aux.simps(8) idem_after_simp1 list.simps(8) list.simps(9) rsimp.simps(2)) + apply(subgoal_tac "rsimp (RALT a aa) = rsimp (RALTS (hflat_aux (RALT a aa)))") + apply simp + apply(subgoal_tac "rsimp (RALT a aa) = rsimp (RALTS (hflat_aux a @ hflat_aux aa))") + using hflat_aux.simps(1) apply presburger + apply simp + using cbs_hfau_rsimpeq1 apply(fastforce) + by simp + + +lemma ntimes_closed_form1: + shows "rsimp (rders (RNTIMES r (Suc n)) (c#s)) = +rsimp ( ( RALTS ( map (opterm r) (nupdates s r [Some ([c], n)]) )))" + apply(subgoal_tac "created_by_ntimes (rders (RNTIMES r (Suc n)) (c#s))") + apply(subst hfau_rsimpeq2_ntimes) + apply linarith + using ntimes_ders_hfau_also1 apply auto[1] + using ntimes_ders_cbn1 by blast + + +lemma ntimes_closed_form2: + shows "rsimp (rders_simp (RNTIMES r (Suc n)) (c#s) ) = +rsimp ( ( RALTS ( (map (opterm r ) (nupdates s r [Some ([c], n)]) ) )))" + by (metis list.distinct(1) ntimes_closed_form1 rders_simp_same_simpders rsimp_idem) + + +lemma ntimes_closed_form3: + shows "rsimp (rders_simp (RNTIMES r n) (c#s)) = (rders_simp (RNTIMES r n) (c#s))" + by (metis list.distinct(1) rders_simp_same_simpders rsimp_idem) + + +lemma ntimes_closed_form4: + shows " (rders_simp (RNTIMES r (Suc n)) (c#s)) = +rsimp ( ( RALTS ( (map (opterm r ) (nupdates s r [Some ([c], n)]) ) )))" + using ntimes_closed_form2 ntimes_closed_form3 + by metis + + + + +lemma ntimes_closed_form5: + shows " rsimp ( RALTS (map (\s1. RSEQ (rders r0 s1) (RNTIMES r n) ) Ss)) = + rsimp ( RALTS (map (\s1. rsimp (RSEQ (rders r0 s1) (RNTIMES r n)) ) Ss))" + by (smt (verit, ccfv_SIG) list.map_comp map_eq_conv o_apply simp_flatten_aux0) + + + +lemma ntimes_closed_form6_hrewrites: + shows " +(map (\s1. (RSEQ (rsimp (rders r0 s1)) (RNTIMES r0 n)) ) Ss ) + scf\* +(map (\s1. rsimp (RSEQ (rders r0 s1) (RNTIMES r0 n)) ) Ss )" + apply(induct Ss) + apply simp + apply (simp add: ss1) + by (metis (no_types, lifting) list.simps(9) rsimp.simps(1) rsimp_idem simp_hrewrites ss2) + + + +lemma ntimes_closed_form6: + shows " rsimp ( ( RALTS ( (map (\s1. rsimp (RSEQ (rders r0 s1) (RNTIMES r0 n)) ) Ss )))) = + rsimp ( ( RALTS ( (map (\s1. (RSEQ (rsimp (rders r0 s1)) (RNTIMES r0 n)) ) Ss ))))" + apply(subgoal_tac " map (\s1. (RSEQ (rsimp (rders r0 s1)) (RNTIMES r0 n)) ) Ss scf\* + map (\s1. rsimp (RSEQ (rders r0 s1) (RNTIMES r0 n)) ) Ss ") + using hrewrites_simpeq srewritescf_alt1 apply fastforce + using ntimes_closed_form6_hrewrites by blast + +abbreviation + "optermsimp r SN \ case SN of + Some (s, n) \ RSEQ (rders_simp r s) (RNTIMES r n) + | None \ RZERO + + +" + +abbreviation + "optermOsimp r SN \ case SN of + Some (s, n) \ rsimp (RSEQ (rders r s) (RNTIMES r n)) + | None \ RZERO + + +" + +abbreviation + "optermosimp r SN \ case SN of + Some (s, n) \ RSEQ (rsimp (rders r s)) (RNTIMES r n) + | None \ RZERO +" + +lemma ntimes_closed_form51: + shows "rsimp (RALTS (map (opterm r) (nupdates s r [Some ([c], n)]))) = + rsimp (RALTS (map (rsimp \ (opterm r)) (nupdates s r [Some ([c], n)])))" + by (metis map_map simp_flatten_aux0) + + + +lemma osimp_Osimp: + shows " nonempty_string sn \ optermosimp r sn = optermsimp r sn" + apply(induct rule: nonempty_string.induct) + apply force + apply auto[1] + apply simp + by (metis list.distinct(1) rders.simps(2) rders_simp.simps(2) rders_simp_same_simpders) + + + +lemma osimp_Osimp_list: + shows "\sn \ set snlist. nonempty_string sn \ map (optermosimp r) snlist = map (optermsimp r) snlist" + by (simp add: osimp_Osimp) + + +lemma ntimes_closed_form8: + shows +"rsimp (RALTS (map (optermosimp r) (nupdates s r [Some ([c], n)]))) = + rsimp (RALTS (map (optermsimp r) (nupdates s r [Some ([c], n)])))" + apply(subgoal_tac "\opt \ set (nupdates s r [Some ([c], n)]). nonempty_string opt") + using osimp_Osimp_list apply presburger + by (metis list.distinct(1) list.set_cases nonempty_string.simps(3) nupdates_nonempty set_ConsD) + + + +lemma ntimes_closed_form9aux: + shows "\snopt \ set (nupdates s r [Some ([c], n)]). nonempty_string snopt" + by (metis list.distinct(1) list.set_cases nonempty_string.simps(3) nupdates_nonempty set_ConsD) + +lemma ntimes_closed_form9aux1: + shows "\snopt \ set snlist. nonempty_string snopt \ +rsimp (RALTS (map (optermosimp r) snlist)) = +rsimp (RALTS (map (optermOsimp r) snlist))" + apply(induct snlist) + apply simp+ + apply(case_tac "a") + apply simp+ + by (smt (z3) case_prod_conv idem_after_simp1 map_eq_conv nonempty_string.elims(2) o_apply option.simps(4) option.simps(5) rsimp.simps(1) rsimp.simps(7) rsimp_idem) + + + + +lemma ntimes_closed_form9: + shows +"rsimp (RALTS (map (optermosimp r) (nupdates s r [Some ([c], n)]))) = + rsimp (RALTS (map (optermOsimp r) (nupdates s r [Some ([c], n)])))" + using ntimes_closed_form9aux ntimes_closed_form9aux1 by presburger + + +lemma ntimes_closed_form10rewrites_aux: + shows " map (rsimp \ (opterm r)) optlist scf\* + map (optermOsimp r) optlist" + apply(induct optlist) + apply simp + apply (simp add: ss1) + apply simp + apply(case_tac a) + using ss2 apply fastforce + using ss2 by force + + +lemma ntimes_closed_form10rewrites: + shows " map (rsimp \ (opterm r)) (nupdates s r [Some ([c], n)]) scf\* + map (optermOsimp r) (nupdates s r [Some ([c], n)])" + using ntimes_closed_form10rewrites_aux by blast + +lemma ntimes_closed_form10: + shows "rsimp (RALTS (map (rsimp \ (opterm r)) (nupdates s r [Some ([c], n)]))) = + rsimp (RALTS (map (optermOsimp r) (nupdates s r [Some ([c], n)])))" + by (smt (verit, best) case_prod_conv hpower_rs_elems map_eq_conv nupdates_mono2 o_apply option.case(2) option.simps(4) rsimp.simps(3)) + + +lemma rders_simp_cons: + shows "rders_simp r (c # s) = rders_simp (rsimp (rder c r)) s" + by simp + +lemma rder_ntimes: + shows "rder c (RNTIMES r (Suc n)) = RSEQ (rder c r) (RNTIMES r n)" + by simp + + +lemma ntimes_closed_form: + shows "rders_simp (RNTIMES r0 (Suc n)) (c#s) = +rsimp ( RALTS ( (map (optermsimp r0 ) (nupdates s r0 [Some ([c], n)]) ) ))" + apply (subst rders_simp_cons) + apply(subst rder_ntimes) + using ntimes_closed_form10 ntimes_closed_form4 ntimes_closed_form51 ntimes_closed_form8 ntimes_closed_form9 by force + + + + + + +(* +lemma ntimes_closed_form: + assumes "s \ []" + shows "rders_simp (RNTIMES r (Suc n)) s = +rsimp ( RALTS ( map + (\ optSN. case optSN of + Some (s, n) \ RSEQ (rders_simp r s) (RNTIMES r n) + | None \ RZERO + ) + (ntset r n s) + ) + )" + +*) end \ No newline at end of file diff -r 56057198e4f5 -r 70c10dc41606 thys3/ClosedFormsBounds.thy --- a/thys3/ClosedFormsBounds.thy Fri May 26 08:09:30 2023 +0100 +++ b/thys3/ClosedFormsBounds.thy Fri May 26 08:10:17 2023 +0100 @@ -2,8 +2,6 @@ theory ClosedFormsBounds imports "GeneralRegexBound" "ClosedForms" begin - - lemma alts_ders_lambda_shape_ders: shows "\r \ set (map (\r. rders_simp r ( s)) rs ). \r1 \ set rs. r = rders_simp r1 s" by (simp add: image_iff) @@ -16,12 +14,6 @@ apply simp by simp -lemma distinct_list_size_len_bounded: - assumes "\r \ set rs. rsize r \ N" "length rs \ lrs" - shows "rsizes rs \ lrs * N " - using assms - by (metis rlist_bound dual_order.trans mult.commute mult_le_mono1) - lemma alts_closed_form_bounded: assumes "\r \ set rs. \s. rsize (rders_simp r s) \ N" shows "rsize (rders_simp (RALTS rs) s) \ max (Suc (N * (length rs))) (rsize (RALTS rs))" @@ -149,7 +141,7 @@ apply(simp only:) apply(subgoal_tac "rsizes (rdistinct rs (insert RZERO (insert RONE noalts_set \ alts_set))) \ rsizes (RONE # rdistinct rs (insert RZERO (insert RONE noalts_set \ alts_set)))") - apply (smt (verit, best) dual_order.trans insert_iff rrexp.distinct(15)) + apply (smt (verit, ccfv_threshold) dual_order.trans insertE rrexp.distinct(17)) apply (metis (no_types, opaque_lifting) le_add_same_cancel2 list.simps(9) sum_list.Cons zero_le) apply fastforce apply fastforce @@ -159,8 +151,8 @@ using rflts.simps(4) apply presburger apply simp apply(subgoal_tac "insert RONE (noalts_set \ corr_set) = (insert RONE noalts_set) \ corr_set") - apply(simp only:) - apply (metis Un_insert_left insertE rrexp.distinct(15)) + apply(simp only:) + apply (metis Un_insert_left insertE rrexp.distinct(17)) apply fastforce apply(case_tac "a \ noalts_set") apply simp @@ -182,14 +174,14 @@ apply(subgoal_tac "(insert a (noalts_set \ alts_set)) = (insert a noalts_set) \ alts_set") apply(simp only:) apply(subgoal_tac "noalts_set \ corr_set = (insert a noalts_set) \ corr_set") - apply(simp only:) - apply (metis insertE rrexp.distinct(21)) + apply(simp only:) + apply (metis insertE nonalt.simps(1) nonalt.simps(4)) apply blast apply fastforce apply force - apply simp - apply (metis Un_insert_left insert_iff rrexp.distinct(21)) + apply simp + apply (metis Un_insert_left insertE nonalt.simps(1) nonalt.simps(4)) apply(case_tac "a \ noalts_set") apply simp apply(subgoal_tac "a \ alts_set") @@ -212,14 +204,13 @@ apply(subgoal_tac "noalts_set \ corr_set = (insert a noalts_set) \ corr_set") apply(simp only:) - - apply (metis insertE rrexp.distinct(25)) + apply (metis insertE rrexp.distinct(31)) apply blast apply fastforce apply force apply simp - apply (metis Un_insert_left insertE rrexp.distinct(25)) + apply (metis Un_insert_left insertE rrexp.distinct(31)) using Suc_le_mono flts_size_reduction_alts apply presburger apply(case_tac "a \ noalts_set") @@ -242,16 +233,42 @@ apply(subgoal_tac "(insert a (noalts_set \ alts_set)) = (insert a noalts_set) \ alts_set") apply(simp only:) apply(subgoal_tac "noalts_set \ corr_set = (insert a noalts_set) \ corr_set") - apply(simp only:) - apply (metis insertE rrexp.distinct(29)) + apply(simp only:) + apply (metis insertE rrexp.distinct(37)) apply blast apply fastforce apply force apply simp - apply (metis Un_insert_left insert_iff rrexp.distinct(29)) - done + apply (metis Un_insert_left insert_iff rrexp.distinct(37)) + apply(case_tac "a \ noalts_set") + apply simp + apply(subgoal_tac "a \ alts_set") + prefer 2 + apply blast + apply(case_tac "a \ corr_set") + apply(subgoal_tac "noalts_set \ corr_set = insert a ( noalts_set \ corr_set)") + prefer 2 + apply fastforce + apply(simp only:) + apply(subgoal_tac "rsizes (rdistinct (a # rs) (insert RZERO ((insert a noalts_set) \ alts_set))) \ + rsizes (rdistinct (a # rs) (insert RZERO (noalts_set \ alts_set)))") + + apply(subgoal_tac "rsizes (rdistinct (rflts (a # rs)) ((insert a noalts_set) \ corr_set)) \ + rsizes (rdistinct (a # rs) (insert RZERO ((insert a noalts_set) \ alts_set)))") + apply fastforce + apply simp + apply(subgoal_tac "(insert a (noalts_set \ alts_set)) = (insert a noalts_set) \ alts_set") + apply(simp only:) + apply(subgoal_tac "noalts_set \ corr_set = (insert a noalts_set) \ corr_set") + apply(simp only:) + apply (metis insertE nonalt.simps(1) nonalt.simps(7)) + apply blast + apply blast + apply force + apply(auto) + by (metis Un_insert_left insert_iff rrexp.distinct(39)) lemma flts_vs_nflts: @@ -318,6 +335,11 @@ by (simp add: larger_acc_smaller_distinct_res) +lemma distinct_list_size_len_bounded: + assumes "\r \ set rs. rsize r \ N" "length rs \ lrs" + shows "rsizes rs \ lrs * N " + using assms + by (metis rlist_bound dual_order.trans mult.commute mult_le_mono1) @@ -382,6 +404,220 @@ qed +thm ntimes_closed_form + +thm rsize.simps + +lemma nupdates_snoc: + shows " (nupdates (xs @ [x]) r optlist) = nupdate x r (nupdates xs r optlist)" + by (simp add: nupdates_append) + +lemma nupdate_elems: + shows "\opt \ set (nupdate c r optlist). opt = None \ (\s n. opt = Some (s, n))" + using nonempty_string.cases by auto + +lemma nupdates_elems: + shows "\opt \ set (nupdates s r optlist). opt = None \ (\s n. opt = Some (s, n))" + by (meson nonempty_string.cases) + + +lemma opterm_optlist_result_shape: + shows "\r' \ set (map (optermsimp r) optlist). r' = RZERO \ (\s m. r' = RSEQ (rders_simp r s) (RNTIMES r m))" + apply(induct optlist) + apply simp + apply(case_tac a) + apply simp+ + by fastforce + + +lemma opterm_optlist_result_shape2: + shows "\optlist. \r' \ set (map (optermsimp r) optlist). r' = RZERO \ (\s m. r' = RSEQ (rders_simp r s) (RNTIMES r m))" + using opterm_optlist_result_shape by presburger + + +lemma nupdate_n_leq_n: + shows "\r \ set (nupdate c' r [Some ([c], n)]). r = None \( \s' m. r = Some (s', m) \ m \ n)" + apply(case_tac n) + apply simp + apply simp + done +(* +lemma nupdate_induct_leqn: + shows "\\opt \ set optlist. opt = None \ (\s' m. opt = Some(s', m) \ m \ n) \ \ + \opt \ set (nupdate c' r optlist). opt = None \ (\s' m. opt = Some (s', m) \ m \ n)" + apply (case_tac optlist) + apply simp + apply(case_tac a) + apply simp + sledgehammer +*) + + +lemma nupdates_n_leq_n: + shows "\r \ set (nupdates s r [Some ([c], n)]). r = None \( \s' m. r = Some (s', m) \ m \ n)" + apply(induct s rule: rev_induct) + apply simp + apply(subst nupdates_append) + by (metis nupdates_elems_leqn nupdates_snoc) + + + +lemma ntimes_closed_form_list_elem_shape: + shows "\r' \ set (map (optermsimp r) (nupdates s r [Some ([c], n)])). +r' = RZERO \ (\s' m. r' = RSEQ (rders_simp r s') (RNTIMES r m) \ m \ n)" + apply(insert opterm_optlist_result_shape2) + apply(case_tac s) + apply(auto) + apply (metis rders_simp_one_char) + by (metis case_prod_conv nupdates.simps(2) nupdates_n_leq_n option.simps(4) option.simps(5)) + + +lemma ntimes_trivial1: + shows "rsize RZERO \ N + rsize (RNTIMES r n)" + by simp + + +lemma ntimes_trivial20: + shows "m \ n \ rsize (RNTIMES r m) \ rsize (RNTIMES r n)" + by simp + + +lemma ntimes_trivial2: + assumes "\s. rsize (rders_simp r s) \ N" + shows " r' = RSEQ (rders_simp r s1) (RNTIMES r m) \ m \ n + \ rsize r' \ Suc (N + rsize (RNTIMES r n))" + apply simp + by (simp add: add_mono_thms_linordered_semiring(1) assms) + +lemma ntimes_closed_form_list_elem_bounded: + assumes "\s. rsize (rders_simp r s) \ N" + shows "\r' \ set (map (optermsimp r) (nupdates s r [Some ([c], n)])). rsize r' \ Suc (N + rsize (RNTIMES r n))" + apply(rule ballI) + apply(subgoal_tac "r' = RZERO \ (\s' m. r' = RSEQ (rders_simp r s') (RNTIMES r m) \ m \ n)") + prefer 2 + using ntimes_closed_form_list_elem_shape apply blast + apply(case_tac "r' = RZERO") + using le_SucI ntimes_trivial1 apply presburger + apply(subgoal_tac "\s1 m. r' = RSEQ (rders_simp r s1) (RNTIMES r m) \ m \ n") + apply(erule exE)+ + using assms ntimes_trivial2 apply presburger + by blast + + +lemma P_holds_after_distinct: + assumes "\r \ set rs. P r" + shows "\r \ set (rdistinct rs rset). P r" + by (simp add: assms rdistinct_set_equality1) + +lemma ntimes_control_bounded: + assumes "\s. rsize (rders_simp r s) \ N" + shows "rsizes (rdistinct (map (optermsimp r) (nupdates s r [Some ([c], n)])) {}) + \ (card (sizeNregex (Suc (N + rsize (RNTIMES r n))))) * (Suc (N + rsize (RNTIMES r n)))" + apply(subgoal_tac "\r' \ set (rdistinct (map (optermsimp r) (nupdates s r [Some ([c], n)])) {}). + rsize r' \ Suc (N + rsize (RNTIMES r n))") + apply (meson distinct_list_rexp_upto rdistinct_same_set) + apply(subgoal_tac "\r' \ set (map (optermsimp r) (nupdates s r [Some ([c], n)])). rsize r' \ Suc (N + rsize (RNTIMES r n))") + apply (simp add: rdistinct_set_equality) + by (metis assms nat_le_linear not_less_eq_eq ntimes_closed_form_list_elem_bounded) + + + +lemma ntimes_closed_form_bounded0: + assumes "\s. rsize (rders_simp r s) \ N" + shows " (rders_simp (RNTIMES r 0) s) = RZERO \ (rders_simp (RNTIMES r 0) s) = RNTIMES r 0 + " + apply(induct s) + apply simp + by (metis always0 list.simps(3) rder.simps(7) rders.simps(2) rders_simp_same_simpders rsimp.simps(3)) + +lemma ntimes_closed_form_bounded1: + assumes "\s. rsize (rders_simp r s) \ N" + shows " rsize (rders_simp (RNTIMES r 0) s) \ max (rsize RZERO) (rsize (RNTIMES r 0))" + + by (metis assms max.cobounded1 max.cobounded2 ntimes_closed_form_bounded0) + +lemma self_smaller_than_bound: + shows "\s. rsize (rders_simp r s) \ N \ rsize r \ N" + apply(drule_tac x = "[]" in spec) + apply simp + done + +lemma ntimes_closed_form_bounded_nil_aux: + shows "max (rsize RZERO) (rsize (RNTIMES r 0)) = 1 + rsize r" + by auto + +lemma ntimes_closed_form_bounded_nil: + assumes "\s. rsize (rders_simp r s) \ N" + shows " rsize (rders_simp (RNTIMES r 0) s) \ 1 + rsize r" + using assms ntimes_closed_form_bounded1 by auto + +lemma ntimes_ineq1: + shows "(rsize (RNTIMES r n)) \ 1 + rsize r" + by simp + +lemma ntimes_ineq2: + shows "1 + rsize r \ +max ((Suc (card (sizeNregex (Suc (N + rsize (RNTIMES r n))))) * (Suc (N + rsize (RNTIMES r n))))) (rsize (RNTIMES r n))" + by (meson le_max_iff_disj ntimes_ineq1) + +lemma ntimes_closed_form_bounded: + assumes "\s. rsize (rders_simp r s) \ N" + shows "rsize (rders_simp (RNTIMES r (Suc n)) s) \ + max ((Suc (card (sizeNregex (Suc (N + rsize (RNTIMES r n))))) * (Suc (N + rsize (RNTIMES r n))))) (rsize (RNTIMES r n))" +proof(cases s) + case Nil + then show "rsize (rders_simp (RNTIMES r (Suc n)) s) + \ max (Suc (card (sizeNregex (Suc (N + rsize (RNTIMES r n))))) * Suc (N + rsize (RNTIMES r n))) (rsize (RNTIMES r n))" + by simp +next + case (Cons a list) + + then have "rsize (rders_simp (RNTIMES r (Suc n)) s) = + rsize (rsimp (RALTS ((map (optermsimp r) (nupdates list r [Some ([a], n)])))))" + using ntimes_closed_form by fastforce + also have "... \ Suc (rsizes (rdistinct ((map (optermsimp r) (nupdates list r [Some ([a], n)]))) {}))" + using alts_simp_control by blast + also have "... \ Suc (card (sizeNregex (Suc (N + rsize (RNTIMES r n))))) * (Suc (N + rsize (RNTIMES r n)))" + using ntimes_control_bounded[OF assms] + by (metis add_mono le_add1 mult_Suc plus_1_eq_Suc) + also have "... \ max (Suc (card (sizeNregex (Suc (N + rsize (RNTIMES r n))))) * Suc (N + rsize (RNTIMES r n))) (rsize (RNTIMES r n))" + by simp + finally show ?thesis by simp +qed + + +lemma ntimes_closed_form_boundedA: + assumes "\s. rsize (rders_simp r s) \ N" + shows "\N'. \s. rsize (rders_simp (RNTIMES r n) s) \ N'" + apply(case_tac n) + using assms ntimes_closed_form_bounded_nil apply blast + using assms ntimes_closed_form_bounded by blast + + +lemma star_closed_form_nonempty_bounded: + assumes "\s. rsize (rders_simp r s) \ N" and "s \ []" + shows "rsize (rders_simp (RSTAR r) s) \ + ((Suc (card (sizeNregex (Suc (N + rsize (RSTAR r))))) * (Suc (N + rsize (RSTAR r))))) " +proof(cases s) + case Nil + then show ?thesis + using local.Nil by fastforce +next + case (Cons a list) + then have "rsize (rders_simp (RSTAR r) s) = + rsize (rsimp (RALTS ((map (\s1. RSEQ (rders_simp r s1) (RSTAR r)) (star_updates list r [[a]])))))" + using star_closed_form by fastforce + also have "... \ Suc (rsizes (rdistinct (map (\s1. RSEQ (rders_simp r s1) (RSTAR r)) (star_updates list r [[a]])) {}))" + using alts_simp_control by blast + also have "... \ Suc (card (sizeNregex (Suc (N + rsize (RSTAR r))))) * (Suc (N + rsize (RSTAR r)))" + by (smt (z3) add_mono_thms_linordered_semiring(1) assms(1) le_add1 map_eq_conv mult_Suc plus_1_eq_Suc star_control_bounded) + also have "... \ max (Suc (card (sizeNregex (Suc (N + rsize (RSTAR r))))) * Suc (N + rsize (RSTAR r))) (rsize (RSTAR r))" + by simp + finally show ?thesis by simp +qed + + + lemma seq_estimate_bounded: assumes "\s. rsize (rders_simp r1 s) \ N1" and "\s. rsize (rders_simp r2 s) \ N2" @@ -428,7 +664,6 @@ by auto qed - lemma rders_simp_bounded: shows "\N. \s. rsize (rders_simp r s) \ N" apply(induct r) @@ -443,9 +678,12 @@ apply(assumption) apply(assumption) apply (metis alts_closed_form_bounded size_list_estimation') - using star_closed_form_bounded by blast + using star_closed_form_bounded apply blast + using ntimes_closed_form_boundedA by blast + + +unused_thms +export_code rders_simp rsimp rder in Scala module_name Example -unused_thms - end diff -r 56057198e4f5 -r 70c10dc41606 thys3/FBound.thy --- a/thys3/FBound.thy Fri May 26 08:09:30 2023 +0100 +++ b/thys3/FBound.thy Fri May 26 08:10:17 2023 +0100 @@ -18,6 +18,7 @@ | "rerase (AALTs bs rs) = RALTS (map rerase rs)" | "rerase (ASEQ _ r1 r2) = RSEQ (rerase r1) (rerase r2)" | "rerase (ASTAR _ r) = RSTAR (rerase r)" +| "rerase (ANTIMES _ r n) = RNTIMES (rerase r) n" lemma eq1_rerase: shows "x ~1 y \ (rerase x) = (rerase y)" diff -r 56057198e4f5 -r 70c10dc41606 thys3/GeneralRegexBound.thy --- a/thys3/GeneralRegexBound.thy Fri May 26 08:09:30 2023 +0100 +++ b/thys3/GeneralRegexBound.thy Fri May 26 08:10:17 2023 +0100 @@ -18,6 +18,10 @@ definition RALTs_set where "RALTs_set A n \ {RALTS rs | rs. \r \ set rs. r \ A \ rsizes rs \ n}" +definition RNTIMES_set where + "RNTIMES_set A n \ {RNTIMES r m | m r. r \ A \ rsize r + m \ n}" + + definition "sizeNregex N \ {r. rsize r \ N}" @@ -26,7 +30,8 @@ "sizeNregex (Suc n) = (({RZERO, RONE} \ {RCHAR c| c. True}) \ (RSTAR ` sizeNregex n) \ (RSEQ_set (sizeNregex n) n) - \ (RALTs_set (sizeNregex n) n))" + \ (RALTs_set (sizeNregex n) n)) + \ (RNTIMES_set (sizeNregex n) n)" apply(auto) apply(case_tac x) apply(auto simp add: RSEQ_set_def) @@ -37,15 +42,21 @@ apply (simp add: RALTs_set_def) apply (metis imageI list.set_map member_le_sum_list order_trans) apply (simp add: sizeNregex_def) - apply (simp add: sizeNregex_def) + apply (simp add: sizeNregex_def) + apply (simp add: RNTIMES_set_def) apply (simp add: sizeNregex_def) using sizeNregex_def apply force apply (simp add: sizeNregex_def) apply (simp add: sizeNregex_def) - apply (simp add: RALTs_set_def) + apply (simp add: sizeNregex_def) + apply (simp add: RALTs_set_def) apply(simp add: sizeNregex_def) apply(auto) - using ex_in_conv by fastforce + using ex_in_conv apply fastforce + apply (simp add: RNTIMES_set_def) + apply(simp add: sizeNregex_def) + by force + lemma s4: "RSEQ_set A n \ RSEQ_set_cartesian A" @@ -155,6 +166,22 @@ apply simp by (simp add: full_SetCompr_eq) +thm RNTIMES_set_def + +lemma s9_aux0: + shows "RNTIMES_set (insert r A) n \ RNTIMES_set A n \ (\ i \ {..n}. {RNTIMES r i})" +apply(auto simp add: RNTIMES_set_def) + done + +lemma s9_aux: + assumes "finite A" + shows "finite (RNTIMES_set A n)" + using assms + apply(induct A arbitrary: n) + apply(auto simp add: RNTIMES_set_def)[1] + apply(subgoal_tac "finite (RNTIMES_set F n \ (\ i \ {..n}. {RNTIMES x i}))") + apply (metis finite_subset s9_aux0) + by blast lemma finite_size_n: shows "finite (sizeNregex n)" @@ -175,8 +202,8 @@ apply(rule finite_subset) apply(rule t2) apply(rule s8_aux) - apply(simp) - done + apply(simp) + by (simp add: s9_aux) lemma three_easy_cases0: shows "rsize (rders_simp RZERO s) \ Suc 0" diff -r 56057198e4f5 -r 70c10dc41606 thys3/Lexer.thy --- a/thys3/Lexer.thy Fri May 26 08:09:30 2023 +0100 +++ b/thys3/Lexer.thy Fri May 26 08:10:17 2023 +0100 @@ -3,7 +3,7 @@ imports PosixSpec begin -section {* The Lexer Functions by Sulzmann and Lu (without simplification) *} +section \The Lexer Functions by Sulzmann and Lu (without simplification)\ fun mkeps :: "rexp \ val" @@ -12,6 +12,7 @@ | "mkeps(SEQ r1 r2) = Seq (mkeps r1) (mkeps r2)" | "mkeps(ALT r1 r2) = (if nullable(r1) then Left (mkeps r1) else Right (mkeps r2))" | "mkeps(STAR r) = Stars []" +| "mkeps(NTIMES r n) = Stars (replicate n (mkeps r))" fun injval :: "rexp \ char \ val \ val" where @@ -22,6 +23,7 @@ | "injval (SEQ r1 r2) c (Left (Seq v1 v2)) = Seq (injval r1 c v1) v2" | "injval (SEQ r1 r2) c (Right v2) = Seq (mkeps r1) (injval r2 c v2)" | "injval (STAR r) c (Seq v (Stars vs)) = Stars ((injval r c v) # vs)" +| "injval (NTIMES r n) c (Seq v (Stars vs)) = Stars ((injval r c v) # vs)" fun lexer :: "rexp \ string \ val option" @@ -33,20 +35,32 @@ -section {* Mkeps, Injval Properties *} +section \Mkeps, Injval Properties\ + +lemma mkeps_flat: + assumes "nullable(r)" + shows "flat (mkeps r) = []" +using assms + by (induct rule: mkeps.induct) (auto) + +lemma Prf_NTimes_empty: + assumes "\v \ set vs. \ v : r \ flat v = []" + and "length vs = n" + shows "\ Stars vs : NTIMES r n" + using assms + by (metis Prf.intros(7) empty_iff eq_Nil_appendI list.set(1)) + lemma mkeps_nullable: assumes "nullable(r)" shows "\ mkeps r : r" using assms -by (induct rule: nullable.induct) - (auto intro: Prf.intros) - -lemma mkeps_flat: - assumes "nullable(r)" - shows "flat (mkeps r) = []" -using assms -by (induct rule: nullable.induct) (auto) + apply (induct rule: mkeps.induct) + apply(auto intro: Prf.intros split: if_splits) + apply (metis Prf.intros(7) append_is_Nil_conv empty_iff list.set(1) list.size(3)) + apply(rule Prf_NTimes_empty) + apply(auto simp add: mkeps_flat) + done lemma Prf_injval_flat: assumes "\ v : der c r" @@ -62,14 +76,20 @@ using assms apply(induct r arbitrary: c v rule: rexp.induct) apply(auto intro!: Prf.intros mkeps_nullable elim!: Prf_elims split: if_splits) +(* Star *) apply(simp add: Prf_injval_flat) -done +(* NTimes *) + apply(case_tac x2) + apply(simp) + apply(simp) + apply(subst append.simps(2)[symmetric]) + apply(rule Prf.intros) + apply(auto simp add: Prf_injval_flat) + done +text \Mkeps and injval produce, or preserve, Posix values.\ -text {* - Mkeps and injval produce, or preserve, Posix values. -*} lemma Posix_mkeps: assumes "nullable r" @@ -80,7 +100,7 @@ apply(subst append.simps(1)[symmetric]) apply(rule Posix.intros) apply(auto) -done +by (simp add: Posix_NTIMES2 pow_empty_iff) lemma Posix_injval: assumes "s \ (der c r) \ v" @@ -228,11 +248,53 @@ ultimately have "((c # s1) @ s2) \ STAR r \ Stars (injval r c v1 # vs)" by (rule Posix.intros) then show "(c # s) \ STAR r \ injval (STAR r) c v" using cons by(simp) - qed + qed +next + case (NTIMES r n) + have IH: "\s v. s \ der c r \ v \ (c # s) \ r \ injval r c v" by fact + have "s \ der c (NTIMES r n) \ v" by fact + then consider + (cons) v1 vs s1 s2 where + "v = Seq v1 (Stars vs)" "s = s1 @ s2" + "s1 \ der c r \ v1" "s2 \ (NTIMES r (n - 1)) \ (Stars vs)" "0 < n" + "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L (der c r) \ s\<^sub>4 \ L (NTIMES r (n - 1)))" + apply(auto elim: Posix_elims simp add: der_correctness Der_def intro: Posix.intros split: if_splits) + apply(erule Posix_elims) + apply(simp) + apply(subgoal_tac "\vss. v2 = Stars vss") + apply(clarify) + apply(drule_tac x="vss" in meta_spec) + apply(drule_tac x="s1" in meta_spec) + apply(drule_tac x="s2" in meta_spec) + apply(simp add: der_correctness Der_def) + apply(erule Posix_elims) + apply(auto) + done + then show "(c # s) \ (NTIMES r n) \ injval (NTIMES r n) c v" + proof (cases) + case cons + have "s1 \ der c r \ v1" by fact + then have "(c # s1) \ r \ injval r c v1" using IH by simp + moreover + have "s2 \ (NTIMES r (n - 1)) \ Stars vs" by fact + moreover + have "(c # s1) \ r \ injval r c v1" by fact + then have "flat (injval r c v1) = (c # s1)" by (rule Posix1) + then have "flat (injval r c v1) \ []" by simp + moreover + have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L (der c r) \ s\<^sub>4 \ L (NTIMES r (n - 1)))" by fact + then have "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ (c # s1) @ s\<^sub>3 \ L r \ s\<^sub>4 \ L (NTIMES r (n - 1)))" + by (simp add: der_correctness Der_def) + ultimately + have "((c # s1) @ s2) \ NTIMES r n \ Stars (injval r c v1 # vs)" + by (metis One_nat_def Posix_NTIMES1 Suc_eq_plus1 Suc_pred cons(5)) + then show "(c # s) \ NTIMES r n \ injval (NTIMES r n) c v" using cons by(simp) + qed + qed -section {* Lexer Correctness *} +section \Lexer Correctness\ lemma lexer_correct_None: @@ -273,7 +335,7 @@ using Posix1(1) lexer_correct_None lexer_correct_Some by blast -subsection {* A slight reformulation of the lexer algorithm using stacked functions*} +subsection \A slight reformulation of the lexer algorithm using stacked functions\ fun flex :: "rexp \ (val \ val) => string \ (val \ val)" where @@ -354,7 +416,8 @@ apply(erule Prf_elims) apply(auto) apply (smt Prf_elims(6) injval.simps(7) list.inject val.inject(5)) - by (smt Prf_elims(6) injval.simps(7) list.inject val.inject(5)) + apply (smt Prf_elims(6) injval.simps(7) list.inject val.inject(5)) + by (smt (verit, best) Prf_elims(1) Prf_elims(2) Prf_elims(7) injval.simps(8) list.inject val.simps(5)) @@ -375,7 +438,7 @@ apply (simp add: lexer_correctness(1)) apply(subgoal_tac "\ a : (der c r)") prefer 2 - using Posix_Prf apply blast + using Posix1a apply blast using injval_inj by blast diff -r 56057198e4f5 -r 70c10dc41606 thys3/LexerSimp.thy --- a/thys3/LexerSimp.thy Fri May 26 08:09:30 2023 +0100 +++ b/thys3/LexerSimp.thy Fri May 26 08:10:17 2023 +0100 @@ -2,7 +2,7 @@ imports "Lexer" begin -section {* Lexer including some simplifications *} +section \Lexer including some simplifications\ fun F_RIGHT where diff -r 56057198e4f5 -r 70c10dc41606 thys3/Paper.thy --- a/thys3/Paper.thy Fri May 26 08:09:30 2023 +0100 +++ b/thys3/Paper.thy Fri May 26 08:10:17 2023 +0100 @@ -97,7 +97,7 @@ expressions have sparked quite a bit of interest in the functional programming and theorem prover communities. Derivatives of a -regular expression, written @{term "der c r"}, give a simple solution +regular expressions, written @{term "der c r"}, give a simple solution to the problem of matching a string @{term s} with a regular expression @{term r}: if the derivative of @{term r} w.r.t.\ (in succession) all the characters of the string matches the empty string, @@ -216,7 +216,10 @@ of POSIX lexing from our earlier work \cite{AusafDyckhoffUrban2016}. This serves as a reference point for what correctness means in our Isabelle/HOL proofs. We shall then prove the correctness for the bitcoded algorithm without simplification, and -after that extend the proof to include simplification.\mbox{}\\[-6mm] +after that extend the proof to include simplification. +Our Isabelle code including the results from Sec.~5 is available +from \textcolor{darkblue}{\url{https://github.com/urbanchr/posix}}. +\mbox{}\\[-6mm] *} @@ -256,9 +259,10 @@ In our work here we also add to the usual ``basic'' regular expressions the \emph{bounded} regular expression @{term "NTIMES r n"} where the @{term n} specifies that @{term r} should match - exactly @{term n}-times. Again for brevity we omit the other bounded - regular expressions @{text "r"}$^{\{..n\}}$, @{text "r"}$^{\{n..\}}$ - and @{text "r"}$^{\{n..m\}}$ which specify intervals for how many + exactly @{term n}-times (it is not included in Sulzmann and Lu's original work). For brevity we omit the other bounded + regular expressions @{text "r"}$^{\{..\textit{n}\}}$, + @{text "r"}$^{\{\textit{n}..\}}$ + and @{text "r"}$^{\{\textit{n}..\textit{m}\}}$ which specify intervals for how many times @{text r} should match. The results presented in this paper extend straightforwardly to them too. The importance of the bounded regular expressions is that they are often used in practical @@ -267,7 +271,7 @@ al~\cite{BjorklundMartensTimm2015}, bounded regular expressions occur frequently in the latter and can have counters of up to ten million. The problem is that tools based on the classic notion - of automata need to expand @{text "r"}$^{\{n\}}$ into @{text n} + of automata need to expand @{text "r"}$^{\{\textit{n}\}}$ into @{text n} connected copies of the automaton for @{text r}. This leads to very inefficient matching algorithms or algorithms that consume large amounts of memory. A classic example is the regular expression @@ -278,18 +282,27 @@ adhoc limits for bounded regular expressions: For example in the regular expression matching library in the Go language and also in Google's RE2 library the regular expression @{term "NTIMES a 1001"} is not permitted, because no counter can be - above 1000; and in the built-in regular expression library in Rust + above 1000; and in the regular expression library in Rust expressions such as @{text "a\<^bsup>{1000}{100}{5}\<^esup>"} give an error - message for being too big. These problems can of course be solved in matching + message for being too big. Up until recently,\footnote{up until version 1.5.4 of the regex + library in Rust; see also CVE-2022-24713.} Rust + however happily generated automata for regular expressions such as + @{text "a\<^bsup>{0}{4294967295}\<^esup>"}. This was due to a bug + in the algorithm that decides when a regular expression is acceptable + or too big according to Rust's classification (it did not account for the fact that @{text "a\<^bsup>{0}\<^esup>"} and similar examples can match the empty string). We shall come back to + this example later in the paper. + These problems can of course be solved in matching algorithms where automata go beyond the classic notion and for - instance include explicit counters (see~\cite{CountingSet2020}). + instance include explicit counters (e.g.~\cite{CountingSet2020}). The point here is that Brzozowski derivatives and the algorithms by Sulzmann and Lu can be straightforwardly extended to deal with bounded regular expressions and moreover the resulting code still consists of only simple recursive functions and inductive datatypes. Finally, bounded regular expressions do not destroy our finite boundedness property, which we shall - prove later on.%, because during the lexing process counters will only be + prove later on. + + %, because during the lexing process counters will only be %decremented. @@ -385,15 +398,15 @@ % \begin{center} \begin{tabular}{@ {}l@ {}} - @{thm[mode=Axiom] Prf.intros(4)}\quad - @{thm[mode=Rule] Prf.intros(2)[of "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]}\quad - @{thm[mode=Rule] Prf.intros(3)[of "v\<^sub>2" "r\<^sub>2" "r\<^sub>1"]}\quad + @{thm[mode=Axiom] Prf.intros(4)}\qquad + @{thm[mode=Rule] Prf.intros(2)[of "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]}\qquad + @{thm[mode=Rule] Prf.intros(3)[of "v\<^sub>2" "r\<^sub>2" "r\<^sub>1"]}\qquad @{thm[mode=Rule] Prf.intros(1)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]}\medskip\\ @{thm[mode=Axiom] Prf.intros(5)[of "c"]}\qquad @{thm[mode=Rule] Prf.intros(6)[of "vs" "r"]}\qquad $\mprset{flushleft}\inferrule{ @{thm (prem 1) Prf.intros(7)[of "vs\<^sub>1" "r" "vs\<^sub>2" "n"]}\\\\ - @{thm (prem 2) Prf.intros(7)[of "vs\<^sub>1" "r" "vs\<^sub>2" "n"]}\\\\ + @{thm (prem 2) Prf.intros(7)[of "vs\<^sub>1" "r" "vs\<^sub>2" "n"]}\quad @{thm (prem 3) Prf.intros(7)[of "vs\<^sub>1" "r" "vs\<^sub>2" "n"]} } {@{thm (concl) Prf.intros(7)[of "vs\<^sub>1" "r" "vs\<^sub>2" "n"]}} @@ -437,7 +450,7 @@ \begin{figure}[t] \begin{center}\small% \begin{tabular}{@ {\hspace{-2mm}}c@ {}} - \\[-4.5mm] + \\[-8.5mm] @{thm[mode=Axiom] Posix.intros(1)}\P\@{term "ONE"} \quad @{thm[mode=Axiom] Posix.intros(2)}\P\@{term "c"}\quad @{thm[mode=Rule] Posix.intros(3)[of "s" "r\<^sub>1" "v" "r\<^sub>2"]}\P+L\\quad @@ -445,9 +458,9 @@ $\mprset{flushleft} \inferrule {@{thm (prem 1) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \qquad - @{thm (prem 2) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \\\\ + @{thm (prem 2) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \qquad @{thm (prem 3) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}} - {@{thm (concl) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}$\PS\\medskip\smallskip\\ + {@{thm (concl) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}$\PS\\medskip\\ @{thm[mode=Axiom] Posix.intros(7)}\P[]\\qquad $\mprset{flushleft} \inferrule @@ -455,9 +468,9 @@ @{thm (prem 2) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad @{thm (prem 3) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \\\\ @{thm (prem 4) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}} - {@{thm (concl) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}$\P\\\medskip\smallskip\\ + {@{thm (concl) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}$\P\\\medskip\\ \mprset{sep=4mm} - @{thm[mode=Rule] Posix.intros(9)}\Pn[]\\quad + @{thm[mode=Rule] Posix.intros(9)}\Pn[]\\quad\; $\mprset{flushleft} \inferrule {@{thm (prem 1) Posix.intros(8)[of "s\<^sub>1" "r" "v" "s\<^sub>2" n "vs"]} \qquad @@ -487,13 +500,16 @@ @{thm (lhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]}\\ @{thm (lhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]}\\ @{thm (lhs) mkeps.simps(4)} & $\dn$ & @{thm (rhs) mkeps.simps(4)}\\ - @{thm (lhs) mkeps.simps(5)} & $\dn$ & @{thm (rhs) mkeps.simps(5)}\\ + @{thm (lhs) mkeps.simps(5)} & $\dn$ & @{thm (rhs) mkeps.simps(5)} \end{tabular} - \medskip\\ + %\end{tabular} + %\end{center} + \smallskip\\ - %!\begin{tabular}{@ {}cc@ {}} + %\begin{center} + %\begin{tabular}{@ {}cc@ {}} \begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {}@ {}} - @{thm (lhs) injval.simps(1)} & $\dn$ & @{thm (rhs) injval.simps(1)}\\ + @{thm (lhs) injval.simps(1)} & $\dn$ & @{thm (rhs) injval.simps(1)[of "c"]}\\ @{thm (lhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]} & $\dn$ & @{thm (rhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]}\\ @{thm (lhs) injval.simps(3)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]} & $\dn$ & @@ -525,7 +541,10 @@ a value for how the last derivative can match the empty string. In case of @{term "NTIMES r n"} we use the function @{term replicate} in order to generate a list of exactly @{term n} copies, which is the length of the list we expect in this - case. The injection function + case. The injection function\footnote{While the character argument @{text c} is not + strictly necessary in the @{text inj}-function for the fragment of regular expressions we + use in this paper, it is necessary for extended regular expressions. For example for the range regular expression of the form @{text "[a-z]"}. + We therefore keep this argument from the original formulation of @{text inj} by Sulzmann and Lu.} then calculates the corresponding value for each intermediate derivative until a value for the original regular expression is generated. Graphically the algorithm by @@ -536,7 +555,7 @@ left, the second phase. % \begin{center} -\begin{tikzpicture}[scale=0.99,node distance=9mm, +\begin{tikzpicture}[scale=0.85,node distance=8mm, every node/.style={minimum size=6mm}] \node (r1) {@{term "r\<^sub>1"}}; \node (r2) [right=of r1]{@{term "r\<^sub>2"}}; @@ -554,7 +573,7 @@ \draw[->,line width=1mm](v3)--(v2) node[below,midway] {\inj r\<^sub>2 b\}; \node (v1) [left=of v2] {@{term "v\<^sub>1"}}; \draw[->,line width=1mm](v2)--(v1) node[below,midway] {\inj r\<^sub>1 a\}; -\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{@{term "mkeps"}}}; +\draw (r4) node[anchor=north west] {\;\raisebox{-5mm}{@{term "mkeps"}}}; \end{tikzpicture} \end{center} % @@ -608,8 +627,9 @@ \begin{tabular}{lcl} @{thm (lhs) lexer.simps(1)} & $\dn$ & @{thm (rhs) lexer.simps(1)}\\ @{thm (lhs) lexer.simps(2)} & $\dn$ & @{text "case"} @{term "lexer (der c r) s"} @{text of} - @{term "None"} @{text "\"} @{term None}\\ - & & \hspace{27mm}$|\;$ @{term "Some v"} @{text "\"} @{term "Some (injval r c v)"} + @{term "None"} @{text "\"} @{term None} + %%& & \hspace{27mm} + $|\;$ @{term "Some v"} @{text "\"} @{term "Some (injval r c v)"} \end{tabular} \end{center} @@ -627,13 +647,13 @@ With this in place we were able to prove: \begin{proposition}\mbox{}\label{lexercorrect} - \textrm{(1)} @{thm (lhs) lexer_correct_None} if and only if @{thm (rhs) lexer_correct_None}.\\ - \mbox{\hspace{23.5mm}}\textrm{(2)}\; @{thm (lhs) lexer_correct_Some} if and only if @{thm (rhs) lexer_correct_Some}. + \textrm{(1)}\; @{thm (lhs) lexer_correct_None} if and only if @{thm (rhs) lexer_correct_None}.\\ + \mbox{\hspace{29.5mm}}\textrm{(2)}\; @{thm (lhs) lexer_correct_Some} if and only if @{thm (rhs) lexer_correct_Some}. % % \smallskip\\ %\begin{tabular}{ll} - %(1) & @{thm (lhs) lexer_correct_None} if and only if @{thm (rhs) lexer_correct_None}\\ - %(2) & @{thm (lhs) lexer_correct_Some} if and only if @{thm (rhs) lexer_correct_Some}\\ + %(1) & @{thm (lhs) lexer_correct_None} \;if and only if\; @{thm (rhs) lexer_correct_None}\\ + %(2) & @{thm (lhs) lexer_correct_Some} \;if and only if\; @{thm (rhs) lexer_correct_Some}\\ %\end{tabular} \end{proposition} @@ -657,18 +677,18 @@ injected ``back'' into values. For this they annotate bitcodes to regular expressions, which we define in Isabelle/HOL as the datatype\medskip - %!\begin{center} - \noindent - \begin{minipage}{1.01\textwidth} + \begin{center} + %\noindent + %\begin{minipage}{0.9\textwidth} \,@{term breg} $\,::=\,$ @{term "AZERO"} $\,\mid$ @{term "AONE bs"} $\,\mid$ @{term "ACHAR bs c"} $\,\mid$ @{term "AALTs bs rs"} $\,\mid$ @{term "ASEQ bs r\<^sub>1 r\<^sub>2"} $\,\mid$ @{term "ASTAR bs r"} - $\,\mid$ \mbox{@{term "ANTIMES bs r n"}}\hfill\mbox{} - \end{minipage}\medskip - %!\end{center} + $\,\mid$ \mbox{@{term "ANTIMES bs r n"}}%\hfill\mbox{} + %\end{minipage}\medskip + \end{center} \noindent where @{text bs} stands for bitsequences; @{text r}, @{text "r\<^sub>1"} and @{text "r\<^sub>2"} for bitcoded regular @@ -702,9 +722,9 @@ \end{center} \noindent - As can be seen, this coding is ``lossy'' in the sense that we do not + As can be seen, this coding is ``lossy'' in the sense that it does not record explicitly character values and also not sequence values (for - them we just append two bitsequences). However, the + them it just appends two bitsequences). However, the different alternatives for @{text Left}, respectively @{text Right}, are recorded as @{text Z} and @{text S} followed by some bitsequence. Similarly, we use @{text Z} to indicate if there is still a value coming in the list of @{text Stars}, whereas @{text S} @@ -858,7 +878,7 @@ bitcoded regular expressions. % \begin{center} - \begin{tabular}{@ {\hspace{-1mm}}c@ {\hspace{6mm}}c@ {}} + \begin{tabular}{@ {\hspace{-1mm}}c@ {\hspace{10mm}}c@ {}} \begin{tabular}{@ {}l@ {\hspace{0.5mm}}c@ {\hspace{1mm}}l} $\textit{bnullable}\,(\textit{ZERO})$ & $\dn$ & $\textit{False}$\\ $\textit{bnullable}\,(\textit{ONE}\,bs)$ & $\dn$ & $\textit{True}$\\ @@ -933,10 +953,10 @@ \begin{center} \begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {}} $\textit{blexer}\;r\,s$ & $\dn$ & - $\textit{let}\;r_{der} = (r^\uparrow)\backslash s\;\textit{in}$\\ - & & $\;\;\;\textit{if}\; \textit{bnullable}(r_{der})$\\ - & & $\;\;\;\textit{then}\;\textit{decode}\,(\textit{bmkeps}\,r_{der})\,r$\\ - & & $\;\;\;\textit{else}\;\textit{None}$ + $\textit{let}\;r_{der} = (r^\uparrow)\backslash s\;\textit{in}$\\ + & & $\;\;\;\textit{if}\; \textit{bnullable}(r_{der})$ + $\;\textit{then}\;\textit{decode}\,(\textit{bmkeps}\,r_{der})\,r$ + $\;\textit{else}\;\textit{None}$ \end{tabular} \end{center} @@ -991,12 +1011,12 @@ \begin{lemma}\label{bnullable}%\mbox{}\smallskip\\ \textit{(1)} $(r\backslash s)^\downarrow = (r^\downarrow)\backslash s$\\ -\mbox{\hspace{17mm}}\textit{(2)} $\textit{bnullable}(r)$ iff $\textit{nullable}(r^\downarrow)$\\ -\mbox{\hspace{17mm}}\textit{(3)} $\textit{bmkeps}(r) = \textit{retrieve}\,r\,(\textit{mkeps}\,(r^\downarrow))$ provided $\textit{nullable}(r^\downarrow)$ +\mbox{\hspace{21.5mm}}\textit{(2)} $\textit{bnullable}(r)$ iff $\textit{nullable}(r^\downarrow)$\\ +\mbox{\hspace{21.5mm}}\textit{(3)} $\textit{bmkeps}(r) = \textit{retrieve}\,r\,(\textit{mkeps}\,(r^\downarrow))$ provided $\textit{nullable}(r^\downarrow)$ %\begin{tabular}{ll} %\textit{(1)} & $(r\backslash s)^\downarrow = (r^\downarrow)\backslash s$\\ %\textit{(2)} & $\textit{bnullable}(r)$ iff $\textit{nullable}(r^\downarrow)$\\ -%\textit{(3)} & $\textit{bmkeps}(r) = \textit{retrieve}\,r\,(\textit{mkeps}\,(r^\downarrow))$ provided $\textit{nullable}(r^\downarrow)$. +%\textit{(3)} & $\textit{bmkeps}(r) = \textit{retrieve}\,r\,(\textit{mkeps}\,(r^\downarrow))$ \;provided\; $\textit{nullable}(r^\downarrow)$. %\end{tabular} \end{lemma} @@ -1030,7 +1050,7 @@ \begin{lemma}\label{flex} If @{text "lexer r s = Some v"} \;then\; @{text "v = "}$\,\textit{flex}\,r\,id\,s\, - (\mkeps (r\backslash s))$. + (\textit{mkeps}\,(r\backslash s))$. \end{lemma} \noindent @@ -1111,7 +1131,7 @@ regular expression (underlined with $r$). These copies will spawn new copies in later derivative steps and they in turn even more copies. This destroys any hope of taming the size of the derivatives. But the - second copy of $r$ in \eqref{derivex} will never contribute to a + second copy of $r$ in~\eqref{derivex} will never contribute to a value, because POSIX lexing will always prefer matching a string with the first copy. So it could be safely removed without affecting the correctness of the algorithm. The issue with the simple-minded @@ -1130,7 +1150,7 @@ de-nest, or spill out, @{text ALTs} as follows % \[ - @{term "ALTs bs\<^sub>1 (((ALTs bs\<^sub>2 rs\<^sub>2)) # rs\<^sub>1)"} + @{text "ALTs bs\<^sub>1 ((ALTs bs\<^sub>2 rs\<^sub>2) :: rs\<^sub>1)"} \quad\xrightarrow{bsimp}\quad @{text "ALTs bs\<^sub>1 ((map (fuse bs\<^sub>2) rs\<^sub>2) @ rs\<^sub>1)"} \] @@ -1171,8 +1191,8 @@ \begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {}} @{thm (lhs) distinctWith.simps(1)} & $\dn$ & @{thm (rhs) distinctWith.simps(1)}\\ @{thm (lhs) distinctWith.simps(2)} & $\dn$ & - @{text "if (\ y \ acc. eq x y)"} \\ - & & @{text "then distinctWith xs eq acc"}\\ + @{text "if (\ y \ acc. eq x y)"} + @{text "then distinctWith xs eq acc"}\\ & & @{text "else x :: distinctWith xs eq ({x} \ acc)"} \end{tabular} \end{center} @@ -1190,17 +1210,23 @@ bitcoded regular expressions to @{text bool}: % \begin{center} - \begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {\hspace{1mm}}l@ {}} + \begin{tabular}{@ {}cc@ {}} + \begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {\hspace{1mm}}l@ {\hspace{1mm}}} @{thm (lhs) eq1.simps(1)} & $\dn$ & @{thm (rhs) eq1.simps(1)}\\ @{thm (lhs) eq1.simps(2)[of DUMMY DUMMY]} & $\dn$ & @{thm (rhs) eq1.simps(2)[of DUMMY DUMMY]}\\ + @{thm (lhs) eq1.simps(7)[of DUMMY "r\<^sub>1" DUMMY "r\<^sub>2"]} & $\dn$ & @{thm (rhs) eq1.simps(7)[of DUMMY "r\<^sub>1" DUMMY "r\<^sub>2"]}\\ + @{thm (lhs) eq1.simps(5)[of DUMMY DUMMY]} & $\dn$ & @{thm (rhs) eq1.simps(5)[of DUMMY DUMMY]}\\ + \mbox{} + \end{tabular} + & + \begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {\hspace{1mm}}l@ {}} @{thm (lhs) eq1.simps(3)[of DUMMY "c" DUMMY "d"]} & $\dn$ & @{thm (rhs) eq1.simps(3)[of DUMMY "c" DUMMY "d"]}\\ @{thm (lhs) eq1.simps(4)[of DUMMY "r\<^sub>1\<^sub>1" "r\<^sub>1\<^sub>2" DUMMY "r\<^sub>2\<^sub>1" "r\<^sub>2\<^sub>2"]} & $\dn$ & @{thm (rhs) eq1.simps(4)[of DUMMY "r\<^sub>1\<^sub>1" "r\<^sub>1\<^sub>2" DUMMY "r\<^sub>2\<^sub>1" "r\<^sub>2\<^sub>2"]}\\ - @{thm (lhs) eq1.simps(5)[of DUMMY DUMMY]} & $\dn$ & @{thm (rhs) eq1.simps(5)[of DUMMY DUMMY]}\\ - @{thm (lhs) eq1.simps(6)[of DUMMY "r\<^sub>1" "rs\<^sub>1" DUMMY "r\<^sub>2" "rs\<^sub>2"]} & $\dn$ & - @{thm (rhs) eq1.simps(6)[of DUMMY "r\<^sub>1" "rs\<^sub>1" DUMMY "r\<^sub>2" "rs\<^sub>2"]}\\ - @{thm (lhs) eq1.simps(7)[of DUMMY "r\<^sub>1" DUMMY "r\<^sub>2"]} & $\dn$ & @{thm (rhs) eq1.simps(7)[of DUMMY "r\<^sub>1" DUMMY "r\<^sub>2"]}\\ @{thm (lhs) eq1.simps(8)[of DUMMY "r\<^sub>1" "n\<^sub>1" DUMMY "r\<^sub>2" "n\<^sub>2"]} & $\dn$ & @{thm (rhs) eq1.simps(8)[of DUMMY "r\<^sub>1" "n\<^sub>1" DUMMY "r\<^sub>2" "n\<^sub>2"]}\\ + @{thm (lhs) eq1.simps(6)[of DUMMY "r\<^sub>1" "rs\<^sub>1" DUMMY "r\<^sub>2" "rs\<^sub>2"]} & $\dn$ & \\ + \multicolumn{3}{r}{@{thm (rhs) eq1.simps(6)[of DUMMY "r\<^sub>1" "rs\<^sub>1" DUMMY "r\<^sub>2" "rs\<^sub>2"]}} + \end{tabular} \end{tabular} \end{center} @@ -1212,21 +1238,22 @@ Our simplification function depends on three more helper functions, one is called @{text flts} and analyses lists of regular expressions coming from alternatives. - It is defined as follows: + It is defined by four clauses as follows: \begin{center} - \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l} - \multicolumn{3}{@ {}c}{@{thm (lhs) flts.simps(1)} $\dn$ @{thm (rhs) flts.simps(1)} \qquad\qquad\qquad\qquad - @{thm (lhs) flts.simps(2)} $\dn$ @{thm (rhs) flts.simps(2)}}\smallskip\\ - @{thm (lhs) flts.simps(3)[of "bs'" "rs'"]} & $\dn$ & @{thm (rhs) flts.simps(3)[of "bs'" "rs'"]}\\ - \multicolumn{3}{@ {}c}{@{text "flts (r :: rs)"} $\dn$ @{text "r :: flts rs"} - \hspace{5mm}(otherwise)} + \begin{tabular}{c} + @{thm (lhs) flts.simps(1)} $\dn$ @{thm (rhs) flts.simps(1)} \qquad + @{thm (lhs) flts.simps(2)} $\dn$ @{thm (rhs) flts.simps(2)} \qquad + @{text "flts ((ALTs bs' rs') :: rs"} + %@{ thm (lhs) flts.simps(3)[of "bs'" "rs'"]} + $\dn$ @{thm (rhs) flts.simps(3)[of "bs'" "rs'"]}\smallskip\\ + @{text "flts (r :: rs)"} $\dn$ @{text "r :: flts rs"}\hspace{5mm}(otherwise) \end{tabular} \end{center} \noindent The second clause of @{text flts} removes all instances of @{text ZERO} in alternatives and - the third ``de-nests'' alternatives (but retaining the + the third ``de-nests'' alternatives (but retains the bitsequence @{text "bs'"} accumulated in the inner alternative). There are some corner cases to be considered when the resulting list inside an alternative is empty or a singleton list. We take care of those cases in the @@ -1266,7 +1293,9 @@ \noindent We believe our recursive function @{term bsimp} simplifies bitcoded regular - expressions as intended by Sulzmann and Lu. There is no point in applying the + expressions as intended by Sulzmann and Lu with the small addition of removing ``useless'' @{text ONE}s + in sequence regular + expressions. There is no point in applying the @{text bsimp} function repeatedly (like the simplification in their paper which needs to be applied until a fixpoint is reached) because we can show that @{term bsimp} is idempotent, that is @@ -1304,8 +1333,8 @@ \begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {}} $\textit{blexer}^+\;r\,s$ & $\dn$ & $\textit{let}\;r_{der} = (r^\uparrow)\backslash_{bsimp}\, s\;\textit{in}$\\ - & & $\;\;\;\textit{if}\; \textit{bnullable}(r_{der})$\\ - & & $\;\;\;\textit{then}\;\textit{decode}\,(\textit{bmkeps}\,r_{der})\,r$ $\;\;\textit{else}\;\textit{None}$ + & & $\;\;\;\textit{if}\; \textit{bnullable}(r_{der})$ + $\;\textit{then}\;\textit{decode}\,(\textit{bmkeps}\,r_{der})\,r$ $\;\textit{else}\;\textit{None}$ \end{tabular} \end{center} @@ -1343,7 +1372,7 @@ the original POSIX value, there is no hope of appealing to @{text retrieve} in the correctness argument for @{term blexer_simp}. - We found it more helpful to introduce the rewriting systems shown in + For our proof we found it more helpful to introduce the rewriting systems shown in Fig~\ref{SimpRewrites}. The idea is to generate simplified regular expressions in small steps (unlike the @{text bsimp}-function which does the same in a big step), and show that each of @@ -1435,7 +1464,7 @@ \begin{figure}[t] \begin{center} \begin{tabular}{@ {\hspace{-8mm}}c@ {}} - \\[-7mm] + \\[-8mm] @{thm[mode=Axiom] bs1[of _ "r\<^sub>2"]}$S\ZERO{}_l$\quad @{thm[mode=Axiom] bs2[of _ "r\<^sub>1"]}$S\ZERO{}_r$\quad @{thm[mode=Axiom] bs3[of "bs\<^sub>1" "bs\<^sub>2"]}$S\ONE$\\ @@ -1511,7 +1540,7 @@ bounded by $N_1 + \llbracket{}r_2\rrbracket + 1$. In (5) we know the list comprehension contains only regular expressions of size smaller than $N_2$. The list length after @{text distinctWith} is bounded by a number, which we call $l_{N_2}$. It stands for the number of distinct regular expressions smaller than $N_2$ (there can only be finitely many of them). -We reason similarly for @{text STAR}.\smallskip +We reason similarly for @{text STAR} and @{text NT}.\smallskip Clearly we give in this finiteness argument (Step (5)) a very loose bound that is @@ -1536,30 +1565,25 @@ Essentially it matches the string with the longer @{text "Right"}-alternative in the first sequence (and then the `rest' with the empty regular expression @{const ONE} from the second sequence). If we add the simplification above, then we obtain the following value -% -\[ @{term "Seq (Left (Char a)) (Left (Char b))"} -\] - -\noindent where the @{text "Left"}-alternatives get priority. However, this violates the POSIX rules and we have not been able to reconcile this problem. Therefore we leave better bounds for future work.%!\\[-6.5mm] -Note that while Antimirov was able to give a bound on the \emph{size} +Note also that while Antimirov was able to give a bound on the \emph{size} of his partial derivatives~\cite{Antimirov95}, Brzozowski gave a bound on the \emph{number} of derivatives, provided they are quotient via -ACI rules \cite{Brzozowski1964}. Brozozowski's result is crucial when one -uses derivatives for obtaining an automaton (it essentially bounds +ACI rules \cite{Brzozowski1964}. Brzozowski's result is crucial when one +uses his derivatives for obtaining a DFA (it essentially bounds the number of states). However, this result does \emph{not} transfer to our setting where we are interested in the \emph{size} of the -derivatives. For example it is not true for our derivatives that the -set of of derivatives $r \backslash s$ for a given $r$ and all strings +derivatives. For example it is \emph{not} true for our derivatives that the +set of derivatives $r \backslash s$ for a given $r$ and all strings $s$ is finite (even when simplified). This is because for our annotated regular expressions the bitcode annotation is dependent on the number of iterations that -are needed for STAR-regular expressions. This is not a problem for us: Since we intend to do lexing +are needed for @{text STAR}-regular expressions. This is not a problem for us: Since we intend to do lexing by calculating (as fast as possible) derivatives, the bound on the size -of the derivatives is important, not the number of derivatives. +of the derivatives is important, not their number. % of derivatives. *} @@ -1574,15 +1598,22 @@ \cite{Sulzmann2014}. This follows earlier work where we established the correctness of the first algorithm \cite{AusafDyckhoffUrban2016}. In the earlier work we needed to - introduce our own specification about what POSIX values are, + introduce our own specification for POSIX values, because the informal definition given by Sulzmann and Lu did not stand up to formal proof. Also for the second algorithm we needed - to introduce our own definitions and proof ideas in order to establish the - correctness. Our interest in the second algorithm - lies in the fact that by using bitcoded regular expressions and an aggressive - simplification method there is a chance that the derivatives - can be kept universally small (we established in this paper that - for a given $r$ they can be kept finitely bounded for all strings). + to introduce our own definitions and proof ideas in order to + establish the correctness. Our interest in the second algorithm + lies in the fact that by using bitcoded regular expressions and an + aggressive simplification method there is a chance that the + derivatives can be kept universally small (we established in this + paper that for a given $r$ they can be kept finitely bounded for + \emph{all} strings). Our formalisation is approximately 7500 lines of + Isabelle code. A little more than half of this code concerns the finiteness bound + obtained in Section 5. This slight ``bloat'' in the latter part is because + we had to introduce an intermediate datatype for annotated regular expressions and repeat many + definitions for this intermediate datatype. But overall we think this made our + formalisation work smoother. The code of our formalisation can be found at + \textcolor{darkblue}{\url{https://github.com/urbanchr/posix}}. %This is important if one is after %an efficient POSIX lexing algorithm based on derivatives. @@ -1597,46 +1628,46 @@ %point-of-view it is really important to have the formal proofs of %the corresponding properties at hand. - With the results reported here, we can of course only make a claim about the correctness - of the algorithm and the sizes of the + With the results reported here, we can of course only make a claim + about the correctness of the algorithm and the sizes of the derivatives, not about the efficiency or runtime of our version of - Sulzman and Lu's algorithm. But we found the size is an important + Sulzmann and Lu's algorithm. But we found the size is an important first indicator about efficiency: clearly if the derivatives can grow to arbitrarily big sizes and the algorithm needs to traverse the derivatives possibly several times, then the algorithm will be - slow---excruciatingly slow that is. Other works seems to make - stronger claims, but during our work we have developed a healthy - suspicion when for example experimental data is used to back up - efficiency claims. For instance Sulzmann and Lu write about their - equivalent of @{term blexer_simp} \textit{``...we can incrementally - compute bitcoded parse trees in linear time in the size of the - input''} \cite[Page 14]{Sulzmann2014}. Given the growth of the - derivatives in some cases even after aggressive simplification, - this is a hard to believe claim. A similar claim about a - theoretical runtime of @{text "O(n\<^sup>2)"} is made for the - Verbatim lexer, which calculates tokens according to POSIX + slow---excruciatingly slow that is. Other works seem to make + stronger claims, but during our formalisation work we have + developed a healthy suspicion when for example experimental data is + used to back up efficiency claims. For instance Sulzmann and Lu + write about their equivalent of @{term blexer_simp} \textit{``...we + can incrementally compute bitcoded parse trees in linear time in + the size of the input''} \cite[Page 14]{Sulzmann2014}. Given the + growth of the derivatives in some cases even after aggressive + simplification, this is a hard to believe claim. A similar claim + about a theoretical runtime of @{text "O(n\<^sup>2)"} for one + specific list of regular expressions is made for the Verbatim + lexer, which calculates tokens according to POSIX rules~\cite{verbatim}. For this, Verbatim uses Brzozowski's - derivatives like in our work. The authors write: \textit{``The - results of our empirical tests [..] confirm that Verbatim has - @{text "O(n\<^sup>2)"} time complexity.''} + derivatives like in our work. About their empirical data, the authors write: + \textit{``The results of our empirical tests [..] confirm that + Verbatim has @{text "O(n\<^sup>2)"} time complexity.''} \cite[Section~VII]{verbatim}. While their correctness proof for Verbatim is formalised in Coq, the claim about the runtime - complexity is only supported by some emperical evidence obtained by + complexity is only supported by some empirical evidence obtained by using the code extraction facilities of Coq. Given our observation - with the ``growth problem'' of derivatives, we tried out their - extracted OCaml code with the example \mbox{@{text "(a + - aa)\<^sup>*"}} as a single lexing rule, and it took for us around 5 - minutes to tokenise a string of 40 $a$'s and that increased to - approximately 19 minutes when the string is 50 $a$'s long. Taking - into account that derivatives are not simplified in the Verbatim - lexer, such numbers are not surprising. Clearly our result of - having finite derivatives might sound rather weak in this context - but we think such effeciency claims really require further - scrutiny. - - The contribution of this paper is to make sure derivatives do not - grow arbitrarily big (universially) In the example \mbox{@{text "(a - + aa)\<^sup>*"}}, \emph{all} derivatives have a size of 17 or + with the ``growth problem'' of derivatives, this runtime bound + is unlikely to hold universally: indeed we tried out their extracted OCaml + code with the example \mbox{@{text "(a + aa)\<^sup>*"}} as a single + lexing rule, and it took for us around 5 minutes to tokenise a + string of 40 $a$'s and that increased to approximately 19 minutes + when the string is 50 $a$'s long. Taking into account that + derivatives are not simplified in the Verbatim lexer, such numbers + are not surprising. Clearly our result of having finite + derivatives might sound rather weak in this context but we think + such efficiency claims really require further scrutiny. The + contribution of this paper is to make sure derivatives do not grow + arbitrarily big (universally). In the example \mbox{@{text "(a + + aa)\<^sup>*"}}, \emph{all} derivatives have a size of 17 or less. The result is that lexing a string of, say, 50\,000 a's with the regular expression \mbox{@{text "(a + aa)\<^sup>*"}} takes approximately 10 seconds with our Scala implementation of the @@ -1663,6 +1694,23 @@ \mbox{@{text "a\<^bsup>{1000}{100}{5}\<^esup>"}}, the maximum size for our derivatives is a moderate 14. + Let us also return to the example @{text + "a\<^bsup>{0}{4294967295}\<^esup>"} which until recently Rust + deemed acceptable. But this was due to a bug. It turns out that it took Rust + more than 11 minutes to generate an automaton for this regular + expression and then to determine that a string of just one(!) + @{text a} does \emph{not} match this regular expression. Therefore + it is probably a wise choice that in newer versions of Rust's + regular expression library such regular expressions are declared as + ``too big'' and raise an error message. While this is clearly + a contrived example, the safety guaranties Rust wants to provide necessitate + this conservative approach. + However, with the derivatives and simplifications we have shown + in this paper, the example can be solved with ease: + it essentially only involves operations on + integers and our Scala implementation takes only a few seconds to + find out that this string, or even much larger strings, do not match. + Let us also compare our work to the verified Verbatim++ lexer where the authors of the Verbatim lexer introduced a number of improvements and optimisations, for example memoisation @@ -1681,17 +1729,17 @@ this calculation (in the future) as fast as possible. What we can guaranty with the presented work is that the maximum size of the derivatives for \mbox{@{text "a\<^bsup>{100}{5}\<^esup>"}$\,\cdot\,$@{term "STAR a"}} is never bigger than 9. This means our Scala - implementation only needs a few seconds for this example and matching 50\,000 a's. + implementation again only needs a few seconds for this example and matching 50\,000 a's, say. % % %Possible ideas are %zippers which have been used in the context of parsing of %context-free grammars \cite{zipperparser}. - \medskip - - \noindent - Our Isabelle code including the results from Sec.~5 is available from \url{https://github.com/urbanchr/posix}. - %\\[-10mm] + %\\[-5mm] %\smallskip + + %\noindent + %Our Isabelle code including the results from Sec.~5 is available from \url{https://github.com/urbanchr/posix}. + %%\\[-10mm] %%\bibliographystyle{plain} diff -r 56057198e4f5 -r 70c10dc41606 thys3/PosixSpec.thy --- a/thys3/PosixSpec.thy Fri May 26 08:09:30 2023 +0100 +++ b/thys3/PosixSpec.thy Fri May 26 08:10:17 2023 +0100 @@ -46,6 +46,9 @@ | "\ Void : ONE" | "\ Char c : CH c" | "\v \ set vs. \ v : r \ flat v \ [] \ \ Stars vs : STAR r" +| "\\v \ set vs1. \ v : r \ flat v \ []; + \v \ set vs2. \ v : r \ flat v = []; + length (vs1 @ vs2) = n\ \ \ Stars (vs1 @ vs2) : NTIMES r n" inductive_cases Prf_elims: "\ v : ZERO" @@ -54,6 +57,7 @@ "\ v : ONE" "\ v : CH c" "\ vs : STAR r" + "\ vs : NTIMES r n" lemma Prf_Stars_appendE: assumes "\ Stars (vs1 @ vs2) : STAR r" @@ -61,6 +65,28 @@ using assms by (auto intro: Prf.intros elim!: Prf_elims) +lemma Pow_cstring: + fixes A::"string set" + assumes "s \ A ^^ n" + shows "\ss1 ss2. concat (ss1 @ ss2) = s \ length (ss1 @ ss2) = n \ + (\s \ set ss1. s \ A \ s \ []) \ (\s \ set ss2. s \ A \ s = [])" +using assms +apply(induct n arbitrary: s) + apply(auto)[1] + apply(auto simp add: Sequ_def) + apply(drule_tac x="s2" in meta_spec) + apply(simp) +apply(erule exE)+ + apply(clarify) +apply(case_tac "s1 = []") +apply(simp) +apply(rule_tac x="ss1" in exI) +apply(rule_tac x="s1 # ss2" in exI) +apply(simp) +apply(rule_tac x="s1 # ss1" in exI) +apply(rule_tac x="ss2" in exI) + apply(simp) + done lemma flats_Prf_value: assumes "\s\set ss. \v. s = flat v \ \ v : r" @@ -75,15 +101,48 @@ apply(simp) apply(rule_tac x="v#vs" in exI) apply(simp) -done + done + +lemma Aux: + assumes "\s\set ss. s = []" + shows "concat ss = []" +using assms +by (induct ss) (auto) +lemma flats_cval: + assumes "\s\set ss. \v. s = flat v \ \ v : r" + shows "\vs1 vs2. flats (vs1 @ vs2) = concat ss \ length (vs1 @ vs2) = length ss \ + (\v\set vs1. \ v : r \ flat v \ []) \ + (\v\set vs2. \ v : r \ flat v = [])" +using assms +apply(induct ss rule: rev_induct) +apply(rule_tac x="[]" in exI)+ +apply(simp) +apply(simp) +apply(clarify) +apply(case_tac "flat v = []") +apply(rule_tac x="vs1" in exI) +apply(rule_tac x="v#vs2" in exI) +apply(simp) +apply(rule_tac x="vs1 @ [v]" in exI) +apply(rule_tac x="vs2" in exI) +apply(simp) +by (simp add: Aux) + +lemma pow_Prf: + assumes "\v\set vs. \ v : r \ flat v \ A" + shows "flats vs \ A ^^ (length vs)" + using assms + by (induct vs) (auto) lemma L_flat_Prf1: assumes "\ v : r" shows "flat v \ L r" -using assms -by (induct) (auto simp add: Sequ_def Star_concat) - + using assms + apply (induct v r rule: Prf.induct) + apply(auto simp add: Sequ_def Star_concat lang_pow_add) + by (metis pow_Prf) + lemma L_flat_Prf2: assumes "s \ L r" shows "\v. \ v : r \ flat v = s" @@ -105,7 +164,30 @@ next case (ALT r1 r2 s) then show "\v. \ v : ALT r1 r2 \ flat v = s" - unfolding L.simps by (fastforce intro: Prf.intros) + unfolding L.simps by (fastforce intro: Prf.intros) +next + case (NTIMES r n) + have IH: "\s. s \ L r \ \v. \ v : r \ flat v = s" by fact + have "s \ L (NTIMES r n)" by fact + then obtain ss1 ss2 where "concat (ss1 @ ss2) = s" "length (ss1 @ ss2) = n" + "\s \ set ss1. s \ L r \ s \ []" "\s \ set ss2. s \ L r \ s = []" + using Pow_cstring by force + then obtain vs1 vs2 where "flats (vs1 @ vs2) = s" "length (vs1 @ vs2) = n" + "\v\set vs1. \ v : r \ flat v \ []" "\v\set vs2. \ v : r \ flat v = []" + using IH flats_cval + apply - + apply(drule_tac x="ss1 @ ss2" in meta_spec) + apply(drule_tac x="r" in meta_spec) + apply(drule meta_mp) + apply(simp) + apply (metis Un_iff) + apply(clarify) + apply(drule_tac x="vs1" in meta_spec) + apply(drule_tac x="vs2" in meta_spec) + apply(simp) + done + then show "\v. \ v : NTIMES r n \ flat v = s" + using Prf.intros(7) flat_Stars by blast qed (auto intro: Prf.intros) @@ -130,9 +212,11 @@ and "LV ONE s = (if s = [] then {Void} else {})" and "LV (CH c) s = (if s = [c] then {Char c} else {})" and "LV (ALT r1 r2) s = Left ` LV r1 s \ Right ` LV r2 s" + and "LV (NTIMES r 0) s = (if s = [] then {Stars []} else {})" unfolding LV_def -by (auto intro: Prf.intros elim: Prf.cases) - + apply (auto intro: Prf.intros elim: Prf.cases) + by (metis Prf.intros(7) append.right_neutral empty_iff list.set(1) list.size(3)) + abbreviation "Prefixes s \ {s'. prefix s' s}" @@ -174,6 +258,64 @@ ultimately show "finite (Prefixes s)" by simp qed +definition + "Stars_Append Vs1 Vs2 \ {Stars (vs1 @ vs2) | vs1 vs2. Stars vs1 \ Vs1 \ Stars vs2 \ Vs2}" + +lemma finite_Stars_Append: + assumes "finite Vs1" "finite Vs2" + shows "finite (Stars_Append Vs1 Vs2)" + using assms +proof - + define UVs1 where "UVs1 \ Stars -` Vs1" + define UVs2 where "UVs2 \ Stars -` Vs2" + from assms have "finite UVs1" "finite UVs2" + unfolding UVs1_def UVs2_def + by(simp_all add: finite_vimageI inj_on_def) + then have "finite ((\(vs1, vs2). Stars (vs1 @ vs2)) ` (UVs1 \ UVs2))" + by simp + moreover + have "Stars_Append Vs1 Vs2 = (\(vs1, vs2). Stars (vs1 @ vs2)) ` (UVs1 \ UVs2)" + unfolding Stars_Append_def UVs1_def UVs2_def by auto + ultimately show "finite (Stars_Append Vs1 Vs2)" + by simp +qed + +lemma LV_NTIMES_subset: + "LV (NTIMES r n) s \ Stars_Append (LV (STAR r) s) (\i\n. LV (NTIMES r i) [])" +apply(auto simp add: LV_def) +apply(auto elim!: Prf_elims) + apply(auto simp add: Stars_Append_def) + apply(rule_tac x="vs1" in exI) + apply(rule_tac x="vs2" in exI) + apply(auto) + using Prf.intros(6) apply(auto) + apply(rule_tac x="length vs2" in bexI) + thm Prf.intros + apply(subst append.simps(1)[symmetric]) + apply(rule Prf.intros) + apply(auto)[1] + apply(auto)[1] + apply(simp) + apply(simp) + done + +lemma LV_NTIMES_Suc_empty: + shows "LV (NTIMES r (Suc n)) [] = + (\(v, vs). Stars (v#vs)) ` (LV r [] \ (Stars -` (LV (NTIMES r n) [])))" +unfolding LV_def +apply(auto elim!: Prf_elims simp add: image_def) +apply(case_tac vs1) +apply(auto) +apply(case_tac vs2) +apply(auto) +apply(subst append.simps(1)[symmetric]) +apply(rule Prf.intros) +apply(auto) +apply(subst append.simps(1)[symmetric]) +apply(rule Prf.intros) +apply(auto) + done + lemma LV_STAR_finite: assumes "\s. finite (LV r s)" shows "finite (LV (STAR r) s)" @@ -215,7 +357,22 @@ ultimately show "finite (LV (STAR r) s)" by (simp add: finite_subset) qed - + +lemma finite_NTimes_empty: + assumes "\s. finite (LV r s)" + shows "finite (LV (NTIMES r n) [])" + using assms + apply(induct n) + apply(auto simp add: LV_simps) + apply(subst LV_NTIMES_Suc_empty) + apply(rule finite_imageI) + apply(rule finite_cartesian_product) + using assms apply simp + apply(rule finite_vimageI) + apply(simp) + apply(simp add: inj_on_def) + done + lemma LV_finite: shows "finite (LV r s)" @@ -251,6 +408,15 @@ next case (STAR r s) then show "finite (LV (STAR r) s)" by (simp add: LV_STAR_finite) +next + case (NTIMES r n s) + have "\s. finite (LV r s)" by fact + then have "finite (Stars_Append (LV (STAR r) s) (\i\n. LV (NTIMES r i) []))" + apply(rule_tac finite_Stars_Append) + apply (simp add: LV_STAR_finite) + using finite_NTimes_empty by blast + then show "finite (LV (NTIMES r n) s)" + by (metis LV_NTIMES_subset finite_subset) qed @@ -271,6 +437,11 @@ \(\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ (s1 @ s\<^sub>3) \ L r \ s\<^sub>4 \ L (STAR r))\ \ (s1 @ s2) \ STAR r \ Stars (v # vs)" | Posix_STAR2: "[] \ STAR r \ Stars []" +| Posix_NTIMES1: "\s1 \ r \ v; s2 \ NTIMES r (n - 1) \ Stars vs; flat v \ []; 0 < n; + \(\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ (s1 @ s\<^sub>3) \ L r \ s\<^sub>4 \ L (NTIMES r (n - 1)))\ + \ (s1 @ s2) \ NTIMES r n \ Stars (v # vs)" +| Posix_NTIMES2: "\\v \ set vs. [] \ r \ v; length vs = n\ + \ [] \ NTIMES r n \ Stars vs" inductive_cases Posix_elims: "s \ ZERO \ v" @@ -279,19 +450,47 @@ "s \ ALT r1 r2 \ v" "s \ SEQ r1 r2 \ v" "s \ STAR r \ v" + "s \ NTIMES r n \ v" lemma Posix1: assumes "s \ r \ v" shows "s \ L r" "flat v = s" using assms - by(induct s r v rule: Posix.induct) - (auto simp add: Sequ_def) + apply(induct s r v rule: Posix.induct) + apply(auto simp add: pow_empty_iff) + apply (metis Suc_pred concI lang_pow.simps(2)) + by (meson ex_in_conv set_empty) + +lemma Posix1a: + assumes "s \ r \ v" + shows "\ v : r" +using assms + apply(induct s r v rule: Posix.induct) + apply(auto intro: Prf.intros) + apply (metis Prf.intros(6) Prf_elims(6) set_ConsD val.inject(5)) + prefer 2 + apply (metis Posix1(2) Prf.intros(7) append_Nil empty_iff list.set(1)) + apply(erule Prf_elims) + apply(auto) + apply(subst append.simps(2)[symmetric]) + apply(rule Prf.intros) + apply(auto) + done text \ For a give value and string, our Posix definition determines a unique value. \ +lemma List_eq_zipI: + assumes "list_all2 (\v1 v2. v1 = v2) vs1 vs2" + and "length vs1 = length vs2" + shows "vs1 = vs2" + using assms + apply(induct vs1 vs2 rule: list_all2_induct) + apply(auto) + done + lemma Posix_determ: assumes "s \ r \ v1" "s \ r \ v2" shows "v1 = v2" @@ -356,6 +555,29 @@ case (Posix_STAR2 r v2) have "[] \ STAR r \ v2" by fact then show "Stars [] = v2" by cases (auto simp add: Posix1) +next + case (Posix_NTIMES2 vs r n v2) + then show "Stars vs = v2" + apply(erule_tac Posix_elims) + apply(auto) + apply (simp add: Posix1(2)) + apply(rule List_eq_zipI) + apply(auto simp add: list_all2_iff) + by (meson in_set_zipE) +next + case (Posix_NTIMES1 s1 r v s2 n vs) + have "(s1 @ s2) \ NTIMES r n \ v2" + "s1 \ r \ v" "s2 \ NTIMES r (n - 1) \ Stars vs" "flat v \ []" + "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s2 \ s1 @ s\<^sub>3 \ L r \ s\<^sub>4 \ L (NTIMES r (n - 1 )))" by fact+ + then obtain v' vs' where "v2 = Stars (v' # vs')" "s1 \ r \ v'" "s2 \ (NTIMES r (n - 1)) \ (Stars vs')" + apply(cases) apply (auto simp add: append_eq_append_conv2) + using Posix1(1) apply fastforce + apply (metis One_nat_def Posix1(1) Posix_NTIMES1.hyps(7) append.right_neutral append_self_conv2) + using Posix1(2) by blast + moreover + have IHs: "\v2. s1 \ r \ v2 \ v = v2" + "\v2. s2 \ NTIMES r (n - 1) \ v2 \ Stars vs = v2" by fact+ + ultimately show "Stars (v # vs) = v2" by auto qed @@ -368,13 +590,9 @@ shows "v \ LV r s" using assms unfolding LV_def apply(induct rule: Posix.induct) - apply(auto simp add: intro!: Prf.intros elim!: Prf_elims) - done + apply(auto simp add: intro!: Prf.intros elim!: Prf_elims Posix1a) + apply (smt (verit, best) One_nat_def Posix1a Posix_NTIMES1 L.simps(7)) + using Posix1a Posix_NTIMES2 by blast -lemma Posix_Prf: - assumes "s \ r \ v" - shows "\ v : r" - using assms Posix_LV LV_def - by simp end diff -r 56057198e4f5 -r 70c10dc41606 thys3/ROOT --- a/thys3/ROOT Fri May 26 08:09:30 2023 +0100 +++ b/thys3/ROOT Fri May 26 08:10:17 2023 +0100 @@ -3,8 +3,6 @@ "HOL-Library.Sublist" "RegLangs" "PosixSpec" - (*"Positions"*) - (*"PDerivs"*) "Lexer" "LexerSimp" "Blexer" @@ -27,8 +25,21 @@ Paper document_files "root.tex" + "lipics-v2021.cls" + "cc-by.pdf" + "lipics-logo-bw.pdf" + "root.bib" + + +(* +theories + Paper +document_files + "root.tex" "llncs.cls" "lipics-v2021.cls" "cc-by.pdf" "lipics-logo-bw.pdf" - "root.bib" \ No newline at end of file + "root.bib" +*) + diff -r 56057198e4f5 -r 70c10dc41606 thys3/RegLangs.thy --- a/thys3/RegLangs.thy Fri May 26 08:09:30 2023 +0100 +++ b/thys3/RegLangs.thy Fri May 26 08:10:17 2023 +0100 @@ -1,4 +1,3 @@ - theory RegLangs imports Main "HOL-Library.Sublist" begin @@ -22,6 +21,42 @@ and "{} ;; A = {}" by (simp_all add: Sequ_def) +lemma concI[simp,intro]: "u : A \ v : B \ u@v : A ;; B" +by (auto simp add: Sequ_def) + +lemma concE[elim]: +assumes "w \ A ;; B" +obtains u v where "u \ A" "v \ B" "w = u@v" +using assms by (auto simp: Sequ_def) + +lemma concI_if_Nil2: "[] \ B \ xs : A \ xs \ A ;; B" +by (metis append_Nil2 concI) + +lemma conc_assoc: "(A ;; B) ;; C = A ;; (B ;; C)" +by (auto elim!: concE) (simp only: append_assoc[symmetric] concI) + + +text \Language power operations\ + +overloading lang_pow == "compow :: nat \ string set \ string set" +begin + primrec lang_pow :: "nat \ string set \ string set" where + "lang_pow 0 A = {[]}" | + "lang_pow (Suc n) A = A ;; (lang_pow n A)" +end + + +lemma conc_pow_comm: + shows "A ;; (A ^^ n) = (A ^^ n) ;; A" +by (induct n) (simp_all add: conc_assoc[symmetric]) + +lemma lang_pow_add: "A ^^ (n + m) = (A ^^ n) ;; (A ^^ m)" + by (induct n) (auto simp: conc_assoc) + +lemma lang_empty: + fixes A::"string set" + shows "A ^^ 0 = {[]}" + by simp section \Semantic Derivative (Left Quotient) of Languages\ @@ -89,6 +124,11 @@ unfolding Der_def Sequ_def by(auto simp add: Star_decomp) +lemma Der_inter[simp]: "Der a (A \ B) = Der a A \ Der a B" + and Der_compl[simp]: "Der a (-A) = - Der a A" + and Der_Union[simp]: "Der a (Union M) = Union(Der a ` M)" + and Der_UN[simp]: "Der a (UN x:I. S x) = (UN x:I. Der a (S x))" +by (auto simp: Der_def) lemma Der_star[simp]: shows "Der c (A\) = (Der c A) ;; A\" @@ -104,6 +144,13 @@ finally show "Der c (A\) = (Der c A) ;; A\" . qed +lemma Der_pow[simp]: + shows "Der c (A ^^ n) = (if n = 0 then {} else (Der c A) ;; (A ^^ (n - 1)))" + apply(induct n arbitrary: A) + apply(auto simp add: Cons_eq_append_conv) + by (metis Suc_pred concI_if_Nil2 conc_assoc conc_pow_comm lang_pow.simps(2)) + + lemma Star_concat: assumes "\s \ set ss. s \ A" shows "concat ss \ A\" @@ -129,6 +176,7 @@ | SEQ rexp rexp | ALT rexp rexp | STAR rexp +| NTIMES rexp nat section \Semantics of Regular Expressions\ @@ -141,7 +189,7 @@ | "L (SEQ r1 r2) = (L r1) ;; (L r2)" | "L (ALT r1 r2) = (L r1) \ (L r2)" | "L (STAR r) = (L r)\" - +| "L (NTIMES r n) = (L r) ^^ n" section \Nullable, Derivatives\ @@ -154,7 +202,7 @@ | "nullable (ALT r1 r2) = (nullable r1 \ nullable r2)" | "nullable (SEQ r1 r2) = (nullable r1 \ nullable r2)" | "nullable (STAR r) = True" - +| "nullable (NTIMES r n) = (if n = 0 then True else nullable r)" fun der :: "char \ rexp \ rexp" @@ -168,6 +216,8 @@ then ALT (SEQ (der c r1) r2) (der c r2) else SEQ (der c r1) r2)" | "der c (STAR r) = SEQ (der c r) (STAR r)" +| "der c (NTIMES r n) = (if n = 0 then ZERO else SEQ (der c r) (NTIMES r (n - 1)))" + fun ders :: "string \ rexp \ rexp" @@ -176,13 +226,23 @@ | "ders (c # s) r = ders s (der c r)" +lemma pow_empty_iff: + shows "[] \ (L r) ^^ n \ (if n = 0 then True else [] \ (L r))" + by (induct n) (auto simp add: Sequ_def) + lemma nullable_correctness: shows "nullable r \ [] \ (L r)" -by (induct r) (auto simp add: Sequ_def) + by (induct r) (auto simp add: Sequ_def pow_empty_iff) lemma der_correctness: shows "L (der c r) = Der c (L r)" -by (induct r) (simp_all add: nullable_correctness) + apply (induct r) + apply(auto simp add: nullable_correctness Sequ_def) + using Der_def apply force + using Der_def apply auto[1] + apply (smt (verit, ccfv_SIG) Der_def append_eq_Cons_conv mem_Collect_eq) + using Der_def apply force + using Der_Sequ Sequ_def by auto lemma ders_correctness: shows "L (ders s r) = Ders s (L r)" @@ -198,40 +258,4 @@ by (simp add: ders_append) -(* -datatype ctxt = - SeqC rexp bool - | AltCL rexp - | AltCH rexp - | StarC rexp - -function - down :: "char \ rexp \ ctxt list \ rexp * ctxt list" -and up :: "char \ rexp \ ctxt list \ rexp * ctxt list" -where - "down c (SEQ r1 r2) ctxts = - (if (nullable r1) then down c r1 (SeqC r2 True # ctxts) - else down c r1 (SeqC r2 False # ctxts))" -| "down c (CH d) ctxts = - (if c = d then up c ONE ctxts else up c ZERO ctxts)" -| "down c ONE ctxts = up c ZERO ctxts" -| "down c ZERO ctxts = up c ZERO ctxts" -| "down c (ALT r1 r2) ctxts = down c r1 (AltCH r2 # ctxts)" -| "down c (STAR r1) ctxts = down c r1 (StarC r1 # ctxts)" -| "up c r [] = (r, [])" -| "up c r (SeqC r2 False # ctxts) = up c (SEQ r r2) ctxts" -| "up c r (SeqC r2 True # ctxts) = down c r2 (AltCL (SEQ r r2) # ctxts)" -| "up c r (AltCL r1 # ctxts) = up c (ALT r1 r) ctxts" -| "up c r (AltCH r2 # ctxts) = down c r2 (AltCL r # ctxts)" -| "up c r (StarC r1 # ctxts) = up c (SEQ r (STAR r1)) ctxts" - apply(pat_completeness) - apply(auto) - done - -termination - sorry - -*) - - end \ No newline at end of file diff -r 56057198e4f5 -r 70c10dc41606 thys3/document/root.tex --- a/thys3/document/root.tex Fri May 26 08:09:30 2023 +0100 +++ b/thys3/document/root.tex Fri May 26 08:10:17 2023 +0100 @@ -1,5 +1,5 @@ -\documentclass[runningheads]{llncs} -%!\documentclass[runningheads]{lipics-v2021} +%!\documentclass[runningheads]{llncs} +\documentclass[runningheads]{lipics-v2021} \usepackage{times} \usepackage{isabelle} \usepackage{isabellesym} @@ -53,6 +53,7 @@ \newcommand{\ZERO}{\mbox{\bf 0}} \newcommand{\ONE}{\mbox{\bf 1}} \def\rs{\mathit{rs}} +\definecolor{darkblue}{rgb}{0,0,0.6} \def\Brz{Brzozowski} \def\der{\backslash} @@ -62,11 +63,11 @@ %!\bibliographystyle{plainurl} \bibliographystyle{plain} -\title{{POSIX} {L}exing with {B}itcoded {D}erivatives} -\titlerunning{POSIX Lexing with Bitcoded Derivatives} -\author{Chengsong Tan\inst{1,2} \and Christian Urban\inst{2}} -\institute{Imperial College London \and King's College London\\ -\email{\{chengsong.tan,christian.urban\}@kcl.ac.uk}} +%\title{{POSIX} {L}exing with {B}itcoded {D}erivatives} +%\titlerunning{POSIX Lexing with Bitcoded Derivatives} +%\author{Chengsong Tan\inst{1,2} \and Christian Urban\inst{2}} +%\institute{Imperial College London \and King's College London\\ +%\email{\{chengsong.tan,christian.urban\}@kcl.ac.uk}} %!\author{Chengsong Tan}{King's College London}{chengsong.tan@kcl.ac.uk}{}{} %!\author{Christian Urban}{King's College London}{christian.urban@kcl.ac.uk}{}{} %!\authorrunning{C.~Tan and C.~Urban} @@ -78,6 +79,19 @@ %!\renewcommand{\DOIPrefix}{} %!\nolinenumbers +\title{{POSIX} {L}exing with {B}itcoded {D}erivatives} +\titlerunning{POSIX Lexing with Bitcoded Derivatives} +\author{Chengsong Tan}{Imperial College London}{ctan1@ic.ac.uk}{}{} +\author{Christian Urban}{King's College London}{christian.urban@kcl.ac.uk}{}{} +\authorrunning{C.~Tan and C.~Urban} +\keywords{POSIX matching and lexing, derivatives of regular expressions, Isabelle/HOL} +\category{} +\ccsdesc[100]{Design and analysis of algorithms} +\ccsdesc[100]{Formal languages and automata theory} +\Copyright{\mbox{}} +\renewcommand{\DOIPrefix}{} +\nolinenumbers + \begin{document} \maketitle