# HG changeset patch # User Christian Urban # Date 1564389440 -3600 # Node ID 89e6605c4ca41c9ae734694cfd6255b0beb676ec # Parent a730a5a0bab988aa5e934efe4a29d4bcf2c69dac updated diff -r a730a5a0bab9 -r 89e6605c4ca4 TODO --- a/TODO Tue Jul 23 21:21:49 2019 +0100 +++ b/TODO Mon Jul 29 09:37:20 2019 +0100 @@ -5,7 +5,10 @@ Regular Path Queries & regular expressions +------ +Russ Cox + https://swtch.com/~rsc/regexp/regexp1.html ------ about extended regular expressions http://www.ddfy.de/ diff -r a730a5a0bab9 -r 89e6605c4ca4 exps/both.scala --- a/exps/both.scala Tue Jul 23 21:21:49 2019 +0100 +++ b/exps/both.scala Mon Jul 29 09:37:20 2019 +0100 @@ -54,6 +54,27 @@ case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp case class ASTAR(bs: Bits, r: ARexp) extends ARexp +// "faster" equality +def equ(a1: ARexp, a2: ARexp) : Boolean = (a1, a2) match { + case (AZERO, AZERO) => true + case (AONE(_), AONE(_)) => true + case (APRED(_, _, s1), APRED(_, _, s2)) => s1 == s2 + case (AALTS(_, Nil), AALTS(_, Nil)) => true + case (AALTS(_, r1::rs1), AALTS(_, r2::rs2)) => + equ(r1, r2) && equ(AALTS(Nil, rs1), AALTS(Nil, rs2)) + case (ASEQ(_, r1, r2), ASEQ(_, r3, r4)) => equ(r1, r3) && equ(r2, r4) + case (ASTAR(_, r1), ASTAR(_, r2)) => equ(r1, r2) + case _ => false +} + +def distinctEq(xs: List[ARexp], acc: List[ARexp] = Nil): List[ARexp] = xs match { + case Nil => Nil + case (x::xs) => { + if (acc.exists(equ(_, x))) distinctEq(xs, acc) + else x::distinctEq(xs, x::acc) + } +} + // abbreviations def AALT(bs: Bits, r1: ARexp, r2: ARexp) = AALTS(bs, List(r1, r2)) @@ -449,7 +470,7 @@ case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s) case (r1s, r2s) => ASEQ(bs1, r1s, r2s) } - case AALTS(bs1, rs) => distinctBy(flats(rs.map(bsimp)), erase) match { + case AALTS(bs1, rs) => distinctEq(flats(rs.map(bsimp))) match { case Nil => AZERO case r :: Nil => fuse(bs1, r) case rs => AALTS(bs1, rs) @@ -479,7 +500,7 @@ // Quick example -val r : Rexp = ZERO | "a" +val r : Rexp = (ZERO | "a") | ZERO lexing(r, "a") @@ -775,11 +796,12 @@ println(btokenise_simp(WHILE_REGS, fib_prog)) println("equal? " + (tokenise_simp(WHILE_REGS, fib_prog) == btokenise_simp(WHILE_REGS, fib_prog))) -for (i <- 1 to 20) { +for (i <- 1 to 40) { print("Old: " + time(tokenise_simp(WHILE_REGS, fib_prog * i))) print(" Bit: " + time(btokenise_simp(WHILE_REGS, fib_prog * i))) - println(" Bit full simp: " + time(btokenise_simp_full(WHILE_REGS, fib_prog * i))) + //println(" Bit full simp: " + time(btokenise_simp_full(WHILE_REGS, fib_prog * i))) //println(" Bit2: " + time(btokenise2_simp(WHILE_REGS, fib_prog * i))) + println("") } diff -r a730a5a0bab9 -r 89e6605c4ca4 thys/BitCoded.thy --- a/thys/BitCoded.thy Tue Jul 23 21:21:49 2019 +0100 +++ b/thys/BitCoded.thy Mon Jul 29 09:37:20 2019 +0100 @@ -1,9 +1,9 @@ - + theory BitCoded imports "Lexer" begin -section {* Bit-Encodings *} +section \Bit-Encodings\ datatype bit = Z | S @@ -118,6 +118,13 @@ | "erase (ASEQ _ r1 r2) = SEQ (erase r1) (erase r2)" | "erase (ASTAR _ r) = STAR (erase r)" +lemma decode_code_erase: + assumes "\ v : (erase a)" + shows "decode (code v) (erase a) = Some v" + using assms + by (simp add: decode_code) + + fun nonalt :: "arexp \ bool" where "nonalt (AALTs bs2 rs) = False" @@ -560,6 +567,13 @@ | "flts ((AALTs bs rs1) # rs) = (map (fuse bs) rs1) @ flts rs" | "flts (r1 # rs) = r1 # flts rs" +fun li :: "bit list \ arexp list \ arexp" + where + "li _ [] = AZERO" +| "li bs [a] = fuse bs a" +| "li bs as = AALTs bs as" + + fun bsimp_ASEQ :: "bit list \ arexp \ arexp \ arexp" where "bsimp_ASEQ _ AZERO _ = AZERO" @@ -582,6 +596,8 @@ | "bsimp r = r" + + fun bders_simp :: "arexp \ string \ arexp" where @@ -1285,26 +1301,34 @@ shows "good (bsimp a) \ bsimp a = AZERO" apply(induct a taking: asize rule: measure_induct) apply(case_tac x) - apply(simp) - apply(simp) + apply(simp) + apply(simp) apply(simp) prefer 3 apply(simp) prefer 2 + (* AALTs case *) apply(simp only:) apply(case_tac "x52") apply(simp) - apply(simp only: good0a) + thm good0a + (* AALTs list at least one - case *) + apply(simp only: ) apply(frule_tac x="a" in spec) apply(drule mp) - apply(simp) + apply(simp) + (* either first element is good, or AZERO *) apply(erule disjE) prefer 2 - apply(simp) + apply(simp) + (* in the AZERO case, the size is smaller *) apply(drule_tac x="AALTs x51 list" in spec) apply(drule mp) - apply(simp add: asize0) - apply(auto)[1] + apply(simp add: asize0) + apply(subst (asm) bsimp.simps) + apply(subst (asm) bsimp.simps) + apply(assumption) + (* in the good case *) apply(frule_tac x="AALTs x51 list" in spec) apply(drule mp) apply(simp add: asize0) @@ -1324,6 +1348,7 @@ apply(auto)[1] apply auto[1] apply (metis ex_map_conv nn1b nn1c) + (* in the AZERO case *) apply(simp) apply(frule_tac x="a" in spec) apply(drule mp) @@ -1866,6 +1891,728 @@ apply(auto) done +lemma LA: + assumes "\ v : ders s (erase r)" + shows "retrieve (bders r s) v = retrieve r (flex (erase r) id s v)" + using assms + apply(induct s arbitrary: r v rule: rev_induct) + apply(simp) + apply(simp add: bders_append ders_append) + apply(subst bder_retrieve) + apply(simp) + apply(drule Prf_injval) + by (simp add: flex_append) + + +lemma LB: + assumes "s \ (erase r) \ v" + shows "retrieve r v = retrieve r (flex (erase r) id s (mkeps (ders s (erase r))))" + using assms + apply(induct s arbitrary: r v rule: rev_induct) + apply(simp) + apply(subgoal_tac "v = mkeps (erase r)") + prefer 2 + apply (simp add: Posix1(1) Posix_determ Posix_mkeps nullable_correctness) + apply(simp) + apply(simp add: flex_append ders_append) + by (metis Posix_determ Posix_flex Posix_injval Posix_mkeps ders_snoc lexer_correctness(2) lexer_flex) + +lemma LB_sym: + assumes "s \ (erase r) \ v" + shows "retrieve r v = retrieve r (flex (erase r) id s (mkeps (erase (bders r s))))" + using assms + by (simp add: LB) + + +lemma LC: + assumes "s \ (erase r) \ v" + shows "retrieve r v = retrieve (bders r s) (mkeps (erase (bders r s)))" + apply(simp) + by (metis LA LB Posix1(1) assms lexer_correct_None lexer_flex mkeps_nullable) + + +lemma L0: + assumes "bnullable a" + shows "retrieve (bsimp a) (mkeps (erase (bsimp a))) = retrieve a (mkeps (erase a))" + using assms + by (metis b3 bmkeps_retrieve bmkeps_simp bnullable_correctness) + +thm bmkeps_retrieve + +lemma L0a: + assumes "s \ L(erase a)" + shows "retrieve (bsimp (bders a s)) (mkeps (erase (bsimp (bders a s)))) = + retrieve (bders a s) (mkeps (erase (bders a s)))" + using assms + by (metis L0 bnullable_correctness erase_bders lexer_correct_None lexer_flex) + +lemma L0aa: + assumes "s \ L (erase a)" + shows "[] \ erase (bsimp (bders a s)) \ mkeps (erase (bsimp (bders a s)))" + using assms + by (metis Posix_mkeps b3 bnullable_correctness erase_bders lexer_correct_None lexer_flex) + +lemma L0aaa: + assumes "[c] \ L (erase a)" + shows "[c] \ (erase a) \ flex (erase a) id [c] (mkeps (erase (bder c a)))" + using assms + by (metis bders.simps(1) bders.simps(2) erase_bders lexer_correct_None lexer_correct_Some lexer_flex option.inject) + +lemma L0aaaa: + assumes "[c] \ L (erase a)" + shows "[c] \ (erase a) \ flex (erase a) id [c] (mkeps (erase (bders a [c])))" + using assms + using L0aaa by auto + + +lemma L02: + assumes "bnullable (bder c a)" + shows "retrieve (bsimp a) (flex (erase (bsimp a)) id [c] (mkeps (erase (bder c (bsimp a))))) = + retrieve (bder c (bsimp a)) (mkeps (erase (bder c (bsimp a))))" + using assms + apply(simp) + using bder_retrieve L0 bmkeps_simp bmkeps_retrieve L0 LA LB + apply(subst bder_retrieve[symmetric]) + apply (metis L_bsimp_erase bnullable_correctness der_correctness erase_bder mkeps_nullable nullable_correctness) + apply(simp) + done + +lemma L02_bders: + assumes "bnullable (bders a s)" + shows "retrieve (bsimp a) (flex (erase (bsimp a)) id s (mkeps (erase (bders (bsimp a) s)))) = + retrieve (bders (bsimp a) s) (mkeps (erase (bders (bsimp a) s)))" + using assms + by (metis LA L_bsimp_erase bnullable_correctness ders_correctness erase_bders mkeps_nullable nullable_correctness) + + + + +lemma L03: + assumes "bnullable (bder c a)" + shows "retrieve (bder c (bsimp a)) (mkeps (erase (bder c (bsimp a)))) = + bmkeps (bsimp (bder c (bsimp a)))" + using assms + by (metis L0 L_bsimp_erase bmkeps_retrieve bnullable_correctness der_correctness erase_bder nullable_correctness) + +lemma L04: + assumes "bnullable (bder c a)" + shows "retrieve (bder c (bsimp a)) (mkeps (erase (bder c (bsimp a)))) = + retrieve (bsimp (bder c (bsimp a))) (mkeps (erase (bsimp (bder c (bsimp a)))))" + using assms + by (metis L0 L_bsimp_erase bnullable_correctness der_correctness erase_bder nullable_correctness) + +lemma L05: + assumes "bnullable (bder c a)" + shows "retrieve (bder c (bsimp a)) (mkeps (erase (bder c (bsimp a)))) = + retrieve (bsimp (bder c (bsimp a))) (mkeps (erase (bsimp (bder c (bsimp a)))))" + using assms + using L04 by auto + +lemma L06: + assumes "bnullable (bder c a)" + shows "bmkeps (bder c (bsimp a)) = bmkeps (bsimp (bder c (bsimp a)))" + using assms + by (metis L03 L_bsimp_erase bmkeps_retrieve bnullable_correctness der_correctness erase_bder nullable_correctness) + +lemma L07: + assumes "s \ L (erase r)" + shows "retrieve r (flex (erase r) id s (mkeps (ders s (erase r)))) + = retrieve (bders r s) (mkeps (erase (bders r s)))" + using assms + using LB LC lexer_correct_Some by auto + +lemma LXXX: + assumes "s \ (erase r) \ v" "s \ (erase (bsimp r)) \ v'" + shows "retrieve r v = retrieve (bsimp r) v'" + using assms + apply - + thm LC + apply(subst LC) + apply(assumption) + apply(subst L0[symmetric]) + using bnullable_correctness lexer_correctness(2) lexer_flex apply fastforce + apply(subst (2) LC) + apply(assumption) + apply(subst (2) L0[symmetric]) + using bnullable_correctness lexer_correctness(2) lexer_flex apply fastforce + + oops + + +lemma L07a: + assumes "s \ L (erase r)" + shows "retrieve (bsimp r) (flex (erase (bsimp r)) id s (mkeps (ders s (erase (bsimp r))))) + = retrieve r (flex (erase r) id s (mkeps (ders s (erase r))))" + using assms + apply(induct s arbitrary: r) + apply(simp) + using L0a apply force + apply(drule_tac x="(bder a r)" in meta_spec) + apply(drule meta_mp) + apply (metis L_bsimp_erase erase_bder lexer.simps(2) lexer_correct_None option.case(1)) + apply(drule sym) + apply(simp) + apply(subst (asm) bder_retrieve) + apply (metis Posix_Prf Posix_flex Posix_mkeps ders.simps(2) lexer_correct_None lexer_flex) + apply(simp only: flex_fun_apply) + apply(simp) + using L0[no_vars] bder_retrieve[no_vars] LA[no_vars] LC[no_vars] L07[no_vars] + + +lemma L08: + assumes "s \ L (erase r)" + shows "retrieve (bders (bsimp r) s) (mkeps (erase (bders (bsimp r) s))) + = retrieve (bders r s) (mkeps (erase (bders r s)))" + using assms + apply(induct s arbitrary: r) + apply(simp) + using L0 bnullable_correctness nullable_correctness apply blast + apply(simp add: bders_append) + apply(drule_tac x="(bder a (bsimp r))" in meta_spec) + apply(drule meta_mp) + apply (metis L_bsimp_erase erase_bder lexer.simps(2) lexer_correct_None option.case(1)) + apply(drule sym) + apply(simp) + apply(subst LA) + apply (metis L0aa L_bsimp_erase Posix1(1) ders.simps(2) ders_correctness erase_bder erase_bders mkeps_nullable nullable_correctness) + apply(subst LA) + using lexer_correct_None lexer_flex mkeps_nullable apply force + + using L0[no_vars] bder_retrieve[no_vars] LA[no_vars] LC[no_vars] L07[no_vars] + +thm L0[no_vars] bder_retrieve[no_vars] LA[no_vars] LC[no_vars] L07[no_vars] + oops + +lemma test: + assumes "s = [c]" + shows "retrieve (bders r s) v = XXX" and "YYY = retrieve r (flex (erase r) id s v)" + using assms + apply(simp only: bders.simps) + defer + using assms + apply(simp only: flex.simps id_simps) + using L0[no_vars] bder_retrieve[no_vars] LA[no_vars] LC[no_vars] + find_theorems "retrieve (bders _ _) _" + find_theorems "retrieve _ (mkeps _)" + oops + +lemma L06X: + assumes "bnullable (bder c a)" + shows "bmkeps (bder c (bsimp a)) = bmkeps (bder c a)" + using assms + apply(induct a arbitrary: c) + apply(simp) + apply(simp) + apply(simp) + prefer 3 + apply(simp) + prefer 2 + apply(simp) + + defer + apply(simp only: bnullable.simps) + oops + +lemma L06_2: + assumes "bnullable (bders a [c,d])" + shows "bmkeps (bders (bsimp a) [c,d]) = bmkeps (bsimp (bders (bsimp a) [c,d]))" + using assms + apply(simp) + by (metis L_bsimp_erase bmkeps_simp bnullable_correctness der_correctness erase_bder nullable_correctness) + +lemma L06_bders: + assumes "bnullable (bders a s)" + shows "bmkeps (bders (bsimp a) s) = bmkeps (bsimp (bders (bsimp a) s))" + using assms + by (metis L_bsimp_erase bmkeps_simp bnullable_correctness ders_correctness erase_bders nullable_correctness) + +lemma LLLL: + shows "L (erase a) = L (erase (bsimp a))" + and "L (erase a) = {flat v | v. \ v: (erase a)}" + and "L (erase a) = {flat v | v. \ v: (erase (bsimp a))}" + using L_bsimp_erase apply(blast) + apply (simp add: L_flat_Prf) + using L_bsimp_erase L_flat_Prf apply(auto)[1] + done + + + +lemma LXX_bders: + assumes "" + shows "bmkeps (bders (bsimp a) s) = bmkeps (bders a s)" + using assms + apply(induct s arbitrary: a rule: rev_induct) + apply(simp add: bmkeps_simp) + apply(simp add: bders_append) + + + +lemma L07: + assumes "s \ L (erase a)" + shows "s \ erase a \ flex (erase a) id s (mkeps (ders s (erase a)))" + using assms + by (meson lexer_correct_None lexer_correctness(1) lexer_flex) + +lemma LX0: + assumes "s \ L r" + shows "decode (bmkeps (bders (intern r) s)) r = Some(flex r id s (mkeps (ders s r)))" + by (metis assms blexer_correctness blexer_def lexer_correct_None lexer_flex) + + +lemma L02_bders2: + assumes "bnullable (bders a s)" "s = [c]" + shows "retrieve (bders (bsimp a) s) (mkeps (erase (bders (bsimp a) s))) = + retrieve (bders a s) (mkeps (erase (bders a s)))" + using assms + apply(simp) + + apply(induct s arbitrary: a) + apply(simp) + using L0 apply auto[1] + apply(simp) + +thm bmkeps_retrieve bmkeps_simp Posix_mkeps + +lemma WQ1: + assumes "s \ L (der c r)" + shows "s \ der c r \ mkeps (ders s (der c r))" + using assms + sorry + +lemma L02_bsimp: + assumes "bnullable (bders a s)" + shows "retrieve (bsimp a) (flex (erase (bsimp a)) id s (mkeps (erase (bders (bsimp a) s)))) = + retrieve a (flex (erase a) id s (mkeps (erase (bders a s))))" + using assms + apply(induct s arbitrary: a) + apply(simp) + apply (simp add: L0) + apply(simp) + apply(drule_tac x="bder a aa" in meta_spec) + apply(simp) + apply(subst (asm) bder_retrieve) + using Posix_Prf Posix_flex Posix_mkeps bnullable_correctness apply fastforce + apply(simp add: flex_fun_apply) + apply(drule sym) + apply(simp) + apply(subst flex_injval) + apply(subst bder_retrieve[symmetric]) + apply (metis L_bsimp_erase Posix_Prf Posix_flex Posix_mkeps bders.simps(2) bnullable_correctness ders.simps(2) erase_bders lexer_correct_None lexer_flex option.distinct(1)) + apply(simp only: erase_bder[symmetric] erase_bders[symmetric]) + apply(subst LB_sym[symmetric]) + apply(simp) + apply(rule WQ1) + apply(simp only: erase_bder[symmetric]) + apply(rule L07) + apply (metis L_bsimp_erase bnullable_correctness der_correctness erase_bder erase_bders lexer_correct_None lexer_flex option.simps(3)) + + apply(simp) + (*sledgehammer [timeout = 6000]*) + + + using bder_retrieve + + + +lemma LX0MAIN: + assumes "s \ r \ v" + shows "decode (bmkeps (bders_simp (intern r) s)) r = Some(flex r id s v)" + using assms + apply(induct s arbitrary: r v) + apply(simp) + apply (metis LX0 Posix1(1) bders.simps(1) lexer_correctness(1) lexer_flex option.simps(3)) + apply(simp add: bders_simp_append ders_append flex_append) + apply(subst bmkeps_simp[symmetric]) + apply (met is L_bders_simp Posix1(1) bnullable_correctness der_correctness ders_snoc erase_bder erase_bders erase_intern lexer_correct_None lexer_flex nullable_correctness) + (*apply(subgoal_tac "v = flex r id xs (injval (ders xs r) x (mkeps (ders (xs @ [x]) r)))") + prefer 2 + apply (metis Posix1(1) flex.simps(1) flex.simps(2) flex_append lexer_correct_None lexer_correctness(1) lexer_flex option.inject) +*) apply(drule_tac x="r" in meta_spec) + apply(drule_tac x="(mkeps (ders xs r))" in meta_spec) + apply(drule meta_mp) + + defer + apply(simp) + apply(drule sym) + apply(simp) + using bder_retrieve MAIN_decode + oops + +lemma LXaa: + assumes "s \ L (erase a)" + shows "decode (bmkeps (bsimp (bders (bsimp a) s))) (erase a) = decode (bmkeps (bders a s)) (erase a)" + using assms + apply(induct s arbitrary: a) + apply(simp) + using bmkeps_simp bnullable_correctness bsimp_idem nullable_correctness apply auto[1] + apply(simp) + apply(drule_tac x="bsimp (bder a (bsimp aa))" in meta_spec) + apply(drule meta_mp) + apply (metis L_bsimp_erase erase_bder lexer.simps(2) lexer_correct_None option.case(1)) + apply(subst (asm) bsimp_idem) + apply(subst bmkeps_simp[symmetric]) + apply (metis L0aa L_bsimp_erase Posix1(1) bders.simps(2) bnullable_correctness nullable_correctness) + apply(subst (asm) bmkeps_simp[symmetric]) + apply (metis L0aa L_bsimp_erase Posix1(1) bnullable_correctness ders.simps(2) ders_correctness erase_bder erase_bders nullable_correctness) + apply(subst bmkeps_retrieve) + apply (metis L0aa L_bsimp_erase Posix1(1) bders.simps(2) nullable_correctness) + apply(subst LA) + apply(simp) + apply (metis L_bsimp_erase ders.simps(2) lexer_correct_None lexer_flex mkeps_nullable) + + + +lemma LXaa: + assumes "s \ L (erase a)" + shows "bmkeps (bsimp (bders (bsimp a) s)) = bmkeps (bders a s)" + using assms + apply(induct s arbitrary: a) + apply(simp) + using bmkeps_simp bnullable_correctness bsimp_idem nullable_correctness apply auto[1] + apply(simp) + apply(drule_tac x="bsimp (bder a (bsimp aa))" in meta_spec) + apply(drule meta_mp) + apply (metis L_bsimp_erase erase_bder lexer.simps(2) lexer_correct_None option.case(1)) + apply(subst (asm) bsimp_idem) + apply(subst bmkeps_simp[symmetric]) + apply (metis L0aa L_bsimp_erase Posix1(1) bders.simps(2) bnullable_correctness nullable_correctness) + apply(subst (asm) bmkeps_simp[symmetric]) + apply (metis L0aa L_bsimp_erase Posix1(1) bnullable_correctness ders.simps(2) ders_correctness erase_bder erase_bders nullable_correctness) + + +lemma LXaa: + assumes "s \ L (erase a)" + shows "bmkeps (bders (bsimp a) s) = bmkeps (bders a s)" + using assms + apply(induct s arbitrary: a) + apply(simp) + using bmkeps_simp bnullable_correctness nullable_correctness apply auto[1] + apply(simp) + apply(drule_tac x="bder a aa" in meta_spec) + apply(drule meta_mp) + apply (metis L_bsimp_erase erase_bder lexer.simps(2) lexer_correct_None option.case(1)) + apply(drule sym) + apply(simp) + (* transformation to retrieve *) + apply(subst bmkeps_retrieve) + apply (metis L0aa L_bsimp_erase Posix1(1) bders.simps(2) ders_correctness erase_bders nullable_correctness) + apply(subst bmkeps_retrieve) + apply (metis b4 bders_simp.simps(2) bnullable_correctness erase_bders lexer_correct_None lexer_flex) + apply(subst (2) LC[symmetric]) + prefer 2 + apply(subst L0) + apply(subst LA) + apply(simp) + apply (m etis L_bsimp_erase bders.simps(2) erase_bder erase_bders lexer_correct_None lexer_flex mkeps_nullable) + apply(subst LA) + apply(simp) + apply (m etis L0aa b3 b4 bders_simp.simps(2) bnullable_correctness erase_bders lexer.simps(1) lexer_correctness(2) mkeps_nullable) + apply(subst (2) LB_sym[symmetric]) + prefer 2 + apply(subst L0) + apply(simp) + using lexer_correct_None lexer_flex apply fastforce + apply(subst (asm) bmkeps_retrieve) + apply (metis L_bsimp_erase bders.simps(2) erase_bders lexer_correct_None lexer_flex) + apply(subst (asm) bmkeps_retrieve) + apply (metis L0aa L_bsimp_erase b3 b4 bders_simp.simps(2) bnullable_correctness lexer.simps(1) lexer_correctness(2)) + apply(subst LA) + apply (met is L0aa L_bsimp_erase Posix1(1) ders.simps(2) ders_correctness erase_bder erase_bders mkeps_nullable nullable_correctness) + apply(subst LA) + using lexer_correct_None lexer_flex mkeps_nullable apply force + + using L0aaaa LB[OF L0aaaa] + apply(subst LB[OF L0aaaa]) + using L0aaaa + apply(rule L0aaaa) + + + + + + + + +lemma L00: + assumes "s \ L (erase a)" + shows "retrieve (bsimp a) (flex (erase (bsimp a)) id s (mkeps (ders s (erase (bsimp a))))) = + retrieve a (flex (erase a) id s (mkeps (ders s (erase a))))" + using assms + apply(induct s arbitrary: a taking: length rule: measure_induct) + apply(case_tac x) + apply(simp) + using L0 b3 bnullable_correctness nullable_correctness apply blast + apply(simp) + apply(drule_tac x="[]" in spec) + apply(simp) + apply(drule_tac x="bders (bsimp a) (aa#list)" in spec) + apply(simp) + apply(drule mp) + apply (metis L_bsimp_erase ders.simps(2) erase_bder erase_bders lexer_correct_None lexer_flex nullable_correctness) + using bder_retrieve bmkeps_simp bmkeps_retrieve L0 LA LB LC L02 L03 L04 L05 + + oops + +lemma L00: + assumes "s \ L (erase (bsimp a))" + shows "retrieve (bsimp a) (flex (erase (bsimp a)) id s (mkeps (ders s (erase (bsimp a))))) = + retrieve a (flex (erase a) id s (mkeps (ders s (erase a))))" + using assms + apply(induct s arbitrary: a) + apply(simp) + using L0 b3 bnullable_correctness nullable_correctness apply blast + apply(simp add: flex_append) + apply(drule_tac x="bder a aa" in meta_spec) + apply(drule meta_mp) + apply (metis L_bsimp_erase erase_bder lexer.simps(2) lexer_correct_None option.case(1)) + apply(simp) + + (*using bmkeps_retrieve bder_retrieve*) + apply(subst (asm) bder_retrieve) + apply (metis L_bsimp_erase Posix_Prf Posix_flex Posix_mkeps ders.simps(2) lexer_correct_None lexer_flex) + apply(simp add: flex_fun_apply) + apply(drule sym) + apply(simp) + using bmkeps_retrieve bder_retrieve + oops + +lemma L0a: + assumes "s \ L (erase a)" + shows "retrieve (bders_simp a s) (mkeps (erase (bders_simp a s))) = + retrieve (bders a s) (mkeps (erase (bders a s)))" + using assms + apply(induct s arbitrary: a) + apply(simp) + apply(simp) + apply(drule_tac x="bsimp (bder a aa)" in meta_spec) + apply(drule meta_mp) + using L_bsimp_erase lexer_correct_None apply fastforce + apply(simp) + apply(subst LA) + apply (metis L_bsimp_erase ders.simps(2) ders_correctness erase_bder lexer_correct_None lexer_flex mkeps_nullable nullable_correctness) + apply(subst LA) + apply(simp) + apply (metis ders.simps(2) lexer_correct_None lexer_flex mkeps_nullable) + + apply(simp) + (* MAIN *) + + apply(subst L00) + apply (met is L_bsimp_erase erase_bder lexer.simps(2) lexer_correct_None option.case(1)) + apply(simp) + done + +lemma L0a: + assumes "s \ L (erase a)" + shows "retrieve (bders_simp a s) (mkeps (erase (bders_simp a s))) = + retrieve (bders a s) (mkeps (erase (bders a s)))" + using assms + apply(induct s arbitrary: a rule: rev_induct) + apply(simp) + apply(simp add: bders_simp_append) + apply(subst L0) + apply (metis L_bders_simp bnullable_correctness der_correctness ders_snoc erase_bder erase_bders lexer_correct_None lexer_flex nullable_correctness) + apply(subst bder_retrieve) + apply (metis L_bders_simp der_correctness ders_snoc erase_bder erase_bders lexer_correct_None lexer_flex mkeps_nullable nullable_correctness) + apply(simp add: bders_append) + apply(subst bder_retrieve) + apply (metis ders_snoc erase_bders lexer_correct_None lexer_flex mkeps_nullable) + apply(simp add: ders_append) + using bder_retrieve + + find_theorems "retrieve _ (injval _ _ _)" +find_theorems "mkeps (erase _)" + + +lemma L1: + assumes "s \ r \ v" + shows "decode (bmkeps (bders (intern r) s)) r = Some v" + using assms + by (metis blexer_correctness blexer_def lexer_correctness(1) option.distinct(1)) + +lemma L2: + assumes "s \ (der c r) \ v" + shows "decode (bmkeps (bders (intern r) (c # s))) r = Some (injval r c v)" + using assms + apply(subst bmkeps_retrieve) + using Posix1(1) lexer_correct_None lexer_flex apply fastforce + using MAIN_decode + apply(subst MAIN_decode[symmetric]) + apply(simp) + apply (meson Posix1(1) lexer_correct_None lexer_flex mkeps_nullable) + apply(simp) + apply(subgoal_tac "v = flex (der c r) id s (mkeps (ders s (der c r)))") + prefer 2 + apply (metis Posix_determ lexer_correctness(1) lexer_flex option.distinct(1)) + apply(simp) + apply(subgoal_tac "injval r c (flex (der c r) id s (mkeps (ders s (der c r)))) = + (flex (der c r) ((\v. injval r c v) o id) s (mkeps (ders s (der c r))))") + apply(simp) + using flex_fun_apply by blast + +lemma L3: + assumes "s2 \ (ders s1 r) \ v" + shows "decode (bmkeps (bders (intern r) (s1 @ s2))) r = Some (flex r id s1 v)" + using assms + apply(induct s1 arbitrary: r s2 v rule: rev_induct) + apply(simp) + using L1 apply blast + apply(simp add: ders_append) + apply(drule_tac x="r" in meta_spec) + apply(drule_tac x="x # s2" in meta_spec) + apply(drule_tac x="injval (ders xs r) x v" in meta_spec) + apply(drule meta_mp) + defer + apply(simp) + apply(simp add: flex_append) + by (simp add: Posix_injval) + +lemma L4: + assumes "[c] \ r \ v" + shows "bmkeps (bder c (bsimp (intern r))) = code (flex r id [c] v)" + using assms + apply(subst bmkeps_retrieve) + apply (metis L_bsimp_erase Posix1(1) bders.simps(1) bders.simps(2) erase_bders erase_intern lexer_correct_None lexer_flex) + apply(subst bder_retrieve) + apply (metis L_bsimp_erase Posix1(1) bders.simps(1) bders.simps(2) erase_bder erase_bders erase_intern lexer_correct_None lexer_flex mkeps_nullable) + apply(simp) + + + + + +lemma + assumes "s \ L r" + shows "bmkeps (bders_simp (intern r) s) = bmkeps (bders (intern r) s)" + using assms + apply(induct s arbitrary: r rule: rev_induct) + apply(simp) + apply(simp add: bders_simp_append bders_append) + apply(subst bmkeps_simp[symmetric]) + apply (metis b4 bders.simps(1) bders.simps(2) bders_simp_append blexer_correctness blexer_def lexer_correct_None) + apply(subst bmkeps_retrieve) + apply(simp) + apply (metis L_bders_simp der_correctness ders_snoc erase_bders erase_intern lexer_correct_None lexer_flex nullable_correctness) + apply(subst bmkeps_retrieve) + apply(simp) + apply (metis ders_snoc lexer_correct_None lexer_flex) + apply(subst bder_retrieve) + apply (metis L_bders_simp der_correctness ders_snoc erase_bder erase_bders erase_intern lexer_correct_None lexer_flex mkeps_nullable nullable_correctness) + apply(subst bder_retrieve) + apply (metis ders_snoc erase_bder erase_bders erase_intern lexer_correct_None lexer_flex mkeps_nullable) + using bmkeps_retrieve + find_theorems "retrieve (bders _ _) = _" + find_theorems "retrieve _ _ = _" + oops + +lemma + assumes "s \ r \ v" + shows "decode (bmkeps (bders_simp (intern r) s)) = Some v" + using assms + apply(induct s arbitrary: r v taking: length rule: measure_induct) + apply(subgoal_tac "x = [] \ (\a xs. x = xs @ [a])") + prefer 2 + apply (meson rev_exhaust) + apply(erule disjE) + apply(simp add: blexer_simp_def) + apply (metis Posix1(1) Posix_Prf Posix_determ Posix_mkeps bmkeps_retrieve erase_intern lexer.simps(1) lexer_correct_None retrieve_code) + apply(clarify) + apply(simp add: bders_simp_append) + apply(subst bmkeps_simp[symmetric]) + apply (metis b3 b4 bders_simp.simps(1) bders_simp.simps(2) bders_simp_append blexer_correctness blexer_def lexer_correctness(1) option.distinct(1)) + apply(subst bmkeps_retrieve) + apply (metis L_bders_simp Posix1(1) der_correctness ders_snoc erase_bder erase_bders erase_intern lexer_correct_None lexer_flex nullable_correctness) + apply(simp) + apply(subst bder_retrieve) + apply (metis L_bders_simp Posix1(1) der_correctness ders_snoc erase_bders erase_intern lexer_correct_None lexer_flex mkeps_nullable nullable_correctness) + + + find_theorems "bmkeps _ = _" + + apply(subgoal_tac "lexer r (xs @ [a]) = Some v") + prefer 2 + using lexer_correctness(1) apply blast + apply(simp (no_asm_simp) add: bders_simp_append) + apply(subst bmkeps_simp[symmetric]) + apply (m etis b4 bders.simps(1) bders.simps(2) bders_simp_append bnullable_correctness erase_bders erase_intern lexer_flex option.distinct(1)) + apply(subgoal_tac "nullable (ders (xs @ [a]) r)") + prefer 2 + apply (metis lexer_flex option.distinct(1)) + apply(simp add: lexer_flex) + apply(simp add: flex_append ders_append) + apply(drule_tac sym) + apply(simp) + using Posix_flex flex.simps Posix_flex2 + apply(drule_tac Posix_flex2) + apply (simp add: Prf_injval mkeps_nullable) + apply(drule_tac x="[a]" in spec) + apply(drule mp) + defer + apply(drule_tac x="ders xs r" in spec) + apply(drule_tac x="injval (ders xs r) a (mkeps (der a (ders xs r)))" in spec) + apply(simp) + apply(subst (asm) bmkeps_simp[symmetric]) + using bnullable_correctness apply fastforce + + + find_theorems "good (bsimp _)" + oops + +lemma + assumes "s \ L (erase a)" + shows "bmkeps (bders (bsimp a) s) = bmkeps (bders a s)" + using assms + apply(induct s arbitrary: a taking : length rule: measure_induct) + apply(case_tac x) + apply(simp) + using bmkeps_simp bnullable_correctness nullable_correctness apply auto[1] + using bsimp_idem apply auto[1] + apply(simp add: bders_append bders_simp_append) + apply(drule_tac x="bder a (bsimp aa)" in meta_spec) + apply(drule meta_mp) + defer + apply(simp) + oops + + +lemma + assumes "s \ L (erase r)" + shows "bmkeps (bders_simp r s) = bmkeps (bders r s)" + using assms + apply(induct s arbitrary: r) + apply(simp) + apply(simp add: bders_simp_append bders_append) + apply(drule_tac x="bsimp (bder a r)" in meta_spec) + apply(drule meta_mp) + defer + apply(simp) + + find_theorems "good (bsimp _)" + oops + +lemma + assumes "s \ L r" + shows "decode (bmkeps (bders_simp (intern r) s)) r = Some (flex r id s (mkeps (ders s r)))" + using assms + apply(induct s arbitrary: r) + apply(simp) + using bmkeps_retrieve decode_code mkeps_nullable nullable_correctness retrieve_code apply auto[1] + apply(simp add: bders_simp_append) + apply(subst bmkeps_simp[symmetric]) + apply(subgoal_tac "[] \ L (ders (xs @ [x]) r)") + prefer 2 + apply (meson Posix1(1) lexer_correct_None lexer_flex nullable_correctness) + apply(simp add: ders_append) + using L_bders_simp bnullable_correctness der_correctness nullable_correctness apply f orce + apply(simp add: flex_append ders_append) + apply(drule_tac x="der x r" in meta_spec) + apply(simp add: ders_append) + find_theorems "intern _" + find_theorems "bmkeps (bsimp _)" + + find_theorems "(_ # _) \ _ \ _" + oops + lemma ZZZ: assumes "\y. asize y < Suc (sum_list (map asize x52)) \ bsimp (bder c (bsimp y)) = bsimp (bder c y)" @@ -1899,39 +2646,6 @@ apply(simp add: bders_append) done -lemma - assumes "bnullable (bders a s)" "good a" - shows "bmkeps (bders_simp a s) = bmkeps (bders a s)" - using assms - apply(induct s arbitrary: a) - apply(simp) - apply(simp add: bders_append bders_simp_append) - apply(drule_tac x="bsimp (bsimp (bder a aa))" in meta_spec) - apply(drule meta_mp) - apply (metis b4 bders.simps(2) bders_simp.simps(2) bsimp_idem) - apply(subgoal_tac "good (bsimp (bder a aa)) \ bsimp (bder a aa) = AZERO") - apply(auto simp add: bders_AZERO) - prefer 2 - apply (metis b4 bders.simps(2) bders_AZERO(2) bders_simp.simps(2) bnullable.simps(1)) - prefer 2 - using good1 apply auto[1] - apply(drule meta_mp) - apply (simp add: bsimp_idem) - apply (subst (asm) (1) bsimp_idem) - apply(simp) - apply(subst (asm) (2) bmkeps_simp) - apply (metis b4 bders.simps(2) bders_simp.simps(2) bsimp_idem) - apply (subst (asm) (1) bsimp_idem) - oops - - -lemma - shows "bmkeps (bders (bders_simp a s2) s1) = bmkeps (bders_simp a (s2 @ s1))" - apply(induct s2 arbitrary: a s1) - apply(simp) - defer - apply(simp add: bders_append bders_simp_append) - oops lemma QQ1: shows "bsimp (bders (bsimp a) []) = bders_simp (bsimp a) []" @@ -2039,19 +2753,6 @@ apply (metis n0 nn1b test2) by (metis flts_fuse flts_nothing) -lemma "good (bsimp a) \ bsimp a = AZERO" - by (simp add: good1) - - -lemma - assumes "bnullable (bders r s)" "good r" - shows "bmkeps (bders (bsimp r) s) = bmkeps (bders r s)" - using assms - apply(induct s arbitrary: r) - apply(simp) - using bmkeps_simp apply auto[1] - apply(simp) - by (simp add: test2) lemma PP: assumes "bnullable (bders r s)" @@ -2101,7 +2802,7 @@ using assms apply(induct x52 arbitrary: x51) apply(simp) - + oops lemma @@ -2160,15 +2861,7 @@ apply (metis bsimp_AALTs.simps(3) neq_Nil_conv) apply(auto) apply (metis add.commute bsimp_size leD le_neq_implies_less less_add_Suc1 less_add_eq_less rt) - - apply(drule_tac x="AALTs x51 list" in spec) - apply(drule mp) - apply(auto simp add: asize0)[1] - - - -(* HERE*) - + oops @@ -2211,94 +2904,133 @@ prefer 2 apply (metis bbbbs1 bsimp.simps(2) bsimp_AALTs.simps(1) bsimp_idem flts.simps(1) good.simps(5) good1 length_0_conv length_Suc_conv less_one list.simps(8) nat_neq_iff not_less_eq) apply(auto) - sledgehammer [timeout=6000] - - - - - defer - apply(case_tac list) - apply(simp) - prefer 2 - apply(simp) - apply (simp add: bsimp_fuse bsimp_idem) - apply(case_tac a) - apply(simp_all) + oops + + +lemma + assumes "rs = [AALTs bsa [AONE bsb, AONE bsb]]" + shows "bsimp (bsimp_AALTs bs rs) = bsimp_AALTs bs (flts (map bsimp rs))" + using assms + apply(simp) + oops + +lemma + assumes "rs = [AALTs bs0 [AALTs bsa [AONE bsb, AONE bsb]]]" + shows "flts [bsimp_AALTs bs (flts rs)] = flts (map (fuse bs) rs)" + using assms + apply(simp only:) + apply(simp only: flts.simps append_Nil2 List.list.map fuse.simps) + apply(simp only: bsimp_AALTs.simps) + apply(simp only: fuse.simps) + apply(simp only: flts.simps append_Nil2) + find_theorems "nonnested (bsimp_AALTs _ _)" + find_theorems "map _ (_ # _) = _" + apply(induct rs arbitrary: bs rule: flts.induct) + apply(auto) + apply(case_tac rs) + apply(simp) - - apply(subst k0b) - apply(simp) - apply(subst (asm) k0) - apply(subst (asm) (2) k0) - - - find_theorems "asize _ < asize _" - using bsimp_AALTs_size3 - apply - - apply(drule_tac x="a # list" in meta_spec) - apply(drule_tac x="bs" in meta_spec) - apply(drule meta_mp) + oops + + find_theorems "asize (bsimp _)" + +lemma CT1: + shows "bsimp (AALTs bs as) = bsimp(AALTs bs (map bsimp as))" + apply(induct as arbitrary: bs) apply(simp) apply(simp) - apply(drule_tac x="(bsimp a) # list" in spec) - apply(drule mp) + by (simp add: bsimp_idem comp_def) + +lemma CT1a: + shows "bsimp (AALT bs a1 a2) = bsimp(AALT bs (bsimp a1) (bsimp a2))" + by (metis CT1 list.simps(8) list.simps(9)) + +(* CT *) + +lemma CTU: + shows "bsimp_AALTs bs as = li bs as" + apply(induct bs as rule: li.induct) + apply(auto) + done + +thm flts.simps + + +lemma CTa: + assumes "\r \ set as. nonalt r \ r \ AZERO" + shows "flts as = as" + using assms + apply(induct as) apply(simp) - apply(case_tac list) - apply(simp) - prefer 2 - apply(simp) - apply(subst (asm) k0) - apply(subst (asm) (2) k0) - thm k0 - apply(subst (asm) k0b) - apply(simp) - apply(simp) - - defer + apply(case_tac as) apply(simp) - apply(case_tac list) - apply(simp) - defer + apply (simp add: k0b) + using flts_nothing by auto + +lemma CT0: + assumes "\r \ set as1. nonalt r \ r \ AZERO" + shows "flts [bsimp_AALTs bs1 as1] = flts (map (fuse bs1) as1)" + using assms CTa + apply(induct as1 arbitrary: bs1) apply(simp) - find_theorems "asize _ < asize _" - find_theorems "asize _ < asize _" -apply(subst k0b) - apply(simp) - apply(simp) - - - apply(rule nn11a) - apply(simp) - apply (metis good.simps(1) good1 good_fuse) - apply(simp) - using fuse_append apply blast - apply(subgoal_tac "\bs rs. bsimp x43 = AALTs bs rs") - prefer 2 - using nonalt.elims(3) apply blast - apply(clarify) + apply(simp) + apply(case_tac as1) apply(simp) - apply(case_tac rs) - apply(simp) - apply (metis arexp.distinct(7) good.simps(4) good1) - apply(simp) - apply(case_tac list) - apply(simp) - apply (metis arexp.distinct(7) good.simps(5) good1) apply(simp) +proof - +fix a :: arexp and as1a :: "arexp list" and bs1a :: "bit list" and aa :: arexp and list :: "arexp list" + assume a1: "nonalt a \ a \ AZERO \ nonalt aa \ aa \ AZERO \ (\r\set list. nonalt r \ r \ AZERO)" + assume a2: "\as. \r\set as. nonalt r \ r \ AZERO \ flts as = as" + assume a3: "as1a = aa # list" + have "flts [a] = [a]" +using a1 k0b by blast +then show "fuse bs1a a # fuse bs1a aa # map (fuse bs1a) list = flts (fuse bs1a a # fuse bs1a aa # map (fuse bs1a) list)" + using a3 a2 a1 by (metis (no_types) append.left_neutral append_Cons flts_fuse k00 k0b list.simps(9)) +qed - - - +lemma CT01: + assumes "\r \ set as1. nonalt r \ r \ AZERO" "\r \ set as2. nonalt r \ r \ AZERO" + shows "flts [bsimp_AALTs bs1 as1, bsimp_AALTs bs2 as2] = flts ((map (fuse bs1) as1) @ (map (fuse bs2) as2))" + using assms CT0 + by (metis k0 k00) - find_theorems "flts [_] = [_]" - apply(case_tac x52) + +lemma CTT: + assumes "\r\set (flts (map (bsimp \ bder c) as1)). nonalt r \ r \ AZERO" + "\r\set (flts (map (bsimp \ bder c) as2)). nonalt r \ r \ AZERO" + shows "bsimp (AALT bs (AALTs bs1 (map (bder c) as1)) (AALTs bs2 (map (bder c) as2))) + = XXX" + apply(subst bsimp_idem[symmetric]) apply(simp) - apply(simp) - apply(case_tac list) - apply(simp) - apply(case_tac a) - apply(simp_all) + apply(subst CT01) + apply(rule assms) + apply(rule assms) + (* CT *) + +lemma + shows "bsimp (AALT bs (AALTs bs1 (map (bder c) as1)) (AALTs bs2 (map (bder c) as2))) + = bsimp (AALTs bs ((map (fuse bs1) (map (bder c) as1)) @ + (map (fuse bs2) (map (bder c) as2))))" + apply(subst bsimp_idem[symmetric]) + apply(simp) + oops + +lemma CT_exp: + assumes "\a \ set as. bsimp (bder c (bsimp a)) = bsimp (bder c a)" + shows "map bsimp (map (bder c) as) = map bsimp (map (bder c) (map bsimp as))" + using assms + apply(induct as) + apply(auto) + done + +lemma asize_set: + assumes "a \ set as" + shows "asize a < Suc (sum_list (map asize as))" + using assms + apply(induct as arbitrary: a) + apply(auto) + using le_add2 le_less_trans not_less_eq by blast lemma XXX2a_long_without_good: @@ -2311,15 +3043,191 @@ prefer 3 apply(simp) (* AALT case *) + prefer 2 + apply(simp del: bsimp.simps) + apply(subst (2) CT1) + apply(subst CT_exp) + apply(auto)[1] + using asize_set apply blast + apply(subst CT1[symmetric]) + apply(simp) + oops + +lemma big: + shows "bsimp (AALT x51 (AALTs bs1 as1) (AALTs bs2 as2)) = + bsimp (AALTs x51 ((map (fuse bs1) as1) @ (map (fuse bs2) as2)))" + apply(simp add: comp_def bder_fuse) + apply(simp add: flts_append) + + sorry + +lemma XXX2a_long_without_good: + shows "bsimp (bder c (bsimp a)) = bsimp (bder c a)" + apply(induct a arbitrary: c taking: "\a. asize a" rule: measure_induct) + apply(case_tac x) + apply(simp) + apply(simp) + apply(simp) + prefer 3 + apply(simp) + (* AALT case *) prefer 2 + apply(case_tac "\a1 a2. x52 = [a1, a2]") + apply(clarify) + apply(simp del: bsimp.simps) + apply(subst (2) CT1) + apply(simp del: bsimp.simps) + apply(rule_tac t="bsimp (bder c a1)" and s="bsimp (bder c (bsimp a1))" in subst) + apply(simp del: bsimp.simps) + apply(rule_tac t="bsimp (bder c a2)" and s="bsimp (bder c (bsimp a2))" in subst) + apply(simp del: bsimp.simps) + apply(subst CT1a[symmetric]) + apply(subst bsimp.simps) + apply(simp del: bsimp.simps) + apply(case_tac "\bs1 as1. bsimp a1 = AALTs bs1 as1") + apply(case_tac "\bs2 as2. bsimp a2 = AALTs bs2 as2") + apply(clarify) + apply(simp only:) + apply(simp del: bsimp.simps bder.simps) + apply(subst bsimp_AALTs_qq) + prefer 2 + apply(simp del: bsimp.simps) + apply(subst big) + prefer 2 + apply (metis (no_types, lifting) Nil_is_append_conv Nil_is_map_conv One_nat_def Suc_lessI arexp.distinct(7) bsimp.simps(2) bsimp_AALTs.simps(1) bsimp_idem flts.simps(1) length_0_conv length_append length_greater_0_conv less_Suc0 less_add_same_cancel1) + apply(simp add: comp_def bder_fuse) + + thm bder_fuse + + find_theorems "1 < length _" + +(* CT *) +(* + under assumption that bsimp a1 = AALT; bsimp a = AALT + + bsimp (AALT x51 (AALTs bs1 (map (bder c) as1)) (AALTs bs2 (map (bder c) as2))) = + bsimp (AALTs x51 (map (fuse bs1) (map (bder c) as1)) @ ( map (fuse bs2) (map (bder c) as2))) + + bsimp_AALTs x51 (flts (map bsimp [a1, a2]))) + = bsimp_AALTs x51 (flts [bsimp a1, bsimp a2]) + +*) + + apply(case_tac "bsimp a1") + prefer 5 + apply(simp only:) + apply(case_tac "bsimp a2") + apply(simp) + + prefer 5 + apply(simp only:) + apply(simp only: bder.simps) + apply(simp) + + + + + prefer 2 + using nn1b apply blast apply(simp only:) - apply(simp) + apply(case_tac x52) + apply(simp) + apply(simp only:) + apply(case_tac "\bsa rsa. a = AALTs bsa rsa") + apply(clarify) + apply(simp) + apply(frule_tac x="AALTs x51 ((map (fuse bsa) rsa) @ list)" in spec) + apply(drule mp) + apply(simp) + apply (simp add: qq) + apply(drule_tac x="c" in spec) + apply(simp only: bder.simps) + apply(simp) + apply(subst k0) + apply(subst (2) k0) + apply(subst (asm) (2) flts_append) + apply(rotate_tac 2) + + +lemma XXX2a_long_without_good: + shows "bsimp (bder c (bsimp a)) = bsimp (bder c a)" + apply(induct a arbitrary: c taking: "\a. asize a" rule: measure_induct) + apply(case_tac x) + apply(simp) + apply(simp) + apply(simp) + prefer 3 + apply(simp) + (* AALT case *) + prefer 2 + apply(subgoal_tac "nonnested (bsimp x)") + prefer 2 + using nn1b apply blast + apply(simp only:) + apply(drule_tac x="AALTs x51 (flts x52)" in spec) + apply(drule mp) + defer + apply(drule_tac x="c" in spec) + apply(simp) + apply(rotate_tac 2) + + apply(drule sym) + apply(simp) + + apply(simp only: bder.simps) + apply(simp only: bsimp.simps) apply(subst bder_bsimp_AALTs) apply(case_tac x52) apply(simp) apply(simp) + apply(case_tac list) + apply(simp) + apply(case_tac a) + apply(simp) + apply(simp) + apply(simp) + defer + apply(simp) + + + (* case AALTs list is not empty *) + apply(simp) + apply(subst k0) + apply(subst (2) k0) + apply(simp) + apply(case_tac "bsimp a = AZERO") + apply(subgoal_tac "bsimp (bder c a) = AZERO") + prefer 2 + using less_iff_Suc_add apply auto[1] + apply(simp) + apply(drule_tac x="AALTs x51 list" in spec) + apply(drule mp) + apply(simp add: asize0) + apply(drule_tac x="c" in spec) + apply(simp add: bder_bsimp_AALTs) + apply(case_tac "nonalt (bsimp a)") + prefer 2 + apply(drule_tac x="bsimp (AALTs x51 (a#list))" in spec) + apply(drule mp) + apply(rule order_class.order.strict_trans2) + apply(rule bsimp_AALTs_size3) + apply(auto)[1] + apply(simp) + apply(subst (asm) bsimp_idem) + apply(drule_tac x="c" in spec) + apply(simp) + find_theorems "_ < _ \ _ \ _ \_ < _" + apply(rule le_trans) + apply(subgoal_tac "flts [bsimp a] = [bsimp a]") + prefer 2 + using k0b apply blast + apply(simp) + find_theorems "asize _ < asize _" + + using bder_bsimp_AALTs apply(case_tac list) - apply(simp) + apply(simp) + sledgeha mmer [timeout=6000] apply(case_tac "\r \ set (map bsimp x52). \nonalt r") apply(drule_tac x="bsimp (AALTs x51 x52)" in spec) @@ -2581,6 +3489,19 @@ apply(simp only: bders.simps) apply(subst bders_simp.simps) apply(simp) - + oops + + +lemma + fixes n :: nat + shows "(\i \ {0..n}. i) = n * (n + 1) div 2" + apply(induct n) + apply(simp) + apply(simp) + done + + + + end \ No newline at end of file diff -r a730a5a0bab9 -r 89e6605c4ca4 thys/Journal/Paper.thy --- a/thys/Journal/Paper.thy Tue Jul 23 21:21:49 2019 +0100 +++ b/thys/Journal/Paper.thy Mon Jul 29 09:37:20 2019 +0100 @@ -132,14 +132,14 @@ -section {* Introduction *} +section \Introduction\ -text {* +text \ Brzozowski \cite{Brzozowski1964} introduced the notion of the {\em -derivative} @{term "der c r"} of a regular expression @{text r} w.r.t.\ -a character~@{text c}, and showed that it gave a simple solution to the +derivative} @{term "der c r"} of a regular expression \r\ w.r.t.\ +a character~\c\, and showed that it gave a simple solution to the problem of matching a string @{term s} with a regular expression @{term r}: if the derivative of @{term r} w.r.t.\ (in succession) all the characters of the string matches the empty string, then @{term r} @@ -175,8 +175,7 @@ into a sequence of tokens, POSIX is the more natural disambiguation strategy for what programmers consider basic syntactic building blocks in their programs. These building blocks are often specified by some -regular expressions, say @{text "r\<^bsub>key\<^esub>"} and @{text -"r\<^bsub>id\<^esub>"} for recognising keywords and identifiers, +regular expressions, say \r\<^bsub>key\<^esub>\ and \r\<^bsub>id\<^esub>\ for recognising keywords and identifiers, respectively. There are a few underlying (informal) rules behind tokenising a string in a POSIX \cite{POSIX} fashion: @@ -196,23 +195,22 @@ be longer than no match at all. \end{itemize} -\noindent Consider for example a regular expression @{text -"r\<^bsub>key\<^esub>"} for recognising keywords such as @{text "if"}, -@{text "then"} and so on; and @{text "r\<^bsub>id\<^esub>"} +\noindent Consider for example a regular expression \r\<^bsub>key\<^esub>\ for recognising keywords such as \if\, +\then\ and so on; and \r\<^bsub>id\<^esub>\ recognising identifiers (say, a single character followed by characters or numbers). Then we can form the regular expression -@{text "(r\<^bsub>key\<^esub> + r\<^bsub>id\<^esub>)\<^sup>\"} -and use POSIX matching to tokenise strings, say @{text "iffoo"} and -@{text "if"}. For @{text "iffoo"} we obtain by the Longest Match Rule +\(r\<^bsub>key\<^esub> + r\<^bsub>id\<^esub>)\<^sup>\\ +and use POSIX matching to tokenise strings, say \iffoo\ and +\if\. For \iffoo\ we obtain by the Longest Match Rule a single identifier token, not a keyword followed by an -identifier. For @{text "if"} we obtain by the Priority Rule a keyword -token, not an identifier token---even if @{text "r\<^bsub>id\<^esub>"} -matches also. By the Star Rule we know @{text "(r\<^bsub>key\<^esub> + -r\<^bsub>id\<^esub>)\<^sup>\"} matches @{text "iffoo"}, -respectively @{text "if"}, in exactly one `iteration' of the star. The +identifier. For \if\ we obtain by the Priority Rule a keyword +token, not an identifier token---even if \r\<^bsub>id\<^esub>\ +matches also. By the Star Rule we know \(r\<^bsub>key\<^esub> + +r\<^bsub>id\<^esub>)\<^sup>\\ matches \iffoo\, +respectively \if\, in exactly one `iteration' of the star. The Empty String Rule is for cases where, for example, the regular expression -@{text "(a\<^sup>\)\<^sup>\"} matches against the -string @{text "bc"}. Then the longest initial matched substring is the +\(a\<^sup>\)\<^sup>\\ matches against the +string \bc\. Then the longest initial matched substring is the empty string, which is matched by both the whole regular expression and the parenthesised subexpression. @@ -225,25 +223,24 @@ expression matches a string, values encode the information of \emph{how} the string is matched by the regular expression---that is, which part of the string is matched by which part of the regular -expression. For this consider again the string @{text "xy"} and -the regular expression \mbox{@{text "(x + (y + xy))\<^sup>\"}} +expression. For this consider again the string \xy\ and +the regular expression \mbox{\(x + (y + xy))\<^sup>\\} (this time fully parenthesised). We can view this regular expression -as tree and if the string @{text xy} is matched by two Star -`iterations', then the @{text x} is matched by the left-most -alternative in this tree and the @{text y} by the right-left alternative. This +as tree and if the string \xy\ is matched by two Star +`iterations', then the \x\ is matched by the left-most +alternative in this tree and the \y\ by the right-left alternative. This suggests to record this matching as \begin{center} @{term "Stars [Left(Char x), Right(Left(Char y))]"} \end{center} -\noindent where @{const Stars}, @{text Left}, @{text Right} and @{text -Char} are constructors for values. @{text Stars} records how many -iterations were used; @{text Left}, respectively @{text Right}, which +\noindent where @{const Stars}, \Left\, \Right\ and \Char\ are constructors for values. \Stars\ records how many +iterations were used; \Left\, respectively \Right\, which alternative is used. This `tree view' leads naturally to the idea that regular expressions act as types and values as inhabiting those types (see, for example, \cite{HosoyaVouillonPierce2005}). The value for -matching @{text "xy"} in a single `iteration', i.e.~the POSIX value, +matching \xy\ in a single `iteration', i.e.~the POSIX value, would look as follows \begin{center} @@ -316,11 +313,11 @@ We extend our results to ??? Bitcoded version?? -*} +\ -section {* Preliminaries *} +section \Preliminaries\ -text {* \noindent Strings in Isabelle/HOL are lists of characters with +text \\noindent Strings in Isabelle/HOL are lists of characters with the empty string being represented by the empty list, written @{term "[]"}, and list-cons being written as @{term "DUMMY # DUMMY"}. Often we use the usual bracket notation for lists also for strings; for @@ -333,7 +330,7 @@ inductive datatype: \begin{center} - @{text "r :="} + \r :=\ @{const "ZERO"} $\mid$ @{const "ONE"} $\mid$ @{term "CHAR c"} $\mid$ @@ -365,8 +362,8 @@ DUMMY"} for the concatenation of two languages (it is also list-append for strings). We use the star-notation for regular expressions and for languages (in the last clause above). The star for languages is defined - inductively by two clauses: @{text "(i)"} the empty string being in - the star of a language and @{text "(ii)"} if @{term "s\<^sub>1"} is in a + inductively by two clauses: \(i)\ the empty string being in + the star of a language and \(ii)\ if @{term "s\<^sub>1"} is in a language and @{term "s\<^sub>2"} in the star of this language, then also @{term "s\<^sub>1 @ s\<^sub>2"} is in the star of this language. It will also be convenient to use the following notion of a \emph{semantic derivative} (or \emph{left @@ -459,11 +456,11 @@ \cite{Sulzmann2014} is to append another phase to this algorithm in order to calculate a [lexical] value. We will explain the details next. -*} +\ -section {* POSIX Regular Expression Matching\label{posixsec} *} +section \POSIX Regular Expression Matching\label{posixsec}\ -text {* +text \ There have been many previous works that use values for encoding \emph{how} a regular expression matches a string. @@ -473,7 +470,7 @@ are defined as the inductive datatype \begin{center} - @{text "v :="} + \v :=\ @{const "Void"} $\mid$ @{term "val.Char c"} $\mid$ @{term "Left v"} $\mid$ @@ -532,8 +529,8 @@ \end{center} \noindent where in the clause for @{const "Stars"} we use the - notation @{term "v \ set vs"} for indicating that @{text v} is a - member in the list @{text vs}. We require in this rule that every + notation @{term "v \ set vs"} for indicating that \v\ is a + member in the list \vs\. We require in this rule that every value in @{term vs} flattens to a non-empty string. The idea is that @{term "Stars"}-values satisfy the informal Star Rule (see Introduction) where the $^\star$ does not match the empty string unless this is @@ -549,9 +546,9 @@ \end{proposition} \noindent - Given a regular expression @{text r} and a string @{text s}, we define the - set of all \emph{Lexical Values} inhabited by @{text r} with the underlying string - being @{text s}:\footnote{Okui and Suzuki refer to our lexical values + Given a regular expression \r\ and a string \s\, we define the + set of all \emph{Lexical Values} inhabited by \r\ with the underlying string + being \s\:\footnote{Okui and Suzuki refer to our lexical values as \emph{canonical values} in \cite{OkuiSuzuki2010}. The notion of \emph{non-problematic values} by Cardelli and Frisch \cite{Frisch2004} is related, but not identical to our lexical values.} @@ -573,7 +570,7 @@ infinitely many values, but according to our more restricted definition only a single value, namely @{thm LV_STAR_ONE_empty}. - If a regular expression @{text r} matches a string @{text s}, then + If a regular expression \r\ matches a string \s\, then generally the set @{term "LV r s"} is not just a singleton set. In case of POSIX matching the problem is to calculate the unique lexical value that satisfies the (informal) POSIX rules from the Introduction. @@ -582,9 +579,9 @@ path from the left to the right involving @{term derivatives}/@{const nullable} is the first phase of the algorithm (calculating successive \Brz's derivatives) and @{const - mkeps}/@{text inj}, the path from right to left, the second + mkeps}/\inj\, the path from right to left, the second phase. This picture shows the steps required when a regular - expression, say @{text "r\<^sub>1"}, matches the string @{term + expression, say \r\<^sub>1\, matches the string @{term "[a,b,c]"}. We first build the three derivatives (according to @{term a}, @{term b} and @{term c}). We then use @{const nullable} to find out whether the resulting derivative regular expression @@ -609,11 +606,11 @@ \node (v4) [below=of r4]{@{term "v\<^sub>4"}}; \draw[->,line width=1mm](r4) -- (v4); \node (v3) [left=of v4] {@{term "v\<^sub>3"}}; -\draw[->,line width=1mm](v4)--(v3) node[below,midway] {@{text "inj r\<^sub>3 c"}}; +\draw[->,line width=1mm](v4)--(v3) node[below,midway] {\inj r\<^sub>3 c\}; \node (v2) [left=of v3]{@{term "v\<^sub>2"}}; -\draw[->,line width=1mm](v3)--(v2) node[below,midway] {@{text "inj r\<^sub>2 b"}}; +\draw[->,line width=1mm](v3)--(v2) node[below,midway] {\inj r\<^sub>2 b\}; \node (v1) [left=of v2] {@{term "v\<^sub>1"}}; -\draw[->,line width=1mm](v2)--(v1) node[below,midway] {@{text "inj r\<^sub>1 a"}}; +\draw[->,line width=1mm](v2)--(v1) node[below,midway] {\inj r\<^sub>1 a\}; \draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{@{term "mkeps"}}}; \end{tikzpicture} \end{center} @@ -647,8 +644,7 @@ makes some subtle choices leading to a POSIX value: for example if an alternative regular expression, say @{term "ALT r\<^sub>1 r\<^sub>2"}, can match the empty string and furthermore @{term "r\<^sub>1"} can match the - empty string, then we return a @{text Left}-value. The @{text - Right}-value will only be returned if @{term "r\<^sub>1"} cannot match the empty + empty string, then we return a \Left\-value. The \Right\-value will only be returned if @{term "r\<^sub>1"} cannot match the empty string. The most interesting idea from Sulzmann and Lu \cite{Sulzmann2014} is @@ -690,25 +686,25 @@ might be instructive to look first at the three sequence cases (clauses \textit{(4)} -- \textit{(6)}). In each case we need to construct an ``injected value'' for @{term "SEQ r\<^sub>1 r\<^sub>2"}. This must be a value of the form @{term - "Seq DUMMY DUMMY"}\,. Recall the clause of the @{text derivative}-function + "Seq DUMMY DUMMY"}\,. Recall the clause of the \derivative\-function for sequence regular expressions: \begin{center} @{thm (lhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]} $\dn$ @{thm (rhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]} \end{center} - \noindent Consider first the @{text "else"}-branch where the derivative is @{term + \noindent Consider first the \else\-branch where the derivative is @{term "SEQ (der c r\<^sub>1) r\<^sub>2"}. The corresponding value must therefore be of the form @{term "Seq v\<^sub>1 v\<^sub>2"}, which matches the left-hand - side in clause~\textit{(4)} of @{term inj}. In the @{text "if"}-branch the derivative is an + side in clause~\textit{(4)} of @{term inj}. In the \if\-branch the derivative is an alternative, namely @{term "ALT (SEQ (der c r\<^sub>1) r\<^sub>2) (der c - r\<^sub>2)"}. This means we either have to consider a @{text Left}- or - @{text Right}-value. In case of the @{text Left}-value we know further it + r\<^sub>2)"}. This means we either have to consider a \Left\- or + \Right\-value. In case of the \Left\-value we know further it must be a value for a sequence regular expression. Therefore the pattern we match in the clause \textit{(5)} is @{term "Left (Seq v\<^sub>1 v\<^sub>2)"}, while in \textit{(6)} it is just @{term "Right v\<^sub>2"}. One more interesting point is in the right-hand side of clause \textit{(6)}: since in this case the - regular expression @{text "r\<^sub>1"} does not ``contribute'' to + regular expression \r\<^sub>1\ does not ``contribute'' to matching the string, that means it only matches the empty string, we need to call @{const mkeps} in order to construct a value for how @{term "r\<^sub>1"} can match this empty string. A similar argument applies for why we can @@ -728,7 +724,7 @@ value has a prepended character @{term c}; the second part shows that the underlying string of an @{const mkeps}-value is always the empty string (given the regular expression is nullable since otherwise - @{text mkeps} might not be defined). + \mkeps\ might not be defined). \begin{lemma}\mbox{}\smallskip\\\label{Prf_injval_flat} \begin{tabular}{ll} @@ -743,16 +739,16 @@ an induction on @{term r}. There are no interesting cases.\qed \end{proof} - Having defined the @{const mkeps} and @{text inj} function we can extend + Having defined the @{const mkeps} and \inj\ function we can extend \Brz's matcher so that a value is constructed (assuming the regular expression matches the string). The clauses of the Sulzmann and Lu lexer are \begin{center} \begin{tabular}{lcl} @{thm (lhs) lexer.simps(1)} & $\dn$ & @{thm (rhs) lexer.simps(1)}\\ - @{thm (lhs) lexer.simps(2)} & $\dn$ & @{text "case"} @{term "lexer (der c r) s"} @{text of}\\ - & & \phantom{$|$} @{term "None"} @{text "\"} @{term None}\\ - & & $|$ @{term "Some v"} @{text "\"} @{term "Some (injval r c v)"} + @{thm (lhs) lexer.simps(2)} & $\dn$ & \case\ @{term "lexer (der c r) s"} \of\\\ + & & \phantom{$|$} @{term "None"} \\\ @{term None}\\ + & & $|$ @{term "Some v"} \\\ @{term "Some (injval r c v)"} \end{tabular} \end{center} @@ -784,24 +780,24 @@ \begin{figure}[t] \begin{center} \begin{tabular}{c} - @{thm[mode=Axiom] Posix.intros(1)}@{text "P"}@{term "ONE"} \qquad - @{thm[mode=Axiom] Posix.intros(2)}@{text "P"}@{term "c"}\medskip\\ - @{thm[mode=Rule] Posix.intros(3)[of "s" "r\<^sub>1" "v" "r\<^sub>2"]}@{text "P+L"}\qquad - @{thm[mode=Rule] Posix.intros(4)[of "s" "r\<^sub>2" "v" "r\<^sub>1"]}@{text "P+R"}\medskip\\ + @{thm[mode=Axiom] Posix.intros(1)}\P\@{term "ONE"} \qquad + @{thm[mode=Axiom] Posix.intros(2)}\P\@{term "c"}\medskip\\ + @{thm[mode=Rule] Posix.intros(3)[of "s" "r\<^sub>1" "v" "r\<^sub>2"]}\P+L\\qquad + @{thm[mode=Rule] Posix.intros(4)[of "s" "r\<^sub>2" "v" "r\<^sub>1"]}\P+R\\medskip\\ $\mprset{flushleft} \inferrule {@{thm (prem 1) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \qquad @{thm (prem 2) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \\\\ @{thm (prem 3) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}} - {@{thm (concl) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}$@{text "PS"}\\ - @{thm[mode=Axiom] Posix.intros(7)}@{text "P[]"}\medskip\\ + {@{thm (concl) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}$\PS\\\ + @{thm[mode=Axiom] Posix.intros(7)}\P[]\\medskip\\ $\mprset{flushleft} \inferrule {@{thm (prem 1) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad @{thm (prem 2) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad @{thm (prem 3) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \\\\ @{thm (prem 4) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}} - {@{thm (concl) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}$@{text "P\"} + {@{thm (concl) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}$\P\\ \end{tabular} \end{center} \caption{Our inductive definition of POSIX values.}\label{POSIXrules} @@ -825,13 +821,12 @@ \noindent We claim that our @{term "s \ r \ v"} relation captures the idea behind the four informal POSIX rules shown in the Introduction: Consider for example the - rules @{text "P+L"} and @{text "P+R"} where the POSIX value for a string + rules \P+L\ and \P+R\ where the POSIX value for a string and an alternative regular expression, that is @{term "(s, ALT r\<^sub>1 r\<^sub>2)"}, - is specified---it is always a @{text "Left"}-value, \emph{except} when the + is specified---it is always a \Left\-value, \emph{except} when the string to be matched is not in the language of @{term "r\<^sub>1"}; only then it - is a @{text Right}-value (see the side-condition in @{text "P+R"}). - Interesting is also the rule for sequence regular expressions (@{text - "PS"}). The first two premises state that @{term "v\<^sub>1"} and @{term "v\<^sub>2"} + is a \Right\-value (see the side-condition in \P+R\). + Interesting is also the rule for sequence regular expressions (\PS\). The first two premises state that @{term "v\<^sub>1"} and @{term "v\<^sub>2"} are the POSIX values for @{term "(s\<^sub>1, r\<^sub>1)"} and @{term "(s\<^sub>2, r\<^sub>2)"} respectively. Consider now the third premise and note that the POSIX value of this rule should match the string \mbox{@{term "s\<^sub>1 @ s\<^sub>2"}}. According to the @@ -841,21 +836,20 @@ \emph{exist} an @{term "s\<^sub>3"} and @{term "s\<^sub>4"} such that @{term "s\<^sub>2"} can be split up into a non-empty string @{term "s\<^sub>3"} and a possibly empty string @{term "s\<^sub>4"}. Moreover the longer string @{term "s\<^sub>1 @ s\<^sub>3"} can be - matched by @{text "r\<^sub>1"} and the shorter @{term "s\<^sub>4"} can still be + matched by \r\<^sub>1\ and the shorter @{term "s\<^sub>4"} can still be matched by @{term "r\<^sub>2"}. In this case @{term "s\<^sub>1"} would \emph{not} be the longest initial split of \mbox{@{term "s\<^sub>1 @ s\<^sub>2"}} and therefore @{term "Seq v\<^sub>1 v\<^sub>2"} cannot be a POSIX value for @{term "(s\<^sub>1 @ s\<^sub>2, SEQ r\<^sub>1 r\<^sub>2)"}. The main point is that our side-condition ensures the Longest Match Rule is satisfied. - A similar condition is imposed on the POSIX value in the @{text - "P\"}-rule. Also there we want that @{term "s\<^sub>1"} is the longest initial + A similar condition is imposed on the POSIX value in the \P\\-rule. Also there we want that @{term "s\<^sub>1"} is the longest initial split of @{term "s\<^sub>1 @ s\<^sub>2"} and furthermore the corresponding value @{term v} cannot be flattened to the empty string. In effect, we require that in each ``iteration'' of the star, some non-empty substring needs to be ``chipped'' away; only in case of the empty string we accept @{term "Stars []"} as the POSIX value. Indeed we can show that our POSIX values - are lexical values which exclude those @{text Stars} that contain subvalues + are lexical values which exclude those \Stars\ that contain subvalues that flatten to the empty string. \begin{lemma}\label{LVposix} @@ -879,7 +873,7 @@ \end{proof} \noindent - The central lemma for our POSIX relation is that the @{text inj}-function + The central lemma for our POSIX relation is that the \inj\-function preserves POSIX values. \begin{lemma}\label{Posix2} @@ -887,17 +881,17 @@ \end{lemma} \begin{proof} - By induction on @{text r}. We explain two cases. + By induction on \r\. We explain two cases. \begin{itemize} \item[$\bullet$] Case @{term "r = ALT r\<^sub>1 r\<^sub>2"}. There are - two subcases, namely @{text "(a)"} \mbox{@{term "v = Left v'"}} and @{term - "s \ der c r\<^sub>1 \ v'"}; and @{text "(b)"} @{term "v = Right v'"}, @{term - "s \ L (der c r\<^sub>1)"} and @{term "s \ der c r\<^sub>2 \ v'"}. In @{text "(a)"} we + two subcases, namely \(a)\ \mbox{@{term "v = Left v'"}} and @{term + "s \ der c r\<^sub>1 \ v'"}; and \(b)\ @{term "v = Right v'"}, @{term + "s \ L (der c r\<^sub>1)"} and @{term "s \ der c r\<^sub>2 \ v'"}. In \(a)\ we know @{term "s \ der c r\<^sub>1 \ v'"}, from which we can infer @{term "(c # s) \ r\<^sub>1 \ injval r\<^sub>1 c v'"} by induction hypothesis and hence @{term "(c # s) \ ALT r\<^sub>1 r\<^sub>2 \ injval (ALT r\<^sub>1 r\<^sub>2) c (Left v')"} as needed. Similarly - in subcase @{text "(b)"} where, however, in addition we have to use + in subcase \(b)\ where, however, in addition we have to use Proposition~\ref{derprop}(2) in order to infer @{term "c # s \ L r\<^sub>1"} from @{term "s \ L (der c r\<^sub>1)"}.\smallskip @@ -905,13 +899,13 @@ \begin{quote} \begin{description} - \item[@{text "(a)"}] @{term "v = Left (Seq v\<^sub>1 v\<^sub>2)"} and @{term "nullable r\<^sub>1"} - \item[@{text "(b)"}] @{term "v = Right v\<^sub>1"} and @{term "nullable r\<^sub>1"} - \item[@{text "(c)"}] @{term "v = Seq v\<^sub>1 v\<^sub>2"} and @{term "\ nullable r\<^sub>1"} + \item[\(a)\] @{term "v = Left (Seq v\<^sub>1 v\<^sub>2)"} and @{term "nullable r\<^sub>1"} + \item[\(b)\] @{term "v = Right v\<^sub>1"} and @{term "nullable r\<^sub>1"} + \item[\(c)\] @{term "v = Seq v\<^sub>1 v\<^sub>2"} and @{term "\ nullable r\<^sub>1"} \end{description} \end{quote} - \noindent For @{text "(a)"} we know @{term "s\<^sub>1 \ der c r\<^sub>1 \ v\<^sub>1"} and + \noindent For \(a)\ we know @{term "s\<^sub>1 \ der c r\<^sub>1 \ v\<^sub>1"} and @{term "s\<^sub>2 \ r\<^sub>2 \ v\<^sub>2"} as well as % \[@{term "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s\<^sub>2 \ s\<^sub>1 @ s\<^sub>3 \ L (der c r\<^sub>1) \ s\<^sub>4 \ L r\<^sub>2)"}\] @@ -920,12 +914,12 @@ % \[@{term "\ (\s\<^sub>3 s\<^sub>4. s\<^sub>3 \ [] \ s\<^sub>3 @ s\<^sub>4 = s\<^sub>2 \ (c # s\<^sub>1) @ s\<^sub>3 \ L r\<^sub>1 \ s\<^sub>4 \ L r\<^sub>2)"}\] - \noindent We can use the induction hypothesis for @{text "r\<^sub>1"} to obtain + \noindent We can use the induction hypothesis for \r\<^sub>1\ to obtain @{term "(c # s\<^sub>1) \ r\<^sub>1 \ injval r\<^sub>1 c v\<^sub>1"}. Putting this all together allows us to infer - @{term "((c # s\<^sub>1) @ s\<^sub>2) \ SEQ r\<^sub>1 r\<^sub>2 \ Seq (injval r\<^sub>1 c v\<^sub>1) v\<^sub>2"}. The case @{text "(c)"} + @{term "((c # s\<^sub>1) @ s\<^sub>2) \ SEQ r\<^sub>1 r\<^sub>2 \ Seq (injval r\<^sub>1 c v\<^sub>1) v\<^sub>2"}. The case \(c)\ is similar. - For @{text "(b)"} we know @{term "s \ der c r\<^sub>2 \ v\<^sub>1"} and + For \(b)\ we know @{term "s \ der c r\<^sub>2 \ v\<^sub>1"} and @{term "s\<^sub>1 @ s\<^sub>2 \ L (SEQ (der c r\<^sub>1) r\<^sub>2)"}. From the former we have @{term "(c # s) \ r\<^sub>2 \ (injval r\<^sub>2 c v\<^sub>1)"} by induction hypothesis for @{term "r\<^sub>2"}. From the latter we can infer @@ -979,11 +973,11 @@ In the next section we show that our specification coincides with another one given by Okui and Suzuki using a different technique. -*} +\ -section {* Ordering of Values according to Okui and Suzuki*} +section \Ordering of Values according to Okui and Suzuki\ -text {* +text \ While in the previous section we have defined POSIX values directly in terms of a ternary relation (see inference rules in Figure~\ref{POSIXrules}), @@ -1014,54 +1008,51 @@ \end{center} \noindent - At position @{text "[0,1]"} of this value is the - subvalue @{text "Char y"} and at position @{text "[1]"} the + At position \[0,1]\ of this value is the + subvalue \Char y\ and at position \[1]\ the subvalue @{term "Char z"}. At the `root' position, or empty list - @{term "[]"}, is the whole value @{term v}. Positions such as @{text - "[0,1,0]"} or @{text "[2]"} are outside of @{text - v}. If it exists, the subvalue of @{term v} at a position @{text - p}, written @{term "at v p"}, can be recursively defined by + @{term "[]"}, is the whole value @{term v}. Positions such as \[0,1,0]\ or \[2]\ are outside of \v\. If it exists, the subvalue of @{term v} at a position \p\, written @{term "at v p"}, can be recursively defined by \begin{center} \begin{tabular}{r@ {\hspace{0mm}}lcl} - @{term v} & @{text "\\<^bsub>[]\<^esub>"} & @{text "\"}& @{thm (rhs) at.simps(1)}\\ - @{term "Left v"} & @{text "\\<^bsub>0::ps\<^esub>"} & @{text "\"}& @{thm (rhs) at.simps(2)}\\ - @{term "Right v"} & @{text "\\<^bsub>1::ps\<^esub>"} & @{text "\"} & + @{term v} & \\\<^bsub>[]\<^esub>\ & \\\& @{thm (rhs) at.simps(1)}\\ + @{term "Left v"} & \\\<^bsub>0::ps\<^esub>\ & \\\& @{thm (rhs) at.simps(2)}\\ + @{term "Right v"} & \\\<^bsub>1::ps\<^esub>\ & \\\ & @{thm (rhs) at.simps(3)[simplified Suc_0_fold]}\\ - @{term "Seq v\<^sub>1 v\<^sub>2"} & @{text "\\<^bsub>0::ps\<^esub>"} & @{text "\"} & + @{term "Seq v\<^sub>1 v\<^sub>2"} & \\\<^bsub>0::ps\<^esub>\ & \\\ & @{thm (rhs) at.simps(4)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} \\ - @{term "Seq v\<^sub>1 v\<^sub>2"} & @{text "\\<^bsub>1::ps\<^esub>"} - & @{text "\"} & + @{term "Seq v\<^sub>1 v\<^sub>2"} & \\\<^bsub>1::ps\<^esub>\ + & \\\ & @{thm (rhs) at.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2", simplified Suc_0_fold]} \\ - @{term "Stars vs"} & @{text "\\<^bsub>n::ps\<^esub>"} & @{text "\"}& @{thm (rhs) at.simps(6)}\\ + @{term "Stars vs"} & \\\<^bsub>n::ps\<^esub>\ & \\\& @{thm (rhs) at.simps(6)}\\ \end{tabular} \end{center} \noindent In the last clause we use Isabelle's notation @{term "vs ! n"} for the - @{text n}th element in a list. The set of positions inside a value @{text v}, + \n\th element in a list. The set of positions inside a value \v\, written @{term "Pos v"}, is given by \begin{center} \begin{tabular}{lcl} - @{thm (lhs) Pos.simps(1)} & @{text "\"} & @{thm (rhs) Pos.simps(1)}\\ - @{thm (lhs) Pos.simps(2)} & @{text "\"} & @{thm (rhs) Pos.simps(2)}\\ - @{thm (lhs) Pos.simps(3)} & @{text "\"} & @{thm (rhs) Pos.simps(3)}\\ - @{thm (lhs) Pos.simps(4)} & @{text "\"} & @{thm (rhs) Pos.simps(4)}\\ + @{thm (lhs) Pos.simps(1)} & \\\ & @{thm (rhs) Pos.simps(1)}\\ + @{thm (lhs) Pos.simps(2)} & \\\ & @{thm (rhs) Pos.simps(2)}\\ + @{thm (lhs) Pos.simps(3)} & \\\ & @{thm (rhs) Pos.simps(3)}\\ + @{thm (lhs) Pos.simps(4)} & \\\ & @{thm (rhs) Pos.simps(4)}\\ @{thm (lhs) Pos.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} - & @{text "\"} + & \\\ & @{thm (rhs) Pos.simps(5)[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]}\\ - @{thm (lhs) Pos_stars} & @{text "\"} & @{thm (rhs) Pos_stars}\\ + @{thm (lhs) Pos_stars} & \\\ & @{thm (rhs) Pos_stars}\\ \end{tabular} \end{center} \noindent - whereby @{text len} in the last clause stands for the length of a list. Clearly + whereby \len\ in the last clause stands for the length of a list. Clearly for every position inside a value there exists a subvalue at that position. To help understanding the ordering of Okui and Suzuki, consider again the earlier value - @{text v} and compare it with the following @{text w}: + \v\ and compare it with the following \w\: \begin{center} \begin{tabular}{l} @@ -1070,16 +1061,16 @@ \end{tabular} \end{center} - \noindent Both values match the string @{text "xyz"}, that means if + \noindent Both values match the string \xyz\, that means if we flatten these values at their respective root position, we obtain - @{text "xyz"}. However, at position @{text "[0]"}, @{text v} matches - @{text xy} whereas @{text w} matches only the shorter @{text x}. So - according to the Longest Match Rule, we should prefer @{text v}, - rather than @{text w} as POSIX value for string @{text xyz} (and + \xyz\. However, at position \[0]\, \v\ matches + \xy\ whereas \w\ matches only the shorter \x\. So + according to the Longest Match Rule, we should prefer \v\, + rather than \w\ as POSIX value for string \xyz\ (and corresponding regular expression). In order to formalise this idea, Okui and Suzuki introduce a measure for - subvalues at position @{text p}, called the \emph{norm} of @{text v} - at position @{text p}. We can define this measure in Isabelle as an + subvalues at position \p\, called the \emph{norm} of \v\ + at position \p\. We can define this measure in Isabelle as an integer as follows \begin{center} @@ -1087,10 +1078,10 @@ \end{center} \noindent where we take the length of the flattened value at - position @{text p}, provided the position is inside @{text v}; if - not, then the norm is @{text "-1"}. The default for outside + position \p\, provided the position is inside \v\; if + not, then the norm is \-1\. The default for outside positions is crucial for the POSIX requirement of preferring a - @{text Left}-value over a @{text Right}-value (if they can match the + \Left\-value over a \Right\-value (if they can match the same string---see the Priority Rule from the Introduction). For this consider @@ -1098,17 +1089,17 @@ @{term "v == Left (Char x)"} \qquad and \qquad @{term "w == Right (Char x)"} \end{center} - \noindent Both values match @{text x}. At position @{text "[0]"} - the norm of @{term v} is @{text 1} (the subvalue matches @{text x}), - but the norm of @{text w} is @{text "-1"} (the position is outside - @{text w} according to how we defined the `inside' positions of - @{text Left}- and @{text Right}-values). Of course at position - @{text "[1]"}, the norms @{term "pflat_len v [1]"} and @{term + \noindent Both values match \x\. At position \[0]\ + the norm of @{term v} is \1\ (the subvalue matches \x\), + but the norm of \w\ is \-1\ (the position is outside + \w\ according to how we defined the `inside' positions of + \Left\- and \Right\-values). Of course at position + \[1]\, the norms @{term "pflat_len v [1]"} and @{term "pflat_len w [1]"} are reversed, but the point is that subvalues will be analysed according to lexicographically ordered - positions. According to this ordering, the position @{text "[0]"} - takes precedence over @{text "[1]"} and thus also @{text v} will be - preferred over @{text w}. The lexicographic ordering of positions, written + positions. According to this ordering, the position \[0]\ + takes precedence over \[1]\ and thus also \v\ will be + preferred over \w\. The lexicographic ordering of positions, written @{term "DUMMY \lex DUMMY"}, can be conveniently formalised by three inference rules @@ -1123,18 +1114,18 @@ With the norm and lexicographic order in place, we can state the key definition of Okui and Suzuki - \cite{OkuiSuzuki2010}: a value @{term "v\<^sub>1"} is \emph{smaller at position @{text p}} than + \cite{OkuiSuzuki2010}: a value @{term "v\<^sub>1"} is \emph{smaller at position \p\} than @{term "v\<^sub>2"}, written @{term "v\<^sub>1 \val p v\<^sub>2"}, - if and only if $(i)$ the norm at position @{text p} is + if and only if $(i)$ the norm at position \p\ is greater in @{term "v\<^sub>1"} (that is the string @{term "flat (at v\<^sub>1 p)"} is longer than @{term "flat (at v\<^sub>2 p)"}) and $(ii)$ all subvalues at positions that are inside @{term "v\<^sub>1"} or @{term "v\<^sub>2"} and that are - lexicographically smaller than @{text p}, we have the same norm, namely + lexicographically smaller than \p\, we have the same norm, namely \begin{center} \begin{tabular}{c} @{thm (lhs) PosOrd_def[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2"]} - @{text "\"} + \\\ $\begin{cases} (i) & @{term "pflat_len v\<^sub>1 p > pflat_len v\<^sub>2 p"} \quad\text{and}\smallskip \\ (ii) & @{term "(\q \ Pos v\<^sub>1 \ Pos v\<^sub>2. q \lex p --> pflat_len v\<^sub>1 q = pflat_len v\<^sub>2 q)"} @@ -1142,11 +1133,9 @@ \end{tabular} \end{center} - \noindent The position @{text p} in this definition acts as the - \emph{first distinct position} of @{text "v\<^sub>1"} and @{text - "v\<^sub>2"}, where both values match strings of different length - \cite{OkuiSuzuki2010}. Since at @{text p} the values @{text - "v\<^sub>1"} and @{text "v\<^sub>2"} match different strings, the + \noindent The position \p\ in this definition acts as the + \emph{first distinct position} of \v\<^sub>1\ and \v\<^sub>2\, where both values match strings of different length + \cite{OkuiSuzuki2010}. Since at \p\ the values \v\<^sub>1\ and \v\<^sub>2\ match different strings, the ordering is irreflexive. Derived from the definition above are the following two orderings: @@ -1168,11 +1157,8 @@ @{thm [mode=IfThen] PosOrd_trans[where ?v1.0="v\<^sub>1" and ?v2.0="v\<^sub>2" and ?v3.0="v\<^sub>3"]} \end{lemma} - \begin{proof} From the assumption we obtain two positions @{text p} - and @{text q}, where the values @{text "v\<^sub>1"} and @{text - "v\<^sub>2"} (respectively @{text "v\<^sub>2"} and @{text - "v\<^sub>3"}) are `distinct'. Since @{text - "\\<^bsub>lex\<^esub>"} is trichotomous, we need to consider + \begin{proof} From the assumption we obtain two positions \p\ + and \q\, where the values \v\<^sub>1\ and \v\<^sub>2\ (respectively \v\<^sub>2\ and \v\<^sub>3\) are `distinct'. Since \\\<^bsub>lex\<^esub>\ is trichotomous, we need to consider three cases, namely @{term "p = q"}, @{term "p \lex q"} and @{term "q \lex p"}. Let us look at the first case. Clearly @{term "pflat_len v\<^sub>2 p < pflat_len v\<^sub>1 p"} and @{term @@ -1184,7 +1170,7 @@ v\<^sub>1"}, then we can infer from the first assumption that @{term "pflat_len v\<^sub>1 p' = pflat_len v\<^sub>2 p'"}. But this means that @{term "p'"} must be in @{term "Pos v\<^sub>2"} too (the norm - cannot be @{text "-1"} given @{term "p' \ Pos v\<^sub>1"}). + cannot be \-1\ given @{term "p' \ Pos v\<^sub>1"}). Hence we can use the second assumption and infer @{term "pflat_len v\<^sub>2 p' = pflat_len v\<^sub>3 p'"}, which concludes this case with @{term "v\<^sub>1 :\val @@ -1193,15 +1179,15 @@ \noindent The proof for $\preccurlyeq$ is similar and omitted. - It is also straightforward to show that @{text "\"} and + It is also straightforward to show that \\\ and $\preccurlyeq$ are partial orders. Okui and Suzuki furthermore show that they are linear orderings for lexical values \cite{OkuiSuzuki2010} of a given regular expression and given string, but we have not formalised this in Isabelle. It is not essential for our results. What we are going to show below is - that for a given @{text r} and @{text s}, the orderings have a unique + that for a given \r\ and \s\, the orderings have a unique minimal element on the set @{term "LV r s"}, which is the POSIX value we defined in the previous section. We start with two properties that - show how the length of a flattened value relates to the @{text "\"}-ordering. + show how the length of a flattened value relates to the \\\-ordering. \begin{proposition}\mbox{}\smallskip\\\label{ordlen} \begin{tabular}{@ {}ll} @@ -1259,8 +1245,7 @@ \noindent One might prefer that statements \textit{(4)} and \textit{(5)} (respectively \textit{(6)} and \textit{(7)}) - are combined into a single \textit{iff}-statement (like the ones for @{text - Left} and @{text Right}). Unfortunately this cannot be done easily: such + are combined into a single \textit{iff}-statement (like the ones for \Left\ and \Right\). Unfortunately this cannot be done easily: such a single statement would require an additional assumption about the two values @{term "Seq v\<^sub>1 v\<^sub>2"} and @{term "Seq w\<^sub>1 w\<^sub>2"} being inhabited by the same regular expression. The @@ -1271,10 +1256,8 @@ Next we establish how Okui and Suzuki's orderings relate to our - definition of POSIX values. Given a @{text POSIX} value @{text "v\<^sub>1"} - for @{text r} and @{text s}, then any other lexical value @{text - "v\<^sub>2"} in @{term "LV r s"} is greater or equal than @{text - "v\<^sub>1"}, namely: + definition of POSIX values. Given a \POSIX\ value \v\<^sub>1\ + for \r\ and \s\, then any other lexical value \v\<^sub>2\ in @{term "LV r s"} is greater or equal than \v\<^sub>1\, namely: \begin{theorem}\label{orderone} @@ -1283,53 +1266,53 @@ \begin{proof} By induction on our POSIX rules. By Theorem~\ref{posixdeterm} and the definition of @{const LV}, it is clear - that @{text "v\<^sub>1"} and @{text "v\<^sub>2"} have the same + that \v\<^sub>1\ and \v\<^sub>2\ have the same underlying string @{term s}. The three base cases are straightforward: for example for @{term "v\<^sub>1 = Void"}, we have that @{term "v\<^sub>2 \ LV ONE []"} must also be of the form \mbox{@{term "v\<^sub>2 = Void"}}. Therefore we have @{term "v\<^sub>1 :\val v\<^sub>2"}. The inductive cases for - @{text r} being of the form @{term "ALT r\<^sub>1 r\<^sub>2"} and + \r\ being of the form @{term "ALT r\<^sub>1 r\<^sub>2"} and @{term "SEQ r\<^sub>1 r\<^sub>2"} are as follows: \begin{itemize} - \item[$\bullet$] Case @{text "P+L"} with @{term "s \ (ALT r\<^sub>1 r\<^sub>2) + \item[$\bullet$] Case \P+L\ with @{term "s \ (ALT r\<^sub>1 r\<^sub>2) \ (Left w\<^sub>1)"}: In this case the value @{term "v\<^sub>2"} is either of the form @{term "Left w\<^sub>2"} or @{term "Right w\<^sub>2"}. In the latter case we can immediately conclude with \mbox{@{term "v\<^sub>1 - :\val v\<^sub>2"}} since a @{text Left}-value with the - same underlying string @{text s} is always smaller than a - @{text Right}-value by Proposition~\ref{ordintros}\textit{(1)}. + :\val v\<^sub>2"}} since a \Left\-value with the + same underlying string \s\ is always smaller than a + \Right\-value by Proposition~\ref{ordintros}\textit{(1)}. In the former case we have @{term "w\<^sub>2 \ LV r\<^sub>1 s"} and can use the induction hypothesis to infer @{term "w\<^sub>1 :\val w\<^sub>2"}. Because @{term "w\<^sub>1"} and @{term "w\<^sub>2"} have the same underlying string - @{text s}, we can conclude with @{term "Left w\<^sub>1 + \s\, we can conclude with @{term "Left w\<^sub>1 :\val Left w\<^sub>2"} using Proposition~\ref{ordintros}\textit{(2)}.\smallskip - \item[$\bullet$] Case @{text "P+R"} with @{term "s \ (ALT r\<^sub>1 r\<^sub>2) + \item[$\bullet$] Case \P+R\ with @{term "s \ (ALT r\<^sub>1 r\<^sub>2) \ (Right w\<^sub>1)"}: This case similar to the previous case, except that we additionally know @{term "s \ L r\<^sub>1"}. This is needed when @{term "v\<^sub>2"} is of the form \mbox{@{term "Left w\<^sub>2"}}. Since \mbox{@{term "flat v\<^sub>2 = flat - w\<^sub>2"} @{text "= s"}} and @{term "\ w\<^sub>2 : + w\<^sub>2"} \= s\} and @{term "\ w\<^sub>2 : r\<^sub>1"}, we can derive a contradiction for \mbox{@{term "s \ L r\<^sub>1"}} using Proposition~\ref{inhabs}. So also in this case \mbox{@{term "v\<^sub>1 :\val v\<^sub>2"}}.\smallskip - \item[$\bullet$] Case @{text "PS"} with @{term "(s\<^sub>1 @ + \item[$\bullet$] Case \PS\ with @{term "(s\<^sub>1 @ s\<^sub>2) \ (SEQ r\<^sub>1 r\<^sub>2) \ (Seq w\<^sub>1 w\<^sub>2)"}: We can assume @{term "v\<^sub>2 = Seq (u\<^sub>1) (u\<^sub>2)"} with @{term "\ u\<^sub>1 : r\<^sub>1"} and \mbox{@{term "\ u\<^sub>2 : r\<^sub>2"}}. We have @{term "s\<^sub>1 @ s\<^sub>2 = (flat u\<^sub>1) @ (flat u\<^sub>2)"}. By the side-condition of the - @{text PS}-rule we know that either @{term "s\<^sub>1 = flat + \PS\-rule we know that either @{term "s\<^sub>1 = flat u\<^sub>1"} or that @{term "flat u\<^sub>1"} is a strict prefix of @{term "s\<^sub>1"}. In the latter case we can infer @{term "w\<^sub>1 :\val u\<^sub>1"} by @@ -1348,19 +1331,18 @@ \end{itemize} - \noindent The case for @{text "P\"} is similar to the @{text PS}-case and omitted.\qed + \noindent The case for \P\\ is similar to the \PS\-case and omitted.\qed \end{proof} - \noindent This theorem shows that our @{text POSIX} value for a - regular expression @{text r} and string @{term s} is in fact a - minimal element of the values in @{text "LV r s"}. By + \noindent This theorem shows that our \POSIX\ value for a + regular expression \r\ and string @{term s} is in fact a + minimal element of the values in \LV r s\. By Proposition~\ref{ordlen}\textit{(2)} we also know that any value in - @{text "LV r s'"}, with @{term "s'"} being a strict prefix, cannot be - smaller than @{text "v\<^sub>1"}. The next theorem shows the + \LV r s'\, with @{term "s'"} being a strict prefix, cannot be + smaller than \v\<^sub>1\. The next theorem shows the opposite---namely any minimal element in @{term "LV r s"} must be a - @{text POSIX} value. This can be established by induction on @{text - r}, but the proof can be drastically simplified by using the fact - from the previous section about the existence of a @{text POSIX} value + \POSIX\ value. This can be established by induction on \r\, but the proof can be drastically simplified by using the fact + from the previous section about the existence of a \POSIX\ value whenever a string @{term "s \ L r"}. @@ -1372,7 +1354,7 @@ If @{thm (prem 1) PosOrd_Posix[where ?v1.0="v\<^sub>1"]} then @{term "s \ L r"} by Proposition~\ref{inhabs}. Hence by Theorem~\ref{lexercorrect}(2) there exists a - @{text POSIX} value @{term "v\<^sub>P"} with @{term "s \ r \ v\<^sub>P"} + \POSIX\ value @{term "v\<^sub>P"} with @{term "s \ r \ v\<^sub>P"} and by Lemma~\ref{LVposix} we also have \mbox{@{term "v\<^sub>P \ LV r s"}}. By Theorem~\ref{orderone} we therefore have @{term "v\<^sub>P :\val v\<^sub>1"}. If @{term "v\<^sub>P = v\<^sub>1"} then @@ -1393,17 +1375,17 @@ \noindent To sum up, we have shown that the (unique) minimal elements - of the ordering by Okui and Suzuki are exactly the @{text POSIX} + of the ordering by Okui and Suzuki are exactly the \POSIX\ values we defined inductively in Section~\ref{posixsec}. This provides - an independent confirmation that our ternary relation formalise the + an independent confirmation that our ternary relation formalises the informal POSIX rules. -*} +\ -section {* Bitcoded Lexing *} +section \Bitcoded Lexing\ -text {* +text \ Incremental calculation of the value. To simplify the proof we first define the function @{const flex} which calculates the ``iterated'' injection function. With this we can @@ -1502,7 +1484,7 @@ @{thm [mode=IfThen] bder_retrieve} -By induction on @{text r} +By induction on \r\ \begin{theorem}[Main Lemma]\mbox{}\\ @{thm [mode=IfThen] MAIN_decode} @@ -1518,11 +1500,11 @@ @{thm blexer_correctness} \end{theorem} -*} +\ -section {* Optimisations *} +section \Optimisations\ -text {* +text \ Derivatives as calculated by \Brz's method are usually more complex regular expressions than the initial one; the result is that the @@ -1538,10 +1520,10 @@ \begin{equation}\label{Simpl} \begin{array}{lcllcllcllcl} - @{term "ALT ZERO r"} & @{text "\"} & @{term r} \hspace{8mm}%\\ - @{term "ALT r ZERO"} & @{text "\"} & @{term r} \hspace{8mm}%\\ - @{term "SEQ ONE r"} & @{text "\"} & @{term r} \hspace{8mm}%\\ - @{term "SEQ r ONE"} & @{text "\"} & @{term r} + @{term "ALT ZERO r"} & \\\ & @{term r} \hspace{8mm}%\\ + @{term "ALT r ZERO"} & \\\ & @{term r} \hspace{8mm}%\\ + @{term "SEQ ONE r"} & \\\ & @{term r} \hspace{8mm}%\\ + @{term "SEQ r ONE"} & \\\ & @{term r} \end{array} \end{equation} @@ -1558,15 +1540,15 @@ \begin{center} \begin{tabular}{lcl} - @{thm (lhs) F_RIGHT.simps(1)} & $\dn$ & @{text "Right (f v)"}\\ - @{thm (lhs) F_LEFT.simps(1)} & $\dn$ & @{text "Left (f v)"}\\ + @{thm (lhs) F_RIGHT.simps(1)} & $\dn$ & \Right (f v)\\\ + @{thm (lhs) F_LEFT.simps(1)} & $\dn$ & \Left (f v)\\\ - @{thm (lhs) F_ALT.simps(1)} & $\dn$ & @{text "Right (f\<^sub>2 v)"}\\ - @{thm (lhs) F_ALT.simps(2)} & $\dn$ & @{text "Left (f\<^sub>1 v)"}\\ + @{thm (lhs) F_ALT.simps(1)} & $\dn$ & \Right (f\<^sub>2 v)\\\ + @{thm (lhs) F_ALT.simps(2)} & $\dn$ & \Left (f\<^sub>1 v)\\\ - @{thm (lhs) F_SEQ1.simps(1)} & $\dn$ & @{text "Seq (f\<^sub>1 ()) (f\<^sub>2 v)"}\\ - @{thm (lhs) F_SEQ2.simps(1)} & $\dn$ & @{text "Seq (f\<^sub>1 v) (f\<^sub>2 ())"}\\ - @{thm (lhs) F_SEQ.simps(1)} & $\dn$ & @{text "Seq (f\<^sub>1 v\<^sub>1) (f\<^sub>2 v\<^sub>2)"}\medskip\\ + @{thm (lhs) F_SEQ1.simps(1)} & $\dn$ & \Seq (f\<^sub>1 ()) (f\<^sub>2 v)\\\ + @{thm (lhs) F_SEQ2.simps(1)} & $\dn$ & \Seq (f\<^sub>1 v) (f\<^sub>2 ())\\\ + @{thm (lhs) F_SEQ.simps(1)} & $\dn$ & \Seq (f\<^sub>1 v\<^sub>1) (f\<^sub>2 v\<^sub>2)\\medskip\\ %\end{tabular} % %\begin{tabular}{lcl} @@ -1580,7 +1562,7 @@ \end{center} \noindent - The functions @{text "simp\<^bsub>Alt\<^esub>"} and @{text "simp\<^bsub>Seq\<^esub>"} encode the simplification rules + The functions \simp\<^bsub>Alt\<^esub>\ and \simp\<^bsub>Seq\<^esub>\ encode the simplification rules in \eqref{Simpl} and compose the rectification functions (simplifications can occur deep inside the regular expression). The main simplification function is then @@ -1602,17 +1584,17 @@ \begin{tabular}{lcl} @{thm (lhs) slexer.simps(1)} & $\dn$ & @{thm (rhs) slexer.simps(1)}\\ @{thm (lhs) slexer.simps(2)} & $\dn$ & - @{text "let (r\<^sub>s, f\<^sub>r) = simp (r "}$\backslash$@{text " c) in"}\\ - & & @{text "case"} @{term "slexer r\<^sub>s s"} @{text of}\\ - & & \phantom{$|$} @{term "None"} @{text "\"} @{term None}\\ - & & $|$ @{term "Some v"} @{text "\"} @{text "Some (inj r c (f\<^sub>r v))"} + \let (r\<^sub>s, f\<^sub>r) = simp (r \$\backslash$\ c) in\\\ + & & \case\ @{term "slexer r\<^sub>s s"} \of\\\ + & & \phantom{$|$} @{term "None"} \\\ @{term None}\\ + & & $|$ @{term "Some v"} \\\ \Some (inj r c (f\<^sub>r v))\ \end{tabular} \end{center} \noindent In the second clause we first calculate the derivative @{term "der c r"} and then simplify the result. This gives us a simplified derivative - @{text "r\<^sub>s"} and a rectification function @{text "f\<^sub>r"}. The lexer + \r\<^sub>s\ and a rectification function \f\<^sub>r\. The lexer is then recursively called with the simplified derivative, but before we inject the character @{term c} into the value @{term v}, we need to rectify @{term v} (that is construct @{term "f\<^sub>r v"}). Before we can establish the correctness @@ -1627,7 +1609,7 @@ \end{tabular} \end{lemma} - \begin{proof} Both are by induction on @{text r}. There is no + \begin{proof} Both are by induction on \r\. There is no interesting case for the first statement. For the second statement, of interest are the @{term "r = ALT r\<^sub>1 r\<^sub>2"} and @{term "r = SEQ r\<^sub>1 r\<^sub>2"} cases. In each case we have to analyse four subcases whether @@ -1639,7 +1621,7 @@ and by IH also (*) @{term "s \ r\<^sub>2 \ (snd (simp r\<^sub>2) v)"}. Given @{term "fst (simp r\<^sub>1) = ZERO"} we know @{term "L (fst (simp r\<^sub>1)) = {}"}. By the first statement @{term "L r\<^sub>1"} is the empty set, meaning (**) @{term "s \ L r\<^sub>1"}. - Taking (*) and (**) together gives by the \mbox{@{text "P+R"}}-rule + Taking (*) and (**) together gives by the \mbox{\P+R\}-rule @{term "s \ ALT r\<^sub>1 r\<^sub>2 \ Right (snd (simp r\<^sub>2) v)"}. In turn this gives @{term "s \ ALT r\<^sub>1 r\<^sub>2 \ snd (simp (ALT r\<^sub>1 r\<^sub>2)) v"} as we need to show. The other cases are similar.\qed @@ -1683,12 +1665,12 @@ \end{proof} -*} +\ -section {* HERE *} +section \HERE\ -text {* +text \ \begin{center} \begin{tabular}{llcl} 1) & @{thm (lhs) erase.simps(1)} & $\dn$ & @{thm (rhs) erase.simps(1)}\\ @@ -1751,7 +1733,7 @@ We can move out the @{term "fuse [Z]"} and then use the IH to show that left-hand side and right-hand side are equal. This completes the proof. \end{proof} -*} +\ diff -r a730a5a0bab9 -r 89e6605c4ca4 thys/Lexer.thy --- a/thys/Lexer.thy Tue Jul 23 21:21:49 2019 +0100 +++ b/thys/Lexer.thy Mon Jul 29 09:37:20 2019 +0100 @@ -299,6 +299,105 @@ apply(simp_all add: flex_fun_apply) done -unused_thms +lemma Posix_flex: + assumes "s2 \ (ders s1 r) \ v" + shows "(s1 @ s2) \ r \ flex r id s1 v" + using assms + apply(induct s1 arbitrary: r v s2) + apply(simp) + apply(simp) + apply(drule_tac x="der a r" in meta_spec) + apply(drule_tac x="v" in meta_spec) + apply(drule_tac x="s2" in meta_spec) + apply(simp) + using Posix_injval + apply(drule_tac Posix_injval) + apply(subst (asm) (5) flex_fun_apply) + apply(simp) + done + +lemma injval_inj: + assumes "\ a : (der c r)" "\ v : (der c r)" "injval r c a = injval r c v" + shows "a = v" + using assms + apply(induct r arbitrary: a c v) + apply(auto) + using Prf_elims(1) apply blast + using Prf_elims(1) apply blast + apply(case_tac "c = x") + apply(auto) + using Prf_elims(4) apply auto[1] + using Prf_elims(1) apply blast + prefer 2 + apply (smt Prf_elims(3) injval.simps(2) injval.simps(3) val.distinct(25) val.inject(3) val.inject(4)) + apply(case_tac "nullable r1") + apply(auto) + apply(erule Prf_elims) + apply(erule Prf_elims) + apply(erule Prf_elims) + apply(erule Prf_elims) + apply(auto) + apply (metis Prf_injval_flat list.distinct(1) mkeps_flat) + apply(erule Prf_elims) + apply(erule Prf_elims) + apply(auto) + using Prf_injval_flat mkeps_flat apply fastforce + apply(erule Prf_elims) + apply(erule Prf_elims) + apply(auto) + apply(erule Prf_elims) + apply(erule Prf_elims) + apply(auto) + apply (smt Prf_elims(6) injval.simps(7) list.inject val.inject(5)) + by (smt Prf_elims(6) injval.simps(7) list.inject val.inject(5)) + + + +lemma uu: + assumes "(c # s) \ r \ injval r c v" "\ v : (der c r)" + shows "s \ der c r \ v" + using assms + apply - + apply(subgoal_tac "lexer r (c # s) = Some (injval r c v)") + prefer 2 + using lexer_correctness(1) apply blast + apply(simp add: ) + apply(case_tac "lexer (der c r) s") + apply(simp) + apply(simp) + apply(case_tac "s \ der c r \ a") + prefer 2 + apply (simp add: lexer_correctness(1)) + apply(subgoal_tac "\ a : (der c r)") + prefer 2 + using Posix_Prf apply blast + using injval_inj by blast + + +lemma Posix_flex2: + assumes "(s1 @ s2) \ r \ flex r id s1 v" "\ v : ders s1 r" + shows "s2 \ (ders s1 r) \ v" + using assms + apply(induct s1 arbitrary: r v s2 rule: rev_induct) + apply(simp) + apply(simp) + apply(drule_tac x="r" in meta_spec) + apply(drule_tac x="injval (ders xs r) x v" in meta_spec) + apply(drule_tac x="x#s2" in meta_spec) + apply(simp add: flex_append ders_append) + using Prf_injval uu by blast + +lemma Posix_flex3: + assumes "s1 \ r \ flex r id s1 v" "\ v : ders s1 r" + shows "[] \ (ders s1 r) \ v" + using assms + by (simp add: Posix_flex2) + +lemma flex_injval: + shows "flex (der a r) (injval r a) s v = injval r a (flex (der a r) id s v)" + by (simp add: flex_fun_apply) + + + end \ No newline at end of file diff -r a730a5a0bab9 -r 89e6605c4ca4 thys/Positions.thy --- a/thys/Positions.thy Tue Jul 23 21:21:49 2019 +0100 +++ b/thys/Positions.thy Mon Jul 29 09:37:20 2019 +0100 @@ -3,9 +3,9 @@ imports "Spec" "Lexer" begin -chapter {* An alternative definition for POSIX values *} +chapter \An alternative definition for POSIX values\ -section {* Positions in Values *} +section \Positions in Values\ fun at :: "val \ nat list \ val" @@ -76,7 +76,7 @@ -section {* Orderings *} +section \Orderings\ definition prefix_list:: "'a list \ 'a list \ bool" ("_ \pre _" [60,59] 60) @@ -128,7 +128,7 @@ -section {* POSIX Ordering of Values According to Okui \& Suzuki *} +section \POSIX Ordering of Values According to Okui \& Suzuki\ definition PosOrd:: "val \ nat list \ val \ bool" ("_ \val _ _" [60, 60, 59] 60) @@ -512,7 +512,7 @@ -section {* The Posix Value is smaller than any other Value *} +section \The Posix Value is smaller than any other Value\ lemma Posix_PosOrd: diff -r a730a5a0bab9 -r 89e6605c4ca4 thys/ROOT --- a/thys/ROOT Tue Jul 23 21:21:49 2019 +0100 +++ b/thys/ROOT Mon Jul 29 09:37:20 2019 +0100 @@ -20,10 +20,18 @@ "root.tex" -session Journal in "Journal" = Lex + +session Journal in "Journal" = HOL + options [document = pdf, document_output = "..", document_variants="journal"] - theories - "~~/src/HOL/Library/LaTeXsugar" + theories [document = false] + "~~/src/HOL/Library/LaTeXsugar" + "~~/src/HOL/Library/Sublist" + "../Spec" + "../RegLangs" + "../Lexer" + "../Simplifying" + "../Sulzmann" + "../Positions" + theories [document = true] "Paper" "PaperExt" document_files diff -r a730a5a0bab9 -r 89e6605c4ca4 thys/journal.pdf Binary file thys/journal.pdf has changed