# HG changeset patch # User Chengsong # Date 1643585849 0 # Node ID 1500f96707b0315240b653d5ab0b3c00b0c6ca9a # Parent 6291181fad07de91ed58763011e2e0531e4c1bc4# Parent 1612f2a77bf63d9c5335cc5b99abd97b3662cb93 ha diff -r 6291181fad07 -r 1500f96707b0 Literature/Verbatim-2021-paper.pdf Binary file Literature/Verbatim-2021-paper.pdf has changed diff -r 6291181fad07 -r 1500f96707b0 Literature/VerbatimPP-Lexer.pdf Binary file Literature/VerbatimPP-Lexer.pdf has changed diff -r 6291181fad07 -r 1500f96707b0 thys/BitCodedCT.thy --- a/thys/BitCodedCT.thy Sun Jan 30 23:36:31 2022 +0000 +++ b/thys/BitCodedCT.thy Sun Jan 30 23:37:29 2022 +0000 @@ -29,7 +29,7 @@ where "decode' ds ZERO = (Void, [])" | "decode' ds ONE = (Void, ds)" -| "decode' ds (CHAR d) = (Char d, ds)" +| "decode' ds (CH d) = (Char d, ds)" | "decode' [] (ALT r1 r2) = (Void, [])" | "decode' (Z # ds) (ALT r1 r2) = (let (v, ds') = decode' ds r1 in (Left v, ds'))" | "decode' (S # ds) (ALT r1 r2) = (let (v, ds') = decode' ds r2 in (Right v, ds'))" diff -r 6291181fad07 -r 1500f96707b0 thys/LexerExt.thy --- a/thys/LexerExt.thy Sun Jan 30 23:36:31 2022 +0000 +++ b/thys/LexerExt.thy Sun Jan 30 23:37:29 2022 +0000 @@ -17,10 +17,15 @@ | "mkeps(NTIMES r n) = Stars (replicate n (mkeps r))" | "mkeps(FROMNTIMES r n) = Stars (replicate n (mkeps r))" | "mkeps(NMTIMES r n m) = Stars (replicate n (mkeps r))" - +| "mkeps(NOT ZERO) = Nt Void" +| "mkeps(NOT (CH _ )) = Nt Void" +| "mkeps(NOT (SEQ r1 r2)) = Seq (mkeps (NOT r1)) (mkeps (NOT r1))" +| "mkeps(NOT (ALT r1 r2)) = (if nullable(r1) then Right (mkeps (NOT r2)) else (mkeps (NOT r1)))" + + fun injval :: "rexp \ char \ val \ val" where - "injval (CHAR d) c Void = Char d" + "injval (CH d) c Void = Char d" | "injval (ALT r1 r2) c (Left v1) = Left(injval r1 c v1)" | "injval (ALT r1 r2) c (Right v2) = Right(injval r2 c v2)" | "injval (SEQ r1 r2) c (Seq v1 v2) = Seq (injval r1 c v1) v2" @@ -49,9 +54,11 @@ shows "flat (mkeps r) = []" using assms apply(induct rule: nullable.induct) - apply(auto) - by presburger - + apply(auto) + apply presburger + apply(case_tac r) + apply(auto) + sorry lemma mkeps_nullable: assumes "nullable(r)" @@ -81,7 +88,7 @@ apply (simp add: mkeps_flat) apply(simp) apply(simp) -done + sorry lemma Prf_injval_flat: @@ -90,7 +97,7 @@ using assms apply(induct arbitrary: v rule: der.induct) apply(auto elim!: Prf_elims intro: mkeps_flat split: if_splits) -done + sorry lemma Prf_injval: assumes "\ v : der c r" @@ -170,7 +177,8 @@ apply(simp add: Prf_injval_flat) apply(simp) apply(simp) -done + sorry + @@ -186,8 +194,10 @@ apply(auto intro: Posix.intros simp add: nullable_correctness Sequ_def) apply(subst append.simps(1)[symmetric]) apply(rule Posix.intros) - apply(auto) - done + apply(auto) + apply(case_tac r) + apply(auto) + sorry lemma Posix_injval: @@ -207,22 +217,22 @@ then have "False" by cases then show "(c # s) \ ONE \ (injval ONE c v)" by simp next - case (CHAR d) + case (CH d) consider (eq) "c = d" | (ineq) "c \ d" by blast - then show "(c # s) \ (CHAR d) \ (injval (CHAR d) c v)" + then show "(c # s) \ (CH d) \ (injval (CH d) c v)" proof (cases) case eq - have "s \ der c (CHAR d) \ v" by fact + have "s \ der c (CH d) \ v" by fact then have "s \ ONE \ v" using eq by simp then have eqs: "s = [] \ v = Void" by cases simp - show "(c # s) \ CHAR d \ injval (CHAR d) c v" using eq eqs + show "(c # s) \ CH d \ injval (CH d) c v" using eq eqs by (auto intro: Posix.intros) next case ineq - have "s \ der c (CHAR d) \ v" by fact + have "s \ der c (CH d) \ v" by fact then have "s \ ZERO \ v" using ineq by simp then have "False" by cases - then show "(c # s) \ CHAR d \ injval (CHAR d) c v" by simp + then show "(c # s) \ CH d \ injval (CH d) c v" by simp qed next case (ALT r1 r2) @@ -319,7 +329,6 @@ apply(simp) apply(subgoal_tac "\vss. v2 = Stars vss") apply(clarify) - apply(drule_tac x="v1" in meta_spec) apply(drule_tac x="vss" in meta_spec) apply(drule_tac x="s1" in meta_spec) apply(drule_tac x="s2" in meta_spec) @@ -401,7 +410,6 @@ apply(simp) apply(subgoal_tac "\vss. v2 = Stars vss") apply(clarify) - apply(drule_tac x="v1" in meta_spec) apply(drule_tac x="vss" in meta_spec) apply(drule_tac x="s1" in meta_spec) apply(drule_tac x="s2" in meta_spec) @@ -454,7 +462,6 @@ apply(simp) apply(subgoal_tac "\vss. v2 = Stars vss") apply(clarify) - apply(drule_tac x="v1" in meta_spec) apply(drule_tac x="vss" in meta_spec) apply(drule_tac x="s1" in meta_spec) apply(drule_tac x="s2" in meta_spec) @@ -544,7 +551,6 @@ apply(simp) apply(subgoal_tac "\vss. v2 = Stars vss") apply(clarify) - apply(drule_tac x="v1" in meta_spec) apply(drule_tac x="vss" in meta_spec) apply(drule_tac x="s1" in meta_spec) apply(drule_tac x="s2" in meta_spec) @@ -618,6 +624,9 @@ apply(simp) done qed + next + case (NOT r s v) + then show ?case sorry qed section {* Lexer Correctness *} diff -r 6291181fad07 -r 1500f96707b0 thys/SpecExt.thy --- a/thys/SpecExt.thy Sun Jan 30 23:36:31 2022 +0000 +++ b/thys/SpecExt.thy Sun Jan 30 23:37:29 2022 +0000 @@ -1,6 +1,6 @@ theory SpecExt - imports Main (*"~~/src/HOL/Library/Sublist"*) + imports Main "HOL-Library.Sublist" (*"~~/src/HOL/Library/Sublist"*) begin section {* Sequential Composition of Languages *} @@ -165,10 +165,6 @@ apply(auto simp add: Sequ_def) done -lemma - assumes "[] \ A" "n \ 0" "A \ {}" - shows "A \ (Suc n) = A \ n" - lemma Der_Pow_0: shows "Der c (A \ 0) = {}" by(simp add: Der_def) @@ -221,7 +217,7 @@ datatype rexp = ZERO | ONE -| CHAR char +| CH char | SEQ rexp rexp | ALT rexp rexp | STAR rexp @@ -238,7 +234,7 @@ where "L (ZERO) = {}" | "L (ONE) = {[]}" -| "L (CHAR c) = {[c]}" +| "L (CH c) = {[c]}" | "L (SEQ r1 r2) = (L r1) ;; (L r2)" | "L (ALT r1 r2) = (L r1) \ (L r2)" | "L (STAR r) = (L r)\" @@ -255,7 +251,7 @@ where "nullable (ZERO) = False" | "nullable (ONE) = True" -| "nullable (CHAR c) = False" +| "nullable (CH c) = False" | "nullable (ALT r1 r2) = (nullable r1 \ nullable r2)" | "nullable (SEQ r1 r2) = (nullable r1 \ nullable r2)" | "nullable (STAR r) = True" @@ -270,7 +266,7 @@ where "der c (ZERO) = ZERO" | "der c (ONE) = ZERO" -| "der c (CHAR d) = (if c = d then ONE else ZERO)" +| "der c (CH d) = (if c = d then ONE else ZERO)" | "der c (ALT r1 r2) = ALT (der c r1) (der c r2)" | "der c (SEQ r1 r2) = (if nullable r1 @@ -340,48 +336,6 @@ by (metis append.assoc) - -lemma - "L(FROMNTIMES r n) = L(SEQ (NTIMES r n) (STAR r))" - apply(auto simp add: Sequ_def) - defer - apply(subgoal_tac "\m. s2 \ (L r) \ m") - prefer 2 - apply (simp add: Star_Pow) - apply(auto) - apply(rule_tac x="n + m" in bexI) - apply (simp add: pow_add) - apply simp - apply(subgoal_tac "\m. m + n = xa") - apply(auto) - prefer 2 - using le_add_diff_inverse2 apply auto[1] - by (smt Pow_Star Sequ_def add.commute mem_Collect_eq pow_add2) - -lemma - "L (der c (FROMNTIMES r n)) = - L (SEQ (der c r) (FROMNTIMES r (n - 1)))" - apply(auto simp add: Sequ_def) - using Star_Pow apply blast - using Pow_Star by blast - -lemma - "L (der c (UPNTIMES r n)) = - L(if n = 0 then ZERO else - ALT (der c r) (SEQ (der c r) (UPNTIMES r (n - 1))))" - apply(auto simp add: Sequ_def) - using SpecExt.Pow_empty by blast - -abbreviation "FROM \ FROMNTIMES" - -lemma - shows "L (der c (FROM r n)) = - L (if n <= 0 then SEQ (der c r) (ALT ONE (FROM r 0)) - else SEQ (der c r) (ALT ZERO (FROM r (n -1))))" - apply(auto simp add: Sequ_def) - oops - - fun ders :: "string \ rexp \ rexp" where @@ -454,8 +408,10 @@ apply(auto simp add: Sequ_def Der_def Cons_eq_append_conv)[1] apply(rule_tac x="Suc xa" in bexI) apply(auto simp add: Sequ_def)[2] -apply (metis append_Cons) -apply (metis (no_types, hide_lams) Pow_decomp atMost_iff diff_Suc_eq_diff_pred diff_is_0_eq) + apply (metis append_Cons) + apply(rule_tac x="xa - 1" in bexI) + apply(auto simp add: Sequ_def)[2] + apply (metis One_nat_def Pow_decomp) apply(rule impI)+ apply(subst Sequ_Union_in) apply(subst Der_Pow_Sequ[symmetric]) @@ -485,7 +441,7 @@ | Right val | Left val | Stars "val list" - +| Nt val section {* The string behind a value *} @@ -499,6 +455,7 @@ | "flat (Seq v1 v2) = (flat v1) @ (flat v2)" | "flat (Stars []) = []" | "flat (Stars (v#vs)) = (flat v) @ (flat (Stars vs))" +| "flat (Nt v) = flat v" abbreviation "flats vs \ concat (map flat vs)" @@ -597,7 +554,7 @@ | "\ v1 : r1 \ \ Left v1 : ALT r1 r2" | "\ v2 : r2 \ \ Right v2 : ALT r1 r2" | "\ Void : ONE" -| "\ Char c : CHAR c" +| "\ Char c : CH c" | "\\v \ set vs. \ v : r \ flat v \ []\ \ \ Stars vs : STAR r" | "\\v \ set vs. \ v : r \ flat v \ []; length vs \ n\ \ \ Stars vs : UPNTIMES r n" | "\\v \ set vs1. \ v : r \ flat v \ []; @@ -622,7 +579,7 @@ "\ v : SEQ r1 r2" "\ v : ALT r1 r2" "\ v : ONE" - "\ v : CHAR c" + "\ v : CH c" "\ vs : STAR r" "\ vs : UPNTIMES r n" "\ vs : NTIMES r n" @@ -867,7 +824,10 @@ then obtain vs where "flats vs = s" "\v\set vs. \ v : r \ flat v \ []" "length vs \ n" using IH flats_cval_nonempty by (smt order.trans) then show "\v. \ v : UPNTIMES r n \ flat v = s" - using Prf.intros(7) flat_Stars by blast + using Prf.intros(7) flat_Stars by blast +next + case (NOT r) + then show ?case sorry qed (auto intro: Prf.intros) @@ -917,7 +877,7 @@ lemma LV_simps: shows "LV ZERO s = {}" and "LV ONE s = (if s = [] then {Void} else {})" - and "LV (CHAR c) s = (if s = [c] then {Char c} else {})" + and "LV (CH c) s = (if s = [c] then {Char c} else {})" and "LV (ALT r1 r2) s = Left ` LV r1 s \ Right ` LV r2 s" unfolding LV_def apply(auto intro: Prf.intros elim: Prf.cases) @@ -1226,8 +1186,8 @@ case (ONE s) show "finite (LV ONE s)" by (simp add: LV_simps) next - case (CHAR c s) - show "finite (LV (CHAR c) s)" by (simp add: LV_simps) + case (CH c s) + show "finite (LV (CH c) s)" by (simp add: LV_simps) next case (ALT r1 r2 s) then show "finite (LV (ALT r1 r2) s)" by (simp add: LV_simps) @@ -1270,7 +1230,10 @@ case (NMTIMES r n m s) have "\s. finite (LV r s)" by fact then show "finite (LV (NMTIMES r n m) s)" - by (simp add: LV_NMTIMES_6) + by (simp add: LV_NMTIMES_6) +next + case (NOT r s) + then show ?case sorry qed @@ -1281,7 +1244,7 @@ Posix :: "string \ rexp \ val \ bool" ("_ \ _ \ _" [100, 100, 100] 100) where Posix_ONE: "[] \ ONE \ Void" -| Posix_CHAR: "[c] \ (CHAR c) \ (Char c)" +| Posix_CHAR: "[c] \ (CH c) \ (Char c)" | Posix_ALT1: "s \ r1 \ v \ s \ (ALT r1 r2) \ (Left v)" | Posix_ALT2: "\s \ r2 \ v; s \ L(r1)\ \ s \ (ALT r1 r2) \ (Right v)" | Posix_SEQ: "\s1 \ r1 \ v1; s2 \ r2 \ v2; @@ -1320,7 +1283,7 @@ inductive_cases Posix_elims: "s \ ZERO \ v" "s \ ONE \ v" - "s \ CHAR c \ v" + "s \ CH c \ v" "s \ ALT r1 r2 \ v" "s \ SEQ r1 r2 \ v" "s \ STAR r \ v" @@ -1396,7 +1359,7 @@ then show "Void = v2" by cases auto next case (Posix_CHAR c v2) - have "[c] \ CHAR c \ v2" by fact + have "[c] \ CH c \ v2" by fact then show "Char c = v2" by cases auto next case (Posix_ALT1 s r1 v r2 v2) @@ -1503,14 +1466,9 @@ using Posix1(1) Posix1(2) apply blast apply(case_tac n) apply(simp) - apply(simp) - apply(drule_tac x="va" in meta_spec) - apply(drule_tac x="vs" in meta_spec) - apply(simp) - apply(drule meta_mp) - apply (metis L.simps(9) Posix1(1) UN_E append.right_neutral append_Nil diff_Suc_1 local.Posix_FROMNTIMES1(4) val.inject(5)) - apply (metis L.simps(9) Posix1(1) UN_E append.right_neutral append_Nil) - by (metis One_nat_def Posix1(1) Posix_FROMNTIMES1.hyps(7) self_append_conv self_append_conv2) + apply(simp) + apply (smt (verit, ccfv_threshold) L.simps(9) Posix1(1) UN_E append_eq_append_conv2) + by (metis One_nat_def Posix1(1) Posix_FROMNTIMES1.hyps(7) append_Nil2 append_self_conv2) moreover have IHs: "\v2. s1 \ r \ v2 \ v = v2" "\v2. s2 \ FROMNTIMES r (n - 1) \ v2 \ Stars vs = v2" by fact+ @@ -1555,18 +1513,8 @@ apply(case_tac m) apply(simp) apply(simp) - apply(drule_tac x="va" in meta_spec) - apply(drule_tac x="vs" in meta_spec) - apply(simp) - apply(drule meta_mp) - apply(drule Posix1(1)) - apply(drule Posix1(1)) - apply(drule Posix1(1)) - apply(frule Posix1(1)) - apply(simp) - using Posix_NMTIMES1.hyps(4) apply force - apply (metis L.simps(10) Posix1(1) UN_E append_Nil2 append_self_conv2) - by (metis One_nat_def Posix1(1) Posix_NMTIMES1.hyps(8) append.right_neutral append_Nil) + apply (metis L.simps(10) Posix1(1) UN_E append.right_neutral append_Nil) + by (metis One_nat_def Posix1(1) Posix_NMTIMES1.hyps(8) append_Nil self_append_conv) moreover have IHs: "\v2. s1 \ r \ v2 \ v = v2" "\v2. s2 \ NMTIMES r (n - 1) (m - 1) \ v2 \ Stars vs = v2" by fact+ @@ -1616,7 +1564,6 @@ apply(auto simp add: intro!: Prf.intros elim!: Prf_elims)[2] apply (metis (mono_tags, lifting) Prf.intros(9) append_Nil empty_iff flat_Stars flats_empty list.set(1) mem_Collect_eq) apply(simp) - apply(clarify) apply(case_tac n) apply(simp) apply(simp) @@ -1632,7 +1579,6 @@ apply(simp) apply(simp) apply(simp) - apply(clarify) apply(erule Prf_elims) apply(simp) apply(rule Prf.intros) @@ -1660,7 +1606,6 @@ apply(simp) apply (metis Prf.intros(11) append_Nil empty_iff list.set(1)) apply(simp) - apply(clarify) apply(rotate_tac 6) apply(erule Prf_elims) apply(simp) @@ -1675,7 +1620,6 @@ apply(simp) apply(simp) apply(simp) - apply(clarify) apply(rotate_tac 6) apply(erule Prf_elims) apply(simp) diff -r 6291181fad07 -r 1500f96707b0 thys2/Paper/Paper.thy --- a/thys2/Paper/Paper.thy Sun Jan 30 23:36:31 2022 +0000 +++ b/thys2/Paper/Paper.thy Sun Jan 30 23:37:29 2022 +0000 @@ -7,7 +7,342 @@ "../SizeBound4" "HOL-Library.LaTeXsugar" begin + +declare [[show_question_marks = false]] + +notation (latex output) + If ("(\<^latex>\\\textrm{\if\<^latex>\}\ (_)/ \<^latex>\\\textrm{\then\<^latex>\}\ (_)/ \<^latex>\\\textrm{\else\<^latex>\}\ (_))" 10) and + Cons ("_\<^latex>\\\mbox{$\\,$}\::\<^latex>\\\mbox{$\\,$}\_" [75,73] 73) + + +abbreviation + "der_syn r c \ der c r" + +notation (latex output) + der_syn ("_\\_" [79, 1000] 76) and + + ZERO ("\<^bold>0" 81) and + ONE ("\<^bold>1" 81) and + CH ("_" [1000] 80) and + ALT ("_ + _" [77,77] 78) and + SEQ ("_ \ _" [77,77] 78) and + STAR ("_\<^sup>\" [79] 78) and + + val.Void ("Empty" 78) and + val.Char ("Char _" [1000] 78) and + val.Left ("Left _" [79] 78) and + val.Right ("Right _" [1000] 78) and + val.Seq ("Seq _ _" [79,79] 78) and + val.Stars ("Stars _" [79] 78) and + + Posix ("'(_, _') \ _" [63,75,75] 75) and + + flat ("|_|" [75] 74) and + flats ("|_|" [72] 74) and + injval ("inj _ _ _" [79,77,79] 76) and + mkeps ("mkeps _" [79] 76) and + length ("len _" [73] 73) and + set ("_" [73] 73) and + + AZERO ("ZERO" 81) and + AONE ("ONE _" [79] 81) and + ACHAR ("CHAR _ _" [79, 79] 80) and + AALTs ("ALTs _ _" [77,77] 78) and + ASEQ ("SEQ _ _ _" [79, 77,77] 78) and + ASTAR ("STAR _ _" [79, 79] 78) and + + code ("code _" [79] 74) and + intern ("_\<^latex>\\\mbox{$^\\uparrow$}\" [900] 80) and + erase ("_\<^latex>\\\mbox{$^\\downarrow$}\" [1000] 74) and + bnullable ("nullable\<^latex>\\\mbox{$_b$}\ _" [1000] 80) and + bmkeps ("mkeps\<^latex>\\\mbox{$_b$}\ _" [1000] 80) + + +lemma better_retrieve: + shows "rs \ Nil ==> retrieve (AALTs bs (r#rs)) (Left v) = bs @ retrieve r v" + and "rs \ Nil ==> retrieve (AALTs bs (r#rs)) (Right v) = bs @ retrieve (AALTs [] rs) v" + apply (metis list.exhaust retrieve.simps(4)) + by (metis list.exhaust retrieve.simps(5)) + (*>*) + +section {* Introduction *} + +text {* + +In the last fifteen or so years, Brzozowski's derivatives of regular +expressions have sparked quite a bit of interest in the functional +programming and theorem prover communities. The beauty of +Brzozowski's derivatives \cite{Brzozowski1964} is that they are neatly +expressible in any functional language, and easily definable and +reasoned about in theorem provers---the definitions just consist of +inductive datatypes and simple recursive functions. A mechanised +correctness proof of Brzozowski's matcher in for example HOL4 has been +mentioned by Owens and Slind~\cite{Owens2008}. Another one in +Isabelle/HOL is part of the work by Krauss and Nipkow +\cite{Krauss2011}. And another one in Coq is given by Coquand and +Siles \cite{Coquand2012}. + + +The notion of derivatives +\cite{Brzozowski1964}, written @{term "der c r"}, of a regular +expression give a simple solution to the problem of matching a string +@{term s} with a regular expression @{term r}: if the derivative of +@{term r} w.r.t.\ (in succession) all the characters of the string +matches the empty string, then @{term r} matches @{term s} (and {\em +vice versa}). The derivative has the property (which may almost be +regarded as its specification) that, for every string @{term s} and +regular expression @{term r} and character @{term c}, one has @{term +"cs \ L(r)"} if and only if \mbox{@{term "s \ L(der c r)"}}. + + +If a regular expression matches a string, then in general there is more +than one way of how the string is matched. There are two commonly used +disambiguation strategies to generate a unique answer: one is called +GREEDY matching \cite{Frisch2004} and the other is POSIX +matching~\cite{POSIX,Kuklewicz,OkuiSuzuki2010,Sulzmann2014,Vansummeren2006}. +For example consider the string @{term xy} and the regular expression +\mbox{@{term "STAR (ALT (ALT x y) xy)"}}. Either the string can be +matched in two `iterations' by the single letter-regular expressions +@{term x} and @{term y}, or directly in one iteration by @{term xy}. The +first case corresponds to GREEDY matching, which first matches with the +left-most symbol and only matches the next symbol in case of a mismatch +(this is greedy in the sense of preferring instant gratification to +delayed repletion). The second case is POSIX matching, which prefers the +longest match. + + +\begin{center} +\begin{tabular}{cc} + \begin{tabular}{r@ {\hspace{2mm}}c@ {\hspace{2mm}}l} + @{thm (lhs) der.simps(1)} & $\dn$ & @{thm (rhs) der.simps(1)}\\ + @{thm (lhs) der.simps(2)} & $\dn$ & @{thm (rhs) der.simps(2)}\\ + @{thm (lhs) der.simps(3)} & $\dn$ & @{thm (rhs) der.simps(3)}\\ + @{thm (lhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{text "if"} @{term "nullable(r\<^sub>1)"}\\ + & & @{text "then"} @{term "ALT (SEQ (der c r\<^sub>1) r\<^sub>2) (der c r\<^sub>2)"}\\ + & & @{text "else"} @{term "SEQ (der c r\<^sub>1) r\<^sub>2"}\\ + % & & @{thm (rhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) der.simps(6)} & $\dn$ & @{thm (rhs) der.simps(6)} + \end{tabular} + & + \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l} + @{thm (lhs) nullable.simps(1)} & $\dn$ & @{thm (rhs) nullable.simps(1)}\\ + @{thm (lhs) nullable.simps(2)} & $\dn$ & @{thm (rhs) nullable.simps(2)}\\ + @{thm (lhs) nullable.simps(3)} & $\dn$ & @{thm (rhs) nullable.simps(3)}\\ + @{thm (lhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) nullable.simps(6)} & $\dn$ & @{thm (rhs) nullable.simps(6)}\medskip\\ + \end{tabular} +\end{tabular} +\end{center} + + +\begin{figure}[t] +\begin{center} +\begin{tikzpicture}[scale=2,node distance=1.3cm, + every node/.style={minimum size=6mm}] +\node (r1) {@{term "r\<^sub>1"}}; +\node (r2) [right=of r1]{@{term "r\<^sub>2"}}; +\draw[->,line width=1mm](r1)--(r2) node[above,midway] {@{term "der a DUMMY"}}; +\node (r3) [right=of r2]{@{term "r\<^sub>3"}}; +\draw[->,line width=1mm](r2)--(r3) node[above,midway] {@{term "der b DUMMY"}}; +\node (r4) [right=of r3]{@{term "r\<^sub>4"}}; +\draw[->,line width=1mm](r3)--(r4) node[above,midway] {@{term "der c DUMMY"}}; +\draw (r4) node[anchor=west] {\;\raisebox{3mm}{@{term nullable}}}; +\node (v4) [below=of r4]{@{term "v\<^sub>4"}}; +\draw[->,line width=1mm](r4) -- (v4); +\node (v3) [left=of v4] {@{term "v\<^sub>3"}}; +\draw[->,line width=1mm](v4)--(v3) node[below,midway] {\inj r\<^sub>3 c\}; +\node (v2) [left=of v3]{@{term "v\<^sub>2"}}; +\draw[->,line width=1mm](v3)--(v2) node[below,midway] {\inj r\<^sub>2 b\}; +\node (v1) [left=of v2] {@{term "v\<^sub>1"}}; +\draw[->,line width=1mm](v2)--(v1) node[below,midway] {\inj r\<^sub>1 a\}; +\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{@{term "mkeps"}}}; +\end{tikzpicture} +\end{center} +\mbox{}\\[-13mm] + +\caption{The two phases of the algorithm by Sulzmann \& Lu \cite{Sulzmann2014}, +matching the string @{term "[a,b,c]"}. The first phase (the arrows from +left to right) is \Brz's matcher building successive derivatives. If the +last regular expression is @{term nullable}, then the functions of the +second phase are called (the top-down and right-to-left arrows): first +@{term mkeps} calculates a value @{term "v\<^sub>4"} witnessing +how the empty string has been recognised by @{term "r\<^sub>4"}. After +that the function @{term inj} ``injects back'' the characters of the string into +the values. +\label{Sulz}} +\end{figure} + + +*} + +section {* Background *} + +text {* + Sulzmann-Lu algorithm with inj. State that POSIX rules. + metion slg is correct. + + + \begin{figure}[t] + \begin{center} + \begin{tabular}{c} + @{thm[mode=Axiom] Posix.intros(1)}\P\@{term "ONE"} \qquad + @{thm[mode=Axiom] Posix.intros(2)}\P\@{term "c"}\medskip\\ + @{thm[mode=Rule] Posix.intros(3)[of "s" "r\<^sub>1" "v" "r\<^sub>2"]}\P+L\\qquad + @{thm[mode=Rule] Posix.intros(4)[of "s" "r\<^sub>2" "v" "r\<^sub>1"]}\P+R\\medskip\\ + $\mprset{flushleft} + \inferrule + {@{thm (prem 1) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \qquad + @{thm (prem 2) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \\\\ + @{thm (prem 3) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}} + {@{thm (concl) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}$\PS\\\ + @{thm[mode=Axiom] Posix.intros(7)}\P[]\\medskip\\ + $\mprset{flushleft} + \inferrule + {@{thm (prem 1) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad + @{thm (prem 2) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad + @{thm (prem 3) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \\\\ + @{thm (prem 4) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}} + {@{thm (concl) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}$\P\\ + \end{tabular} + \end{center} + \caption{Our inductive definition of POSIX values.}\label{POSIXrules} + \end{figure} + + + \begin{center} + \begin{tabular}{lcl} + @{thm (lhs) mkeps.simps(1)} & $\dn$ & @{thm (rhs) mkeps.simps(1)}\\ + @{thm (lhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm (lhs) mkeps.simps(4)} & $\dn$ & @{thm (rhs) mkeps.simps(4)}\\ + \end{tabular} + \end{center} + + \begin{center} + \begin{tabular}{l@ {\hspace{5mm}}lcl} + \textit{(1)} & @{thm (lhs) injval.simps(1)} & $\dn$ & @{thm (rhs) injval.simps(1)}\\ + \textit{(2)} & @{thm (lhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]} & $\dn$ & + @{thm (rhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]}\\ + \textit{(3)} & @{thm (lhs) injval.simps(3)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]} & $\dn$ & + @{thm (rhs) injval.simps(3)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]}\\ + \textit{(4)} & @{thm (lhs) injval.simps(4)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]} & $\dn$ + & @{thm (rhs) injval.simps(4)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]}\\ + \textit{(5)} & @{thm (lhs) injval.simps(5)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]} & $\dn$ + & @{thm (rhs) injval.simps(5)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]}\\ + \textit{(6)} & @{thm (lhs) injval.simps(6)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]} & $\dn$ + & @{thm (rhs) injval.simps(6)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]}\\ + \textit{(7)} & @{thm (lhs) injval.simps(7)[of "r" "c" "v" "vs"]} & $\dn$ + & @{thm (rhs) injval.simps(7)[of "r" "c" "v" "vs"]}\\ + \end{tabular} + \end{center} + +*} + +section {* Bitcoded Regular Expressions and Derivatives *} + +text {* + bitcoded regexes / decoding / bmkeps + gets rid of the second phase (only single phase) + correctness + + + \begin{center} + \begin{tabular}{lcl} + @{thm (lhs) code.simps(1)} & $\dn$ & @{thm (rhs) code.simps(1)}\\ + @{thm (lhs) code.simps(2)} & $\dn$ & @{thm (rhs) code.simps(2)}\\ + @{thm (lhs) code.simps(3)} & $\dn$ & @{thm (rhs) code.simps(3)}\\ + @{thm (lhs) code.simps(4)} & $\dn$ & @{thm (rhs) code.simps(4)}\\ + @{thm (lhs) code.simps(5)[of "v\<^sub>1" "v\<^sub>2"]} & $\dn$ & @{thm (rhs) code.simps(5)[of "v\<^sub>1" "v\<^sub>2"]}\\ + @{thm (lhs) code.simps(6)} & $\dn$ & @{thm (rhs) code.simps(6)}\\ + @{thm (lhs) code.simps(7)} & $\dn$ & @{thm (rhs) code.simps(7)} + \end{tabular} + \end{center} + + + The idea of the bitcodes is to annotate them to regular expressions and generate values + incrementally. The bitcodes can be read off from the @{text breg} and then decoded into a value. + + \begin{center} + \begin{tabular}{lcl} + @{term breg} & $::=$ & @{term "AZERO"}\\ + & $\mid$ & @{term "AONE bs"}\\ + & $\mid$ & @{term "ACHAR bs c"}\\ + & $\mid$ & @{term "AALTs bs rs"}\\ + & $\mid$ & @{term "ASEQ bs r\<^sub>1 r\<^sub>2"}\\ + & $\mid$ & @{term "ASTAR bs r"} + \end{tabular} + \end{center} + + + + \begin{center} + \begin{tabular}{lcl} + @{thm (lhs) retrieve.simps(1)} & $\dn$ & @{thm (rhs) retrieve.simps(1)}\\ + @{thm (lhs) retrieve.simps(2)} & $\dn$ & @{thm (rhs) retrieve.simps(2)}\\ + @{thm (lhs) retrieve.simps(3)} & $\dn$ & @{thm (rhs) retrieve.simps(3)}\\ + @{thm (lhs) better_retrieve(1)} & $\dn$ & @{thm (rhs) better_retrieve(1)}\\ + @{thm (lhs) better_retrieve(2)} & $\dn$ & @{thm (rhs) better_retrieve(2)}\\ + @{thm (lhs) retrieve.simps(6)[of _ "r\<^sub>1" "r\<^sub>2" "v\<^sub>1" "v\<^sub>2"]} + & $\dn$ & @{thm (rhs) retrieve.simps(6)[of _ "r\<^sub>1" "r\<^sub>2" "v\<^sub>1" "v\<^sub>2"]}\\ + @{thm (lhs) retrieve.simps(7)} & $\dn$ & @{thm (rhs) retrieve.simps(7)}\\ + @{thm (lhs) retrieve.simps(8)} & $\dn$ & @{thm (rhs) retrieve.simps(8)} + \end{tabular} + \end{center} + + + \begin{theorem} + @{thm blexer_correctness} + \end{theorem} +*} + + +section {* Simplification *} + +text {* + Sulzmann \& Lu apply simplification via a fixpoint operation; also does not use erase to filter out duplicates. + + not direct correspondence with PDERs, because of example + problem with retrieve + + correctness + + + + + + + \begin{figure}[t] + \begin{center} + \begin{tabular}{c} + @{thm[mode=Axiom] bs1[of _ "r\<^sub>2"]}\qquad + @{thm[mode=Axiom] bs2[of _ "r\<^sub>1"]}\qquad + @{thm[mode=Axiom] bs3[of "bs\<^sub>1" "bs\<^sub>2"]}\\ + @{thm[mode=Rule] bs4[of "r\<^sub>1" "r\<^sub>2" _ "r\<^sub>3"]}\qquad + @{thm[mode=Rule] bs5[of "r\<^sub>3" "r\<^sub>4" _ "r\<^sub>1"]}\\ + @{thm[mode=Axiom] bs6}\qquad + @{thm[mode=Axiom] bs7}\\ + @{thm[mode=Rule] bs8[of "rs\<^sub>1" "rs\<^sub>2"]}\\ + @{thm[mode=Axiom] ss1}\qquad + @{thm[mode=Rule] ss2[of "rs\<^sub>1" "rs\<^sub>2"]}\qquad + @{thm[mode=Rule] ss3[of "r\<^sub>1" "r\<^sub>2"]}\\ + @{thm[mode=Axiom] ss4}\qquad + @{thm[mode=Axiom] ss5[of "bs" "rs\<^sub>1" "rs\<^sub>2"]}\\ + @{thm[mode=Rule] ss6[of "r\<^sub>1" "r\<^sub>2" "rs\<^sub>1" "rs\<^sub>2" "rs\<^sub>3"]}\\ + \end{tabular} + \end{center} + \caption{???}\label{SimpRewrites} + \end{figure} +*} + +section {* Bound - NO *} + +section {* Bounded Regex / Not *} + +section {* Conclusion *} + text {* \cite{AusafDyckhoffUrban2016} diff -r 6291181fad07 -r 1500f96707b0 thys2/Paper/document/root.tex --- a/thys2/Paper/document/root.tex Sun Jan 30 23:36:31 2022 +0000 +++ b/thys2/Paper/document/root.tex Sun Jan 30 23:37:29 2022 +0000 @@ -45,33 +45,59 @@ \titlerunning{POSIX Lexing with Bitcoded Derivatives} \author{Chengsong Tan}{King's College London}{chengsong.tan@kcl.ac.uk}{}{} \author{Christian Urban}{King's College London}{christian.urban@kcl.ac.uk}{}{} +\authorrunning{C.~Tan and C.~Urban} +\keywords{POSIX matching, Derivatives of Regular Expressions, Isabelle/HOL} +\category{} +\ccsdesc[100]{Design and analysis of algorithms} +\ccsdesc[100]{Formal languages and automata theory} +\Copyright{\mbox{}} +\nolinenumbers \begin{document} \maketitle \begin{abstract} -Brzozowski introduced the notion of derivatives for regular -expressions. They can be used for a very simple regular expression -matching algorithm. Sulzmann and Lu cleverly extended this algorithm -in order to deal with POSIX matching, which is the underlying -disambiguation strategy for regular expressions needed in lexers. -Their algorithm generates POSIX values which encode the information of -\emph{how} a regular expression matches a string---that is, which part -of the string is matched by which part of the regular expression. In -this paper we give our inductive definition of what a POSIX value is -and show $(i)$ that such a value is unique (for given regular -expression and string being matched) and $(ii)$ that Sulzmann and Lu's -algorithm always generates such a value (provided that the regular -expression matches the string). We show that $(iii)$ our inductive -definition of a POSIX value is equivalent to an alternative definition -by Okui and Suzuki which identifies POSIX values as least elements -according to an ordering of values. We also prove the correctness of -Sulzmann's bitcoded version of the POSIX matching algorithm and extend the -results to additional constructors for regular expressions. \smallskip - -{\bf Keywords:} POSIX matching, Derivatives of Regular Expressions, -Isabelle/HOL + Sulzmann and Lu described a lexing algorithm that calculates + Brzozowski derivatives using bitcodes annotated to regular + expressions. Their algorithm generates POSIX values which encode + the information of \emph{how} a regular expression matches a + string---that is, which part of the string is matched by which part + of the regular expression. The purpose of the bitcodes is to generate POSIX values incrementally while + derivatives are calculated. They also help with designing + an `aggressive' simplification function that keeps the size of + derivatives small. Without simplification derivatives can grow + exponentially resulting in an extremely slow lexing algorithm. In this + paper we describe a variant of Sulzmann and Lu's algorithm: Our + algorithm is a recursive functional program, whereas Sulzmann + and Lu's version involves a fixpoint construction. We \textit{(i)} + prove in Isabelle/HOL that our program is correct and generates + unique POSIX values; we also \textit{(ii)} establish a polynomial + bound for the size of the derivatives. The size can be seen as a + proxy measure for the efficiency of the lexing algorithm: because of + the polynomial bound our algorithm does not suffer from + the exponential blowup in earlier works. + + % Brzozowski introduced the notion of derivatives for regular + % expressions. They can be used for a very simple regular expression + % matching algorithm. Sulzmann and Lu cleverly extended this + % algorithm in order to deal with POSIX matching, which is the + % underlying disambiguation strategy for regular expressions needed + % in lexers. Their algorithm generates POSIX values which encode + % the information of \emph{how} a regular expression matches a + % string---that is, which part of the string is matched by which + % part of the regular expression. In this paper we give our + % inductive definition of what a POSIX value is and show $(i)$ that + % such a value is unique (for given regular expression and string + % being matched) and $(ii)$ that Sulzmann and Lu's algorithm always + % generates such a value (provided that the regular expression + % matches the string). We show that $(iii)$ our inductive definition + % of a POSIX value is equivalent to an alternative definition by + % Okui and Suzuki which identifies POSIX values as least elements + % according to an ordering of values. We also prove the correctness + % of Sulzmann's bitcoded version of the POSIX matching algorithm and + % extend the results to additional constructors for regular + % expressions. \smallskip \end{abstract} diff -r 6291181fad07 -r 1500f96707b0 thys2/SizeBound4.thy --- a/thys2/SizeBound4.thy Sun Jan 30 23:36:31 2022 +0000 +++ b/thys2/SizeBound4.thy Sun Jan 30 23:37:29 2022 +0000 @@ -132,15 +132,6 @@ apply(auto) done -lemma fuse_Nil: - shows "fuse [] r = r" - by (induct r)(simp_all) - -(* -lemma map_fuse_Nil: - shows "map (fuse []) rs = rs" - by (induct rs)(simp_all add: fuse_Nil) -*) fun intern :: "rexp \ arexp" where "intern ZERO = AZERO" @@ -155,7 +146,7 @@ fun retrieve :: "arexp \ val \ bit list" where "retrieve (AONE bs) Void = bs" | "retrieve (ACHAR bs c) (Char d) = bs" -| "retrieve (AALTs bs [r]) v = bs @ retrieve r v" +| "retrieve (AALTs bs [r]) v = bs @ retrieve r v" | "retrieve (AALTs bs (r#rs)) (Left v) = bs @ retrieve r v" | "retrieve (AALTs bs (r#rs)) (Right v) = bs @ retrieve (AALTs [] rs) v" | "retrieve (ASEQ bs r1 r2) (Seq v1 v2) = bs @ retrieve r1 v1 @ retrieve r2 v2" @@ -225,8 +216,8 @@ | "bders r (c#s) = bders (bder c r) s" lemma bders_append: - "bders r (s1 @ s2) = bders (bders r s1) s2" - apply(induct s1 arbitrary: r s2) + "bders c (s1 @ s2) = bders (bders c s1) s2" + apply(induct s1 arbitrary: c s2) apply(simp_all) done @@ -274,29 +265,28 @@ apply(simp_all) done - lemma retrieve_fuse2: assumes "\ v : (erase r)" shows "retrieve (fuse bs r) v = bs @ retrieve r v" using assms apply(induct r arbitrary: v bs) - apply(auto elim: Prf_elims)[4] - defer - using retrieve_encode_STARS - apply(auto elim!: Prf_elims)[1] - apply(case_tac vs) - apply(simp) - apply(simp) - (* AALTs case *) + apply(auto elim: Prf_elims)[4] + apply(case_tac x2a) + apply(simp) + using Prf_elims(1) apply blast + apply(case_tac x2a) + apply(simp) + apply(simp) + apply(case_tac list) apply(simp) - apply(case_tac x2a) - apply(simp) - apply(auto elim!: Prf_elims)[1] + apply(simp) + apply (smt (verit, best) Prf_elims(3) append_assoc retrieve.simps(4) retrieve.simps(5)) apply(simp) - apply(case_tac list) - apply(simp) - apply(auto) + using retrieve_encode_STARS apply(auto elim!: Prf_elims)[1] + apply(case_tac vs) + apply(simp) + apply(simp) done lemma retrieve_fuse: @@ -314,139 +304,92 @@ apply(simp_all add: retrieve_fuse retrieve_encode_STARS) done -(* -lemma bnullable_Hdbmkeps_Hd: - assumes "bnullable a" - shows "bmkeps (AALTs bs (a # rs)) = bs @ (bmkeps a)" - using assms by simp -*) - -lemma r2: - assumes "x \ set rs" "bnullable x" - shows "bnullable (AALTs bs rs)" +lemma retrieve_AALTs_bnullable1: + assumes "bnullable r" + shows "retrieve (AALTs bs (r # rs)) (mkeps (erase (AALTs bs (r # rs)))) + = bs @ retrieve r (mkeps (erase r))" using assms - apply(induct rs) - apply(auto) + apply(case_tac rs) + apply(auto simp add: bnullable_correctness) done -lemma r3: - assumes "\ bnullable r" - "bnullables rs" - shows "retrieve (AALTs bs rs) (mkeps (erase (AALTs bs rs))) = - retrieve (AALTs bs (r # rs)) (mkeps (erase (AALTs bs (r # rs))))" +lemma retrieve_AALTs_bnullable2: + assumes "\bnullable r" "bnullables rs" + shows "retrieve (AALTs bs (r # rs)) (mkeps (erase (AALTs bs (r # rs)))) + = retrieve (AALTs bs rs) (mkeps (erase (AALTs bs rs)))" using assms apply(induct rs arbitrary: r bs) - apply(auto)[1] apply(auto) using bnullable_correctness apply blast - apply(auto simp add: bnullable_correctness mkeps_nullable retrieve_fuse2) - apply(subst retrieve_fuse2[symmetric]) - apply (smt bnullable.simps(4) bnullable_correctness erase.simps(5) erase.simps(6) insert_iff list.exhaust list.set(2) mkeps.simps(3) mkeps_nullable) - apply(simp) - apply(case_tac "bnullable a") - apply (smt append_Nil2 bnullable.simps(4) bnullable_correctness erase.simps(5) erase.simps(6) fuse.simps(4) insert_iff list.exhaust list.set(2) mkeps.simps(3) mkeps_nullable retrieve_fuse2) - apply(drule_tac x="a" in meta_spec) - apply(drule_tac x="bs" in meta_spec) - apply(drule meta_mp) - apply(simp) - apply(drule meta_mp) - apply(auto) - apply(subst retrieve_fuse2[symmetric]) apply(case_tac rs) - apply(simp) - apply(auto)[1] - apply (simp add: bnullable_correctness) - - apply (metis append_Nil2 bnullable_correctness erase_fuse fuse.simps(4) list.set_intros(1) mkeps.simps(3) mkeps_nullable nullable.simps(4) r2) - apply (simp add: bnullable_correctness) - apply (metis append_Nil2 bnullable_correctness erase.simps(6) erase_fuse fuse.simps(4) list.set_intros(2) mkeps.simps(3) mkeps_nullable r2) - apply(simp) + apply(auto) + using bnullable_correctness apply blast + apply(case_tac rs) + apply(auto) done -lemma t: +lemma bmkeps_retrieve_AALTs: assumes "\r \ set rs. bnullable r \ bmkeps r = retrieve r (mkeps (erase r))" "bnullables rs" shows "bs @ bmkepss rs = retrieve (AALTs bs rs) (mkeps (erase (AALTs bs rs)))" using assms apply(induct rs arbitrary: bs) apply(auto) - apply (metis (no_types, opaque_lifting) bmkepss.cases bnullable_correctness erase.simps(5) erase.simps(6) mkeps.simps(3) retrieve.simps(3) retrieve.simps(4)) - apply (metis r3) - apply (metis (no_types, lifting) bmkepss.cases bnullable_correctness empty_iff erase.simps(6) list.set(1) mkeps.simps(3) retrieve.simps(4)) - apply (metis r3) - done - + using retrieve_AALTs_bnullable1 apply presburger + apply (metis retrieve_AALTs_bnullable2) + apply (simp add: retrieve_AALTs_bnullable1) + by (metis retrieve_AALTs_bnullable2) + + lemma bmkeps_retrieve: assumes "bnullable r" shows "bmkeps r = retrieve r (mkeps (erase r))" using assms apply(induct r) - apply(auto) - using t by auto + apply(auto) + using bmkeps_retrieve_AALTs by auto lemma bder_retrieve: assumes "\ v : der c (erase r)" shows "retrieve (bder c r) v = retrieve r (injval (erase r) c v)" using assms apply(induct r arbitrary: v rule: erase.induct) - apply(simp) - apply(erule Prf_elims) - apply(simp) - apply(erule Prf_elims) - apply(simp) - apply(case_tac "c = ca") - apply(simp) - apply(erule Prf_elims) - apply(simp) - apply(simp) - apply(erule Prf_elims) - apply(simp) - apply(erule Prf_elims) - apply(simp) - apply(simp) - apply(rename_tac "r\<^sub>1" "r\<^sub>2" rs v) - apply(erule Prf_elims) - apply(simp) - apply(simp) - apply(case_tac rs) - apply(simp) - apply(simp) - apply (smt Prf_elims(3) injval.simps(2) injval.simps(3) retrieve.simps(4) retrieve.simps(5) same_append_eq) - apply(simp) - apply(case_tac "nullable (erase r1)") - apply(simp) - apply(erule Prf_elims) - apply(subgoal_tac "bnullable r1") - prefer 2 - using bnullable_correctness apply blast - apply(simp) - apply(erule Prf_elims) - apply(simp) - apply(subgoal_tac "bnullable r1") - prefer 2 - using bnullable_correctness apply blast - apply(simp) - apply(simp add: retrieve_fuse2) - apply(simp add: bmkeps_retrieve) - apply(simp) - apply(erule Prf_elims) - apply(simp) - using bnullable_correctness apply blast - apply(rename_tac bs r v) + using Prf_elims(1) apply auto[1] + using Prf_elims(1) apply auto[1] + apply(auto)[1] + apply (metis Prf_elims(4) injval.simps(1) retrieve.simps(1) retrieve.simps(2)) + using Prf_elims(1) apply blast + (* AALTs case *) apply(simp) apply(erule Prf_elims) - apply(clarify) + apply(simp) + apply(simp) + apply(rename_tac "r\<^sub>1" "r\<^sub>2" rs v) + apply(erule Prf_elims) + apply(simp) + apply(simp) + apply(case_tac rs) + apply(simp) + apply(simp) + using Prf_elims(3) apply fastforce + (* ASEQ case *) + apply(simp) + apply(case_tac "nullable (erase r1)") + apply(simp) + apply(erule Prf_elims) + using Prf_elims(2) bnullable_correctness apply force + apply (simp add: bmkeps_retrieve bnullable_correctness retrieve_fuse2) + apply (simp add: bmkeps_retrieve bnullable_correctness retrieve_fuse2) + using Prf_elims(2) apply force + (* ASTAR case *) + apply(rename_tac bs r v) + apply(simp) apply(erule Prf_elims) apply(clarify) - apply(subst injval.simps) - apply(simp del: retrieve.simps) - apply(subst retrieve.simps) - apply(subst retrieve.simps) - apply(simp) - apply(simp add: retrieve_fuse2) - done - + apply(erule Prf_elims) + apply(clarify) + by (simp add: retrieve_fuse2) lemma MAIN_decode: @@ -514,7 +457,7 @@ (if (f x) \ acc then distinctBy xs f acc else x # (distinctBy xs f ({f x} \ acc)))" - + fun flts :: "arexp list \ arexp list" where @@ -581,84 +524,12 @@ apply(simp_all) done -lemma L_bsimp_ASEQ: - "L (erase (ASEQ bs r1 r2)) = L (erase (bsimp_ASEQ bs r1 r2))" - apply(induct bs r1 r2 rule: bsimp_ASEQ.induct) - apply(simp_all) - by (metis erase_fuse fuse.simps(4)) - -lemma L_bsimp_AALTs: - "L (erase (AALTs bs rs)) = L (erase (bsimp_AALTs bs rs))" - apply(induct bs rs rule: bsimp_AALTs.induct) - apply(simp_all add: erase_fuse) - done - -lemma L_erase_AALTs: - shows "L (erase (AALTs bs rs)) = \ (L ` erase ` (set rs))" - apply(induct rs) - apply(simp) - apply(simp) - apply(case_tac rs) - apply(simp) - apply(simp) - done - -lemma L_erase_flts: - shows "\ (L ` erase ` (set (flts rs))) = \ (L ` erase ` (set rs))" - apply(induct rs rule: flts.induct) - apply(simp_all) - apply(auto) - using L_erase_AALTs erase_fuse apply auto[1] - by (simp add: L_erase_AALTs erase_fuse) - -lemma L_erase_dB_acc: - shows "(\ (L ` acc) \ (\ (L ` erase ` (set (distinctBy rs erase acc))))) - = \ (L ` acc) \ \ (L ` erase ` (set rs))" - apply(induction rs arbitrary: acc) - apply simp_all - by (smt (z3) SUP_absorb UN_insert sup_assoc sup_commute) - - -lemma L_erase_dB: - shows "(\ (L ` erase ` (set (distinctBy rs erase {})))) = \ (L ` erase ` (set rs))" - by (metis L_erase_dB_acc Un_commute Union_image_empty) - -lemma L_bsimp_erase: - shows "L (erase r) = L (erase (bsimp r))" - apply(induct r) - apply(simp) - apply(simp) - apply(simp) - apply(auto simp add: Sequ_def)[1] - apply(subst L_bsimp_ASEQ[symmetric]) - apply(auto simp add: Sequ_def)[1] - apply(subst (asm) L_bsimp_ASEQ[symmetric]) - apply(auto simp add: Sequ_def)[1] - apply(simp) - apply(subst L_bsimp_AALTs[symmetric]) - defer - apply(simp) - apply(subst (2)L_erase_AALTs) - apply(subst L_erase_dB) - apply(subst L_erase_flts) - apply (simp add: L_erase_AALTs) - done - -lemma L_bders_simp: - shows "L (erase (bders_simp r s)) = L (erase (bders r s))" - apply(induct s arbitrary: r rule: rev_induct) - apply(simp) - apply(simp) - apply(simp add: ders_append) - apply(simp add: bders_simp_append) - apply(simp add: L_bsimp_erase[symmetric]) - by (simp add: der_correctness) - lemma bmkeps_fuse: assumes "bnullable r" shows "bmkeps (fuse bs r) = bs @ bmkeps r" - by (metis assms bmkeps_retrieve bnullable_correctness erase_fuse mkeps_nullable retrieve_fuse2) + using assms + by (metis bmkeps_retrieve bnullable_correctness erase_fuse mkeps_nullable retrieve_fuse2) lemma bmkepss_fuse: assumes "bnullables rs" @@ -668,21 +539,10 @@ apply(auto simp add: bmkeps_fuse bnullable_fuse) done - -lemma b4: - shows "bnullable (bders_simp r s) = bnullable (bders r s)" -proof - - have "L (erase (bders_simp r s)) = L (erase (bders r s))" - using L_bders_simp by force - then show "bnullable (bders_simp r s) = bnullable (bders r s)" - using bnullable_correctness nullable_correctness by blast -qed - - lemma bder_fuse: shows "bder c (fuse bs a) = fuse bs (bder c a)" apply(induct a arbitrary: bs c) - apply(simp_all) + apply(simp_all) done @@ -700,7 +560,7 @@ | bs5: "r3 \ r4 \ ASEQ bs r1 r3 \ ASEQ bs r1 r4" | bs6: "AALTs bs [] \ AZERO" | bs7: "AALTs bs [r] \ fuse bs r" -| bs10: "rs1 s\ rs2 \ AALTs bs rs1 \ AALTs bs rs2" +| bs8: "rs1 s\ rs2 \ AALTs bs rs1 \ AALTs bs rs2" | ss1: "[] s\ []" | ss2: "rs1 s\ rs2 \ (r # rs1) s\ (r # rs2)" | ss3: "r1 \ r2 \ (r1 # rs) s\ (r2 # rs)" @@ -726,11 +586,6 @@ shows "r1 \ r2 \ r1 \* r2" using rrewrites.intros(1) rrewrites.intros(2) by blast -lemma srewrites_single : - shows "rs1 s\ rs2 \ rs1 s\* rs2" - using rrewrites.intros(1) rrewrites.intros(2) by blast - - lemma rrewrites_trans[trans]: assumes a1: "r1 \* r2" and a2: "r2 \* r3" shows "r1 \* r3" @@ -753,13 +608,13 @@ "rs1 s\* rs2 \ AALTs bs rs1 \* AALTs bs rs2" apply(induct rs1 rs2 rule: srewrites.inducts) apply simp - using bs10 r_in_rstar rrewrites_trans by blast + using bs8 r_in_rstar rrewrites_trans by blast lemma contextrewrites1: "r \* r' \ AALTs bs (r # rs) \* AALTs bs (r' # rs)" apply(induct r r' rule: rrewrites.induct) apply simp - using bs10 ss3 by blast + using bs8 ss3 by blast lemma srewrite1: shows "rs1 s\ rs2 \ (rs @ rs1) s\ (rs @ rs2)" @@ -777,9 +632,9 @@ shows "r1 \ r2 \ True" and "rs1 s\ rs2 \ (rs1 @ rs) s\* (rs2 @ rs)" apply(induct rule: rrewrite_srewrite.inducts) - apply(auto) - apply (metis append_Cons append_Nil srewrites1) - apply(meson srewrites.simps ss3) + apply(auto) + apply (metis append_Cons append_Nil srewrites1) + apply(meson srewrites.simps ss3) apply (meson srewrites.simps ss4) apply (meson srewrites.simps ss5) by (metis append_Cons append_Nil srewrites.simps ss6) @@ -806,15 +661,15 @@ shows "[r1] s\* [r2]" using assms apply(induct r1 r2 rule: rrewrites.induct) - apply(auto) + apply(auto) by (meson srewrites.simps srewrites_trans ss3) lemma srewrites7: assumes "rs3 s\* rs4" "r1 \* r2" shows "(r1 # rs3) s\* (r2 # rs4)" using assms - by (smt (verit, best) append_Cons append_Nil srewrites1 srewrites3 srewrites6 srewrites_trans) - + by (smt (verit, del_insts) append.simps srewrites1 srewrites3 srewrites6 srewrites_trans) + lemma ss6_stronger_aux: shows "(rs1 @ rs2) s\* (rs1 @ distinctBy rs2 erase (set (map erase rs1)))" apply(induct rs2 arbitrary: rs1) @@ -828,47 +683,47 @@ shows "rs1 s\* distinctBy rs1 erase {}" using ss6_stronger_aux[of "[]" _] by auto - lemma rewrite_preserves_fuse: shows "r2 \ r3 \ fuse bs r2 \ fuse bs r3" - and "rs2 s\ rs3 \ map (fuse bs) rs2 s\* map (fuse bs) rs3" + and "rs2 s\ rs3 \ map (fuse bs) rs2 s\ map (fuse bs) rs3" proof(induct rule: rrewrite_srewrite.inducts) case (bs3 bs1 bs2 r) - then show ?case + then show "fuse bs (ASEQ bs1 (AONE bs2) r) \ fuse bs (fuse (bs1 @ bs2) r)" by (metis fuse.simps(5) fuse_append rrewrite_srewrite.bs3) next - case (bs7 bs r) - then show ?case + case (bs7 bs1 r) + then show "fuse bs (AALTs bs1 [r]) \ fuse bs (fuse bs1 r)" by (metis fuse.simps(4) fuse_append rrewrite_srewrite.bs7) next case (ss2 rs1 rs2 r) - then show ?case - using srewrites7 by force + then show "map (fuse bs) (r # rs1) s\ map (fuse bs) (r # rs2)" + by (simp add: rrewrite_srewrite.ss2) next case (ss3 r1 r2 rs) - then show ?case by (simp add: r_in_rstar srewrites7) + then show "map (fuse bs) (r1 # rs) s\ map (fuse bs) (r2 # rs)" + by (simp add: rrewrite_srewrite.ss3) next case (ss5 bs1 rs1 rsb) - then show ?case - apply(simp) - by (metis (mono_tags, lifting) comp_def fuse_append map_eq_conv rrewrite_srewrite.ss5 srewrites.simps) + have "map (fuse bs) (AALTs bs1 rs1 # rsb) = AALTs (bs @ bs1) rs1 # (map (fuse bs) rsb)" by simp + also have "... s\ ((map (fuse (bs @ bs1)) rs1) @ (map (fuse bs) rsb))" + by (simp add: rrewrite_srewrite.ss5) + finally show "map (fuse bs) (AALTs bs1 rs1 # rsb) s\ map (fuse bs) (map (fuse bs1) rs1 @ rsb)" + by (simp add: comp_def fuse_append) next case (ss6 a1 a2 rsa rsb rsc) - then show ?case + then show "map (fuse bs) (rsa @ [a1] @ rsb @ [a2] @ rsc) s\ map (fuse bs) (rsa @ [a1] @ rsb @ rsc)" apply(simp) - apply(rule srewrites_single) apply(rule rrewrite_srewrite.ss6[simplified]) apply(simp add: erase_fuse) done - qed (auto intro: rrewrite_srewrite.intros) - +qed (auto intro: rrewrite_srewrite.intros) lemma rewrites_fuse: assumes "r1 \* r2" shows "fuse bs r1 \* fuse bs r2" using assms apply(induction r1 r2 arbitrary: bs rule: rrewrites.induct) -apply(auto intro: rewrite_preserves_fuse rrewrites_trans) +apply(auto intro: rewrite_preserves_fuse) done @@ -911,13 +766,12 @@ by (smt (verit) bs6 bs7 bsimp_AALTs.elims rrewrites.simps) lemma trivialbsimp_srewrites: - "\\x. x \ set rs \ x \* f x \ \ rs s\* (map f rs)" + assumes "\x. x \ set rs \ x \* f x" + shows "rs s\* (map f rs)" +using assms apply(induction rs) - apply simp - apply(simp) - using srewrites7 by auto - - + apply(simp_all add: srewrites7) + done lemma fltsfrewrites: "rs s\* flts rs" apply(induction rs rule: flts.induct) @@ -1044,7 +898,7 @@ lemma to_zero_in_alt: shows "AALT bs (ASEQ [] AZERO r) r2 \ AALT bs AZERO r2" - by (simp add: bs1 bs10 ss3) + by (simp add: bs1 bs8 ss3) @@ -1054,7 +908,6 @@ apply(simp_all add: bder_fuse) done - lemma rewrite_preserves_bder: shows "r1 \ r2 \ bder c r1 \* bder c r2" and "rs1 s\ rs2 \ map (bder c) rs1 s\* map (bder c) rs2" @@ -1072,7 +925,7 @@ case (bs3 bs1 bs2 r) show "bder c (ASEQ bs1 (AONE bs2) r) \* bder c (fuse (bs1 @ bs2) r)" apply(simp) - by (metis (no_types, lifting) bder_fuse bs10 bs7 fuse_append rrewrites.simps ss4 to_zero_in_alt) + by (metis (no_types, lifting) bder_fuse bs8 bs7 fuse_append rrewrites.simps ss4 to_zero_in_alt) next case (bs4 r1 r2 bs r3) have as: "r1 \ r2" by fact @@ -1097,7 +950,7 @@ show "bder c (AALTs bs [r]) \* bder c (fuse bs r)" by (simp add: bder_fuse r_in_rstar rrewrite_srewrite.bs7) next - case (bs10 rs1 rs2 bs) + case (bs8 rs1 rs2 bs) have IH1: "map (bder c) rs1 s\* map (bder c) rs2" by fact then show "bder c (AALTs bs rs1) \* bder c (AALTs bs rs2)" using contextrewrites0 by force @@ -1168,12 +1021,10 @@ qed - - theorem main_blexer_simp: shows "blexer r s = blexer_simp r s" unfolding blexer_def blexer_simp_def - using b4 main_aux by simp + by (metis central main_aux rewrites_bnullable_eq) theorem blexersimp_correctness: @@ -1181,6 +1032,79 @@ using blexer_correctness main_blexer_simp by simp +(* below is the idempotency of bsimp *) + +lemma bsimp_ASEQ_fuse: + shows "fuse bs1 (bsimp_ASEQ bs2 r1 r2) = bsimp_ASEQ (bs1 @ bs2) r1 r2" + apply(induct r1 r2 arbitrary: bs1 bs2 rule: bsimp_ASEQ.induct) + apply(auto) + done + +lemma bsimp_AALTs_fuse: + assumes "\r \ set rs. fuse bs1 (fuse bs2 r) = fuse (bs1 @ bs2) r" + shows "fuse bs1 (bsimp_AALTs bs2 rs) = bsimp_AALTs (bs1 @ bs2) rs" + using assms + apply(induct bs2 rs arbitrary: bs1 rule: bsimp_AALTs.induct) + apply(auto) + done + +lemma bsimp_fuse: + shows "fuse bs (bsimp r) = bsimp (fuse bs r)" + apply(induct r arbitrary: bs) + apply(simp_all add: bsimp_ASEQ_fuse bsimp_AALTs_fuse fuse_append) + done + +lemma bsimp_ASEQ_idem: + assumes "bsimp (bsimp r1) = bsimp r1" "bsimp (bsimp r2) = bsimp r2" + shows "bsimp (bsimp_ASEQ x1 (bsimp r1) (bsimp r2)) = bsimp_ASEQ x1 (bsimp r1) (bsimp r2)" + using assms + apply(case_tac "bsimp r1 = AZERO") + apply(simp) + apply(case_tac "bsimp r2 = AZERO") + apply(simp) + apply(case_tac "\bs. bsimp r1 = AONE bs") + apply(auto)[1] + apply (metis bsimp_fuse) + apply(simp add: bsimp_ASEQ1) + done + +lemma bsimp_AALTs_idem: + assumes "\r \ set rs. bsimp (bsimp r) = bsimp r" + shows "bsimp (bsimp_AALTs bs rs) = bsimp_AALTs bs (map bsimp rs)" + using assms + apply(induct bs rs rule: bsimp_AALTs.induct) + apply(simp) + apply(simp) + using bsimp_fuse apply presburger + oops + +lemma bsimp_idem_rev: + shows "\r2. bsimp r1 \ r2" + apply(induct r1 rule: bsimp.induct) + apply(auto) + defer + defer + using rrewrite.simps apply blast + using rrewrite.cases apply blast + using rrewrite.simps apply blast + using rrewrite.cases apply blast + apply(case_tac "bsimp r1 = AZERO") + apply(simp) + apply(case_tac "bsimp r2 = AZERO") + apply(simp) + apply(case_tac "\bs. bsimp r1 = AONE bs") + apply(auto)[1] + prefer 2 + apply (smt (verit, best) arexp.distinct(25) arexp.inject(3) bsimp_ASEQ1 rrewrite.simps) + defer + oops + +lemma bsimp_idem: + shows "bsimp (bsimp r) = bsimp r" + apply(induct r rule: bsimp.induct) + apply(auto) + using bsimp_ASEQ_idem apply presburger + oops export_code blexer_simp blexer lexer bders bders_simp in Scala module_name VerifiedLexers diff -r 6291181fad07 -r 1500f96707b0 thys2/paper.pdf Binary file thys2/paper.pdf has changed