--- a/thys/LexerExt.thy Tue Jan 25 13:12:50 2022 +0000
+++ b/thys/LexerExt.thy Thu Jan 27 23:25:26 2022 +0000
@@ -17,10 +17,15 @@
| "mkeps(NTIMES r n) = Stars (replicate n (mkeps r))"
| "mkeps(FROMNTIMES r n) = Stars (replicate n (mkeps r))"
| "mkeps(NMTIMES r n m) = Stars (replicate n (mkeps r))"
-
+| "mkeps(NOT ZERO) = Nt Void"
+| "mkeps(NOT (CH _ )) = Nt Void"
+| "mkeps(NOT (SEQ r1 r2)) = Seq (mkeps (NOT r1)) (mkeps (NOT r1))"
+| "mkeps(NOT (ALT r1 r2)) = (if nullable(r1) then Right (mkeps (NOT r2)) else (mkeps (NOT r1)))"
+
+
fun injval :: "rexp \<Rightarrow> char \<Rightarrow> val \<Rightarrow> val"
where
- "injval (CHAR d) c Void = Char d"
+ "injval (CH d) c Void = Char d"
| "injval (ALT r1 r2) c (Left v1) = Left(injval r1 c v1)"
| "injval (ALT r1 r2) c (Right v2) = Right(injval r2 c v2)"
| "injval (SEQ r1 r2) c (Seq v1 v2) = Seq (injval r1 c v1) v2"
@@ -49,9 +54,11 @@
shows "flat (mkeps r) = []"
using assms
apply(induct rule: nullable.induct)
- apply(auto)
- by presburger
-
+ apply(auto)
+ apply presburger
+ apply(case_tac r)
+ apply(auto)
+ sorry
lemma mkeps_nullable:
assumes "nullable(r)"
@@ -81,7 +88,7 @@
apply (simp add: mkeps_flat)
apply(simp)
apply(simp)
-done
+ sorry
lemma Prf_injval_flat:
@@ -90,7 +97,7 @@
using assms
apply(induct arbitrary: v rule: der.induct)
apply(auto elim!: Prf_elims intro: mkeps_flat split: if_splits)
-done
+ sorry
lemma Prf_injval:
assumes "\<Turnstile> v : der c r"
@@ -170,7 +177,8 @@
apply(simp add: Prf_injval_flat)
apply(simp)
apply(simp)
-done
+ sorry
+
@@ -186,8 +194,10 @@
apply(auto intro: Posix.intros simp add: nullable_correctness Sequ_def)
apply(subst append.simps(1)[symmetric])
apply(rule Posix.intros)
- apply(auto)
- done
+ apply(auto)
+ apply(case_tac r)
+ apply(auto)
+ sorry
lemma Posix_injval:
@@ -207,22 +217,22 @@
then have "False" by cases
then show "(c # s) \<in> ONE \<rightarrow> (injval ONE c v)" by simp
next
- case (CHAR d)
+ case (CH d)
consider (eq) "c = d" | (ineq) "c \<noteq> d" by blast
- then show "(c # s) \<in> (CHAR d) \<rightarrow> (injval (CHAR d) c v)"
+ then show "(c # s) \<in> (CH d) \<rightarrow> (injval (CH d) c v)"
proof (cases)
case eq
- have "s \<in> der c (CHAR d) \<rightarrow> v" by fact
+ have "s \<in> der c (CH d) \<rightarrow> v" by fact
then have "s \<in> ONE \<rightarrow> v" using eq by simp
then have eqs: "s = [] \<and> v = Void" by cases simp
- show "(c # s) \<in> CHAR d \<rightarrow> injval (CHAR d) c v" using eq eqs
+ show "(c # s) \<in> CH d \<rightarrow> injval (CH d) c v" using eq eqs
by (auto intro: Posix.intros)
next
case ineq
- have "s \<in> der c (CHAR d) \<rightarrow> v" by fact
+ have "s \<in> der c (CH d) \<rightarrow> v" by fact
then have "s \<in> ZERO \<rightarrow> v" using ineq by simp
then have "False" by cases
- then show "(c # s) \<in> CHAR d \<rightarrow> injval (CHAR d) c v" by simp
+ then show "(c # s) \<in> CH d \<rightarrow> injval (CH d) c v" by simp
qed
next
case (ALT r1 r2)
@@ -319,7 +329,6 @@
apply(simp)
apply(subgoal_tac "\<exists>vss. v2 = Stars vss")
apply(clarify)
- apply(drule_tac x="v1" in meta_spec)
apply(drule_tac x="vss" in meta_spec)
apply(drule_tac x="s1" in meta_spec)
apply(drule_tac x="s2" in meta_spec)
@@ -401,7 +410,6 @@
apply(simp)
apply(subgoal_tac "\<exists>vss. v2 = Stars vss")
apply(clarify)
- apply(drule_tac x="v1" in meta_spec)
apply(drule_tac x="vss" in meta_spec)
apply(drule_tac x="s1" in meta_spec)
apply(drule_tac x="s2" in meta_spec)
@@ -454,7 +462,6 @@
apply(simp)
apply(subgoal_tac "\<exists>vss. v2 = Stars vss")
apply(clarify)
- apply(drule_tac x="v1" in meta_spec)
apply(drule_tac x="vss" in meta_spec)
apply(drule_tac x="s1" in meta_spec)
apply(drule_tac x="s2" in meta_spec)
@@ -544,7 +551,6 @@
apply(simp)
apply(subgoal_tac "\<exists>vss. v2 = Stars vss")
apply(clarify)
- apply(drule_tac x="v1" in meta_spec)
apply(drule_tac x="vss" in meta_spec)
apply(drule_tac x="s1" in meta_spec)
apply(drule_tac x="s2" in meta_spec)
@@ -618,6 +624,9 @@
apply(simp)
done
qed
+ next
+ case (NOT r s v)
+ then show ?case sorry
qed
section {* Lexer Correctness *}
--- a/thys/SpecExt.thy Tue Jan 25 13:12:50 2022 +0000
+++ b/thys/SpecExt.thy Thu Jan 27 23:25:26 2022 +0000
@@ -1,6 +1,6 @@
theory SpecExt
- imports Main (*"~~/src/HOL/Library/Sublist"*)
+ imports Main "HOL-Library.Sublist" (*"~~/src/HOL/Library/Sublist"*)
begin
section {* Sequential Composition of Languages *}
@@ -165,10 +165,6 @@
apply(auto simp add: Sequ_def)
done
-lemma
- assumes "[] \<in> A" "n \<noteq> 0" "A \<noteq> {}"
- shows "A \<up> (Suc n) = A \<up> n"
-
lemma Der_Pow_0:
shows "Der c (A \<up> 0) = {}"
by(simp add: Der_def)
@@ -221,7 +217,7 @@
datatype rexp =
ZERO
| ONE
-| CHAR char
+| CH char
| SEQ rexp rexp
| ALT rexp rexp
| STAR rexp
@@ -238,7 +234,7 @@
where
"L (ZERO) = {}"
| "L (ONE) = {[]}"
-| "L (CHAR c) = {[c]}"
+| "L (CH c) = {[c]}"
| "L (SEQ r1 r2) = (L r1) ;; (L r2)"
| "L (ALT r1 r2) = (L r1) \<union> (L r2)"
| "L (STAR r) = (L r)\<star>"
@@ -255,7 +251,7 @@
where
"nullable (ZERO) = False"
| "nullable (ONE) = True"
-| "nullable (CHAR c) = False"
+| "nullable (CH c) = False"
| "nullable (ALT r1 r2) = (nullable r1 \<or> nullable r2)"
| "nullable (SEQ r1 r2) = (nullable r1 \<and> nullable r2)"
| "nullable (STAR r) = True"
@@ -270,7 +266,7 @@
where
"der c (ZERO) = ZERO"
| "der c (ONE) = ZERO"
-| "der c (CHAR d) = (if c = d then ONE else ZERO)"
+| "der c (CH d) = (if c = d then ONE else ZERO)"
| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)"
| "der c (SEQ r1 r2) =
(if nullable r1
@@ -340,48 +336,6 @@
by (metis append.assoc)
-
-lemma
- "L(FROMNTIMES r n) = L(SEQ (NTIMES r n) (STAR r))"
- apply(auto simp add: Sequ_def)
- defer
- apply(subgoal_tac "\<exists>m. s2 \<in> (L r) \<up> m")
- prefer 2
- apply (simp add: Star_Pow)
- apply(auto)
- apply(rule_tac x="n + m" in bexI)
- apply (simp add: pow_add)
- apply simp
- apply(subgoal_tac "\<exists>m. m + n = xa")
- apply(auto)
- prefer 2
- using le_add_diff_inverse2 apply auto[1]
- by (smt Pow_Star Sequ_def add.commute mem_Collect_eq pow_add2)
-
-lemma
- "L (der c (FROMNTIMES r n)) =
- L (SEQ (der c r) (FROMNTIMES r (n - 1)))"
- apply(auto simp add: Sequ_def)
- using Star_Pow apply blast
- using Pow_Star by blast
-
-lemma
- "L (der c (UPNTIMES r n)) =
- L(if n = 0 then ZERO else
- ALT (der c r) (SEQ (der c r) (UPNTIMES r (n - 1))))"
- apply(auto simp add: Sequ_def)
- using SpecExt.Pow_empty by blast
-
-abbreviation "FROM \<equiv> FROMNTIMES"
-
-lemma
- shows "L (der c (FROM r n)) =
- L (if n <= 0 then SEQ (der c r) (ALT ONE (FROM r 0))
- else SEQ (der c r) (ALT ZERO (FROM r (n -1))))"
- apply(auto simp add: Sequ_def)
- oops
-
-
fun
ders :: "string \<Rightarrow> rexp \<Rightarrow> rexp"
where
@@ -454,8 +408,10 @@
apply(auto simp add: Sequ_def Der_def Cons_eq_append_conv)[1]
apply(rule_tac x="Suc xa" in bexI)
apply(auto simp add: Sequ_def)[2]
-apply (metis append_Cons)
-apply (metis (no_types, hide_lams) Pow_decomp atMost_iff diff_Suc_eq_diff_pred diff_is_0_eq)
+ apply (metis append_Cons)
+ apply(rule_tac x="xa - 1" in bexI)
+ apply(auto simp add: Sequ_def)[2]
+ apply (metis One_nat_def Pow_decomp)
apply(rule impI)+
apply(subst Sequ_Union_in)
apply(subst Der_Pow_Sequ[symmetric])
@@ -485,7 +441,7 @@
| Right val
| Left val
| Stars "val list"
-
+| Nt val
section {* The string behind a value *}
@@ -499,6 +455,7 @@
| "flat (Seq v1 v2) = (flat v1) @ (flat v2)"
| "flat (Stars []) = []"
| "flat (Stars (v#vs)) = (flat v) @ (flat (Stars vs))"
+| "flat (Nt v) = flat v"
abbreviation
"flats vs \<equiv> concat (map flat vs)"
@@ -597,7 +554,7 @@
| "\<Turnstile> v1 : r1 \<Longrightarrow> \<Turnstile> Left v1 : ALT r1 r2"
| "\<Turnstile> v2 : r2 \<Longrightarrow> \<Turnstile> Right v2 : ALT r1 r2"
| "\<Turnstile> Void : ONE"
-| "\<Turnstile> Char c : CHAR c"
+| "\<Turnstile> Char c : CH c"
| "\<lbrakk>\<forall>v \<in> set vs. \<Turnstile> v : r \<and> flat v \<noteq> []\<rbrakk> \<Longrightarrow> \<Turnstile> Stars vs : STAR r"
| "\<lbrakk>\<forall>v \<in> set vs. \<Turnstile> v : r \<and> flat v \<noteq> []; length vs \<le> n\<rbrakk> \<Longrightarrow> \<Turnstile> Stars vs : UPNTIMES r n"
| "\<lbrakk>\<forall>v \<in> set vs1. \<Turnstile> v : r \<and> flat v \<noteq> [];
@@ -622,7 +579,7 @@
"\<Turnstile> v : SEQ r1 r2"
"\<Turnstile> v : ALT r1 r2"
"\<Turnstile> v : ONE"
- "\<Turnstile> v : CHAR c"
+ "\<Turnstile> v : CH c"
"\<Turnstile> vs : STAR r"
"\<Turnstile> vs : UPNTIMES r n"
"\<Turnstile> vs : NTIMES r n"
@@ -867,7 +824,10 @@
then obtain vs where "flats vs = s" "\<forall>v\<in>set vs. \<Turnstile> v : r \<and> flat v \<noteq> []" "length vs \<le> n"
using IH flats_cval_nonempty by (smt order.trans)
then show "\<exists>v. \<Turnstile> v : UPNTIMES r n \<and> flat v = s"
- using Prf.intros(7) flat_Stars by blast
+ using Prf.intros(7) flat_Stars by blast
+next
+ case (NOT r)
+ then show ?case sorry
qed (auto intro: Prf.intros)
@@ -917,7 +877,7 @@
lemma LV_simps:
shows "LV ZERO s = {}"
and "LV ONE s = (if s = [] then {Void} else {})"
- and "LV (CHAR c) s = (if s = [c] then {Char c} else {})"
+ and "LV (CH c) s = (if s = [c] then {Char c} else {})"
and "LV (ALT r1 r2) s = Left ` LV r1 s \<union> Right ` LV r2 s"
unfolding LV_def
apply(auto intro: Prf.intros elim: Prf.cases)
@@ -1226,8 +1186,8 @@
case (ONE s)
show "finite (LV ONE s)" by (simp add: LV_simps)
next
- case (CHAR c s)
- show "finite (LV (CHAR c) s)" by (simp add: LV_simps)
+ case (CH c s)
+ show "finite (LV (CH c) s)" by (simp add: LV_simps)
next
case (ALT r1 r2 s)
then show "finite (LV (ALT r1 r2) s)" by (simp add: LV_simps)
@@ -1270,7 +1230,10 @@
case (NMTIMES r n m s)
have "\<And>s. finite (LV r s)" by fact
then show "finite (LV (NMTIMES r n m) s)"
- by (simp add: LV_NMTIMES_6)
+ by (simp add: LV_NMTIMES_6)
+next
+ case (NOT r s)
+ then show ?case sorry
qed
@@ -1281,7 +1244,7 @@
Posix :: "string \<Rightarrow> rexp \<Rightarrow> val \<Rightarrow> bool" ("_ \<in> _ \<rightarrow> _" [100, 100, 100] 100)
where
Posix_ONE: "[] \<in> ONE \<rightarrow> Void"
-| Posix_CHAR: "[c] \<in> (CHAR c) \<rightarrow> (Char c)"
+| Posix_CHAR: "[c] \<in> (CH c) \<rightarrow> (Char c)"
| Posix_ALT1: "s \<in> r1 \<rightarrow> v \<Longrightarrow> s \<in> (ALT r1 r2) \<rightarrow> (Left v)"
| Posix_ALT2: "\<lbrakk>s \<in> r2 \<rightarrow> v; s \<notin> L(r1)\<rbrakk> \<Longrightarrow> s \<in> (ALT r1 r2) \<rightarrow> (Right v)"
| Posix_SEQ: "\<lbrakk>s1 \<in> r1 \<rightarrow> v1; s2 \<in> r2 \<rightarrow> v2;
@@ -1320,7 +1283,7 @@
inductive_cases Posix_elims:
"s \<in> ZERO \<rightarrow> v"
"s \<in> ONE \<rightarrow> v"
- "s \<in> CHAR c \<rightarrow> v"
+ "s \<in> CH c \<rightarrow> v"
"s \<in> ALT r1 r2 \<rightarrow> v"
"s \<in> SEQ r1 r2 \<rightarrow> v"
"s \<in> STAR r \<rightarrow> v"
@@ -1396,7 +1359,7 @@
then show "Void = v2" by cases auto
next
case (Posix_CHAR c v2)
- have "[c] \<in> CHAR c \<rightarrow> v2" by fact
+ have "[c] \<in> CH c \<rightarrow> v2" by fact
then show "Char c = v2" by cases auto
next
case (Posix_ALT1 s r1 v r2 v2)
@@ -1503,14 +1466,9 @@
using Posix1(1) Posix1(2) apply blast
apply(case_tac n)
apply(simp)
- apply(simp)
- apply(drule_tac x="va" in meta_spec)
- apply(drule_tac x="vs" in meta_spec)
- apply(simp)
- apply(drule meta_mp)
- apply (metis L.simps(9) Posix1(1) UN_E append.right_neutral append_Nil diff_Suc_1 local.Posix_FROMNTIMES1(4) val.inject(5))
- apply (metis L.simps(9) Posix1(1) UN_E append.right_neutral append_Nil)
- by (metis One_nat_def Posix1(1) Posix_FROMNTIMES1.hyps(7) self_append_conv self_append_conv2)
+ apply(simp)
+ apply (smt (verit, ccfv_threshold) L.simps(9) Posix1(1) UN_E append_eq_append_conv2)
+ by (metis One_nat_def Posix1(1) Posix_FROMNTIMES1.hyps(7) append_Nil2 append_self_conv2)
moreover
have IHs: "\<And>v2. s1 \<in> r \<rightarrow> v2 \<Longrightarrow> v = v2"
"\<And>v2. s2 \<in> FROMNTIMES r (n - 1) \<rightarrow> v2 \<Longrightarrow> Stars vs = v2" by fact+
@@ -1555,18 +1513,8 @@
apply(case_tac m)
apply(simp)
apply(simp)
- apply(drule_tac x="va" in meta_spec)
- apply(drule_tac x="vs" in meta_spec)
- apply(simp)
- apply(drule meta_mp)
- apply(drule Posix1(1))
- apply(drule Posix1(1))
- apply(drule Posix1(1))
- apply(frule Posix1(1))
- apply(simp)
- using Posix_NMTIMES1.hyps(4) apply force
- apply (metis L.simps(10) Posix1(1) UN_E append_Nil2 append_self_conv2)
- by (metis One_nat_def Posix1(1) Posix_NMTIMES1.hyps(8) append.right_neutral append_Nil)
+ apply (metis L.simps(10) Posix1(1) UN_E append.right_neutral append_Nil)
+ by (metis One_nat_def Posix1(1) Posix_NMTIMES1.hyps(8) append_Nil self_append_conv)
moreover
have IHs: "\<And>v2. s1 \<in> r \<rightarrow> v2 \<Longrightarrow> v = v2"
"\<And>v2. s2 \<in> NMTIMES r (n - 1) (m - 1) \<rightarrow> v2 \<Longrightarrow> Stars vs = v2" by fact+
@@ -1616,7 +1564,6 @@
apply(auto simp add: intro!: Prf.intros elim!: Prf_elims)[2]
apply (metis (mono_tags, lifting) Prf.intros(9) append_Nil empty_iff flat_Stars flats_empty list.set(1) mem_Collect_eq)
apply(simp)
- apply(clarify)
apply(case_tac n)
apply(simp)
apply(simp)
@@ -1632,7 +1579,6 @@
apply(simp)
apply(simp)
apply(simp)
- apply(clarify)
apply(erule Prf_elims)
apply(simp)
apply(rule Prf.intros)
@@ -1660,7 +1606,6 @@
apply(simp)
apply (metis Prf.intros(11) append_Nil empty_iff list.set(1))
apply(simp)
- apply(clarify)
apply(rotate_tac 6)
apply(erule Prf_elims)
apply(simp)
@@ -1675,7 +1620,6 @@
apply(simp)
apply(simp)
apply(simp)
- apply(clarify)
apply(rotate_tac 6)
apply(erule Prf_elims)
apply(simp)
--- a/thys2/Paper/Paper.thy Tue Jan 25 13:12:50 2022 +0000
+++ b/thys2/Paper/Paper.thy Thu Jan 27 23:25:26 2022 +0000
@@ -7,7 +7,164 @@
"../SizeBound4"
"HOL-Library.LaTeXsugar"
begin
+
+declare [[show_question_marks = false]]
+
+abbreviation
+ "der_syn r c \<equiv> der c r"
+
+notation (latex output)
+ der_syn ("_\\_" [79, 1000] 76) and
+
+ ZERO ("\<^bold>0" 81) and
+ ONE ("\<^bold>1" 81) and
+ CH ("_" [1000] 80) and
+ ALT ("_ + _" [77,77] 78) and
+ SEQ ("_ \<cdot> _" [77,77] 78) and
+ STAR ("_\<^sup>\<star>" [79] 78)
+
(*>*)
+
+section {* Introduction *}
+
+text {*
+
+Brzozowski \cite{Brzozowski1964} introduced the notion of the {\em
+derivative} @{term "der c r"} of a regular expression \<open>r\<close> w.r.t.\
+a character~\<open>c\<close>, and showed that it gave a simple solution to the
+problem of matching a string @{term s} with a regular expression @{term
+r}: if the derivative of @{term r} w.r.t.\ (in succession) all the
+characters of the string matches the empty string, then @{term r}
+matches @{term s} (and {\em vice versa}). The derivative has the
+property (which may almost be regarded as its specification) that, for
+every string @{term s} and regular expression @{term r} and character
+@{term c}, one has @{term "cs \<in> L(r)"} if and only if \mbox{@{term "s \<in> L(der c r)"}}.
+The beauty of Brzozowski's derivatives is that
+they are neatly expressible in any functional language, and easily
+definable and reasoned about in theorem provers---the definitions just
+consist of inductive datatypes and simple recursive functions. A
+mechanised correctness proof of Brzozowski's matcher in for example HOL4
+has been mentioned by Owens and Slind~\cite{Owens2008}. Another one in
+Isabelle/HOL is part of the work by Krauss and Nipkow \cite{Krauss2011}.
+And another one in Coq is given by Coquand and Siles \cite{Coquand2012}.
+
+If a regular expression matches a string, then in general there is more
+than one way of how the string is matched. There are two commonly used
+disambiguation strategies to generate a unique answer: one is called
+GREEDY matching \cite{Frisch2004} and the other is POSIX
+matching~\cite{POSIX,Kuklewicz,OkuiSuzuki2010,Sulzmann2014,Vansummeren2006}.
+For example consider the string @{term xy} and the regular expression
+\mbox{@{term "STAR (ALT (ALT x y) xy)"}}. Either the string can be
+matched in two `iterations' by the single letter-regular expressions
+@{term x} and @{term y}, or directly in one iteration by @{term xy}. The
+first case corresponds to GREEDY matching, which first matches with the
+left-most symbol and only matches the next symbol in case of a mismatch
+(this is greedy in the sense of preferring instant gratification to
+delayed repletion). The second case is POSIX matching, which prefers the
+longest match.
+
+
+
+\begin{figure}[t]
+\begin{center}
+\begin{tikzpicture}[scale=2,node distance=1.3cm,
+ every node/.style={minimum size=6mm}]
+\node (r1) {@{term "r\<^sub>1"}};
+\node (r2) [right=of r1]{@{term "r\<^sub>2"}};
+\draw[->,line width=1mm](r1)--(r2) node[above,midway] {@{term "der a DUMMY"}};
+\node (r3) [right=of r2]{@{term "r\<^sub>3"}};
+\draw[->,line width=1mm](r2)--(r3) node[above,midway] {@{term "der b DUMMY"}};
+\node (r4) [right=of r3]{@{term "r\<^sub>4"}};
+\draw[->,line width=1mm](r3)--(r4) node[above,midway] {@{term "der c DUMMY"}};
+\draw (r4) node[anchor=west] {\;\raisebox{3mm}{@{term nullable}}};
+\node (v4) [below=of r4]{@{term "v\<^sub>4"}};
+\draw[->,line width=1mm](r4) -- (v4);
+\node (v3) [left=of v4] {@{term "v\<^sub>3"}};
+\draw[->,line width=1mm](v4)--(v3) node[below,midway] {\<open>inj r\<^sub>3 c\<close>};
+\node (v2) [left=of v3]{@{term "v\<^sub>2"}};
+\draw[->,line width=1mm](v3)--(v2) node[below,midway] {\<open>inj r\<^sub>2 b\<close>};
+\node (v1) [left=of v2] {@{term "v\<^sub>1"}};
+\draw[->,line width=1mm](v2)--(v1) node[below,midway] {\<open>inj r\<^sub>1 a\<close>};
+\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{@{term "mkeps"}}};
+\end{tikzpicture}
+\end{center}
+\mbox{}\\[-13mm]
+
+\caption{The two phases of the algorithm by Sulzmann \& Lu \cite{Sulzmann2014},
+matching the string @{term "[a,b,c]"}. The first phase (the arrows from
+left to right) is \Brz's matcher building successive derivatives. If the
+last regular expression is @{term nullable}, then the functions of the
+second phase are called (the top-down and right-to-left arrows): first
+@{term mkeps} calculates a value @{term "v\<^sub>4"} witnessing
+how the empty string has been recognised by @{term "r\<^sub>4"}. After
+that the function @{term inj} ``injects back'' the characters of the string into
+the values.
+\label{Sulz}}
+\end{figure}
+
+
+*}
+
+section {* Background *}
+
+text {*
+ Sulzmann-Lu algorithm with inj. State that POSIX rules.
+ metion slg is correct.
+
+
+ \begin{figure}[t]
+ \begin{center}
+ \begin{tabular}{c}
+ @{thm[mode=Axiom] Posix.intros(1)}\<open>P\<close>@{term "ONE"} \qquad
+ @{thm[mode=Axiom] Posix.intros(2)}\<open>P\<close>@{term "c"}\medskip\\
+ @{thm[mode=Rule] Posix.intros(3)[of "s" "r\<^sub>1" "v" "r\<^sub>2"]}\<open>P+L\<close>\qquad
+ @{thm[mode=Rule] Posix.intros(4)[of "s" "r\<^sub>2" "v" "r\<^sub>1"]}\<open>P+R\<close>\medskip\\
+ $\mprset{flushleft}
+ \inferrule
+ {@{thm (prem 1) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \qquad
+ @{thm (prem 2) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \\\\
+ @{thm (prem 3) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}
+ {@{thm (concl) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}$\<open>PS\<close>\\
+ @{thm[mode=Axiom] Posix.intros(7)}\<open>P[]\<close>\medskip\\
+ $\mprset{flushleft}
+ \inferrule
+ {@{thm (prem 1) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad
+ @{thm (prem 2) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad
+ @{thm (prem 3) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \\\\
+ @{thm (prem 4) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}
+ {@{thm (concl) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}$\<open>P\<star>\<close>
+ \end{tabular}
+ \end{center}
+ \caption{Our inductive definition of POSIX values.}\label{POSIXrules}
+ \end{figure}
+
+
+*}
+
+section {* Bitcoded Derivatives *}
+
+text {*
+ bitcoded regexes / decoding / bmkeps
+ gets rid of the second phase (only single phase)
+ correctness
+*}
+
+
+section {* Simplification *}
+
+text {*
+ not direct correspondence with PDERs, because of example
+ problem with retrieve
+
+ correctness
+*}
+
+section {* Bound - NO *}
+
+section {* Bounded Regex / Not *}
+
+section {* Conclusion *}
+
text {*
\cite{AusafDyckhoffUrban2016}
--- a/thys2/Paper/document/root.tex Tue Jan 25 13:12:50 2022 +0000
+++ b/thys2/Paper/document/root.tex Thu Jan 27 23:25:26 2022 +0000
@@ -45,33 +45,50 @@
\titlerunning{POSIX Lexing with Bitcoded Derivatives}
\author{Chengsong Tan}{King's College London}{chengsong.tan@kcl.ac.uk}{}{}
\author{Christian Urban}{King's College London}{christian.urban@kcl.ac.uk}{}{}
+\keywords{POSIX matching, Derivatives of Regular Expressions, Isabelle/HOL}
+\category{}
+\ccsdesc[100]{Design and analysis of algorithms}
+\ccsdesc[100]{Formal languages and automata theory}
+\Copyright{\mbox{}}
+\nolinenumbers
\begin{document}
\maketitle
\begin{abstract}
-Brzozowski introduced the notion of derivatives for regular
-expressions. They can be used for a very simple regular expression
-matching algorithm. Sulzmann and Lu cleverly extended this algorithm
-in order to deal with POSIX matching, which is the underlying
-disambiguation strategy for regular expressions needed in lexers.
-Their algorithm generates POSIX values which encode the information of
-\emph{how} a regular expression matches a string---that is, which part
-of the string is matched by which part of the regular expression. In
-this paper we give our inductive definition of what a POSIX value is
-and show $(i)$ that such a value is unique (for given regular
-expression and string being matched) and $(ii)$ that Sulzmann and Lu's
-algorithm always generates such a value (provided that the regular
-expression matches the string). We show that $(iii)$ our inductive
-definition of a POSIX value is equivalent to an alternative definition
-by Okui and Suzuki which identifies POSIX values as least elements
-according to an ordering of values. We also prove the correctness of
-Sulzmann's bitcoded version of the POSIX matching algorithm and extend the
-results to additional constructors for regular expressions. \smallskip
-
-{\bf Keywords:} POSIX matching, Derivatives of Regular Expressions,
-Isabelle/HOL
+ Sulzmann and Lu described a lexing algorithm that calculates
+ Brzozowski derivatives using bit-sequences annotated to regular
+ expressions. Their algorithm generates POSIX values which encode
+ the information of \emph{how} a regular expression matches a
+ string---that is, which part of the string is matched by which part
+ of the regular expression. The purpose of the bit-sequences in
+ Sulzmann and Lu's algorithm is to keep the size of derivatives small
+ which is achieved by `aggressively' simplifying regular expressions.
+ In this paper we describe a slight variant of Sulzmann and Lu's
+ algorithm and \textit{(i)} prove that this algorithm generates
+ unique POSIX values; \textit{(ii)} we also establish a cubic bound
+ for the size of the derivatives---in earlier works, derivatives can
+ grow exponentially even after simplification.
+
+%Brzozowski introduced the notion of derivatives for regular
+%expressions. They can be used for a very simple regular expression
+%matching algorithm. Sulzmann and Lu cleverly extended this algorithm
+%in order to deal with POSIX matching, which is the underlying
+%disambiguation strategy for regular expressions needed in lexers.
+%Their algorithm generates POSIX values which encode the information of
+%\emph{how} a regular expression matches a string---that is, which part
+%of the string is matched by which part of the regular expression. In
+%this paper we give our inductive definition of what a POSIX value is
+%and show $(i)$ that such a value is unique (for given regular
+%expression and string being matched) and $(ii)$ that Sulzmann and Lu's
+%algorithm always generates such a value (provided that the regular
+%expression matches the string). We show that $(iii)$ our inductive
+%definition of a POSIX value is equivalent to an alternative definition
+%by Okui and Suzuki which identifies POSIX values as least elements
+%according to an ordering of values. We also prove the correctness of
+%Sulzmann's bitcoded version of the POSIX matching algorithm and extend the
+%results to additional constructors for regular expressions. \smallskip
\end{abstract}
--- a/thys2/SizeBound4.thy Tue Jan 25 13:12:50 2022 +0000
+++ b/thys2/SizeBound4.thy Thu Jan 27 23:25:26 2022 +0000
@@ -132,15 +132,6 @@
apply(auto)
done
-lemma fuse_Nil:
- shows "fuse [] r = r"
- by (induct r)(simp_all)
-
-(*
-lemma map_fuse_Nil:
- shows "map (fuse []) rs = rs"
- by (induct rs)(simp_all add: fuse_Nil)
-*)
fun intern :: "rexp \<Rightarrow> arexp" where
"intern ZERO = AZERO"
@@ -280,23 +271,23 @@
shows "retrieve (fuse bs r) v = bs @ retrieve r v"
using assms
apply(induct r arbitrary: v bs)
- apply(auto elim: Prf_elims)[4]
- defer
- using retrieve_encode_STARS
- apply(auto elim!: Prf_elims)[1]
- apply(case_tac vs)
- apply(simp)
- apply(simp)
- (* AALTs case *)
+ apply(auto elim: Prf_elims)[4]
+ apply(case_tac x2a)
+ apply(simp)
+ using Prf_elims(1) apply blast
+ apply(case_tac x2a)
+ apply(simp)
+ apply(simp)
+ apply(case_tac list)
apply(simp)
- apply(case_tac x2a)
- apply(simp)
- apply(auto elim!: Prf_elims)[1]
+ apply(simp)
+ apply (smt (verit, best) Prf_elims(3) append_assoc retrieve.simps(4) retrieve.simps(5))
apply(simp)
- apply(case_tac list)
- apply(simp)
- apply(auto)
+ using retrieve_encode_STARS
apply(auto elim!: Prf_elims)[1]
+ apply(case_tac vs)
+ apply(simp)
+ apply(simp)
done
lemma retrieve_fuse:
@@ -314,139 +305,92 @@
apply(simp_all add: retrieve_fuse retrieve_encode_STARS)
done
-(*
-lemma bnullable_Hdbmkeps_Hd:
- assumes "bnullable a"
- shows "bmkeps (AALTs bs (a # rs)) = bs @ (bmkeps a)"
- using assms by simp
-*)
-
-lemma r2:
- assumes "x \<in> set rs" "bnullable x"
- shows "bnullable (AALTs bs rs)"
+lemma retrieve_AALTs_bnullable1:
+ assumes "bnullable r"
+ shows "retrieve (AALTs bs (r # rs)) (mkeps (erase (AALTs bs (r # rs))))
+ = bs @ retrieve r (mkeps (erase r))"
using assms
- apply(induct rs)
- apply(auto)
+ apply(case_tac rs)
+ apply(auto simp add: bnullable_correctness)
done
-lemma r3:
- assumes "\<not> bnullable r"
- "bnullables rs"
- shows "retrieve (AALTs bs rs) (mkeps (erase (AALTs bs rs))) =
- retrieve (AALTs bs (r # rs)) (mkeps (erase (AALTs bs (r # rs))))"
+lemma retrieve_AALTs_bnullable2:
+ assumes "\<not>bnullable r" "bnullables rs"
+ shows "retrieve (AALTs bs (r # rs)) (mkeps (erase (AALTs bs (r # rs))))
+ = retrieve (AALTs bs rs) (mkeps (erase (AALTs bs rs)))"
using assms
apply(induct rs arbitrary: r bs)
- apply(auto)[1]
apply(auto)
using bnullable_correctness apply blast
- apply(auto simp add: bnullable_correctness mkeps_nullable retrieve_fuse2)
- apply(subst retrieve_fuse2[symmetric])
- apply (smt bnullable.simps(4) bnullable_correctness erase.simps(5) erase.simps(6) insert_iff list.exhaust list.set(2) mkeps.simps(3) mkeps_nullable)
- apply(simp)
- apply(case_tac "bnullable a")
- apply (smt append_Nil2 bnullable.simps(4) bnullable_correctness erase.simps(5) erase.simps(6) fuse.simps(4) insert_iff list.exhaust list.set(2) mkeps.simps(3) mkeps_nullable retrieve_fuse2)
- apply(drule_tac x="a" in meta_spec)
- apply(drule_tac x="bs" in meta_spec)
- apply(drule meta_mp)
- apply(simp)
- apply(drule meta_mp)
- apply(auto)
- apply(subst retrieve_fuse2[symmetric])
apply(case_tac rs)
- apply(simp)
- apply(auto)[1]
- apply (simp add: bnullable_correctness)
-
- apply (metis append_Nil2 bnullable_correctness erase_fuse fuse.simps(4) list.set_intros(1) mkeps.simps(3) mkeps_nullable nullable.simps(4) r2)
- apply (simp add: bnullable_correctness)
- apply (metis append_Nil2 bnullable_correctness erase.simps(6) erase_fuse fuse.simps(4) list.set_intros(2) mkeps.simps(3) mkeps_nullable r2)
- apply(simp)
+ apply(auto)
+ using bnullable_correctness apply blast
+ apply(case_tac rs)
+ apply(auto)
done
-lemma t:
+lemma bmkeps_retrieve_AALTs:
assumes "\<forall>r \<in> set rs. bnullable r \<longrightarrow> bmkeps r = retrieve r (mkeps (erase r))"
"bnullables rs"
shows "bs @ bmkepss rs = retrieve (AALTs bs rs) (mkeps (erase (AALTs bs rs)))"
using assms
apply(induct rs arbitrary: bs)
apply(auto)
- apply (metis (no_types, opaque_lifting) bmkepss.cases bnullable_correctness erase.simps(5) erase.simps(6) mkeps.simps(3) retrieve.simps(3) retrieve.simps(4))
- apply (metis r3)
- apply (metis (no_types, lifting) bmkepss.cases bnullable_correctness empty_iff erase.simps(6) list.set(1) mkeps.simps(3) retrieve.simps(4))
- apply (metis r3)
- done
-
+ using retrieve_AALTs_bnullable1 apply presburger
+ apply (metis retrieve_AALTs_bnullable2)
+ apply (simp add: retrieve_AALTs_bnullable1)
+ by (metis retrieve_AALTs_bnullable2)
+
+
lemma bmkeps_retrieve:
assumes "bnullable r"
shows "bmkeps r = retrieve r (mkeps (erase r))"
using assms
apply(induct r)
- apply(auto)
- using t by auto
+ apply(auto)
+ using bmkeps_retrieve_AALTs by auto
lemma bder_retrieve:
assumes "\<Turnstile> v : der c (erase r)"
shows "retrieve (bder c r) v = retrieve r (injval (erase r) c v)"
using assms
apply(induct r arbitrary: v rule: erase.induct)
- apply(simp)
- apply(erule Prf_elims)
- apply(simp)
- apply(erule Prf_elims)
- apply(simp)
- apply(case_tac "c = ca")
- apply(simp)
- apply(erule Prf_elims)
- apply(simp)
- apply(simp)
- apply(erule Prf_elims)
- apply(simp)
- apply(erule Prf_elims)
- apply(simp)
- apply(simp)
- apply(rename_tac "r\<^sub>1" "r\<^sub>2" rs v)
- apply(erule Prf_elims)
- apply(simp)
- apply(simp)
- apply(case_tac rs)
- apply(simp)
- apply(simp)
- apply (smt Prf_elims(3) injval.simps(2) injval.simps(3) retrieve.simps(4) retrieve.simps(5) same_append_eq)
- apply(simp)
- apply(case_tac "nullable (erase r1)")
- apply(simp)
- apply(erule Prf_elims)
- apply(subgoal_tac "bnullable r1")
- prefer 2
- using bnullable_correctness apply blast
- apply(simp)
- apply(erule Prf_elims)
- apply(simp)
- apply(subgoal_tac "bnullable r1")
- prefer 2
- using bnullable_correctness apply blast
- apply(simp)
- apply(simp add: retrieve_fuse2)
- apply(simp add: bmkeps_retrieve)
- apply(simp)
- apply(erule Prf_elims)
- apply(simp)
- using bnullable_correctness apply blast
- apply(rename_tac bs r v)
+ using Prf_elims(1) apply auto[1]
+ using Prf_elims(1) apply auto[1]
+ apply(auto)[1]
+ apply (metis Prf_elims(4) injval.simps(1) retrieve.simps(1) retrieve.simps(2))
+ using Prf_elims(1) apply blast
+ (* AALTs case *)
apply(simp)
apply(erule Prf_elims)
- apply(clarify)
+ apply(simp)
+ apply(simp)
+ apply(rename_tac "r\<^sub>1" "r\<^sub>2" rs v)
+ apply(erule Prf_elims)
+ apply(simp)
+ apply(simp)
+ apply(case_tac rs)
+ apply(simp)
+ apply(simp)
+ using Prf_elims(3) apply fastforce
+ (* ASEQ case *)
+ apply(simp)
+ apply(case_tac "nullable (erase r1)")
+ apply(simp)
+ apply(erule Prf_elims)
+ using Prf_elims(2) bnullable_correctness apply force
+ apply (simp add: bmkeps_retrieve bnullable_correctness retrieve_fuse2)
+ apply (simp add: bmkeps_retrieve bnullable_correctness retrieve_fuse2)
+ using Prf_elims(2) apply force
+ (* ASTAR case *)
+ apply(rename_tac bs r v)
+ apply(simp)
apply(erule Prf_elims)
apply(clarify)
- apply(subst injval.simps)
- apply(simp del: retrieve.simps)
- apply(subst retrieve.simps)
- apply(subst retrieve.simps)
- apply(simp)
- apply(simp add: retrieve_fuse2)
- done
-
+ apply(erule Prf_elims)
+ apply(clarify)
+ by (simp add: retrieve_fuse2)
lemma MAIN_decode:
@@ -581,79 +525,6 @@
apply(simp_all)
done
-lemma L_bsimp_ASEQ:
- "L (erase (ASEQ bs r1 r2)) = L (erase (bsimp_ASEQ bs r1 r2))"
- apply(induct bs r1 r2 rule: bsimp_ASEQ.induct)
- apply(simp_all)
- by (metis erase_fuse fuse.simps(4))
-
-lemma L_bsimp_AALTs:
- "L (erase (AALTs bs rs)) = L (erase (bsimp_AALTs bs rs))"
- apply(induct bs rs rule: bsimp_AALTs.induct)
- apply(simp_all add: erase_fuse)
- done
-
-lemma L_erase_AALTs:
- shows "L (erase (AALTs bs rs)) = \<Union> (L ` erase ` (set rs))"
- apply(induct rs)
- apply(simp)
- apply(simp)
- apply(case_tac rs)
- apply(simp)
- apply(simp)
- done
-
-lemma L_erase_flts:
- shows "\<Union> (L ` erase ` (set (flts rs))) = \<Union> (L ` erase ` (set rs))"
- apply(induct rs rule: flts.induct)
- apply(simp_all)
- apply(auto)
- using L_erase_AALTs erase_fuse apply auto[1]
- by (simp add: L_erase_AALTs erase_fuse)
-
-lemma L_erase_dB_acc:
- shows "(\<Union> (L ` acc) \<union> (\<Union> (L ` erase ` (set (distinctBy rs erase acc)))))
- = \<Union> (L ` acc) \<union> \<Union> (L ` erase ` (set rs))"
- apply(induction rs arbitrary: acc)
- apply simp_all
- by (smt (z3) SUP_absorb UN_insert sup_assoc sup_commute)
-
-
-lemma L_erase_dB:
- shows "(\<Union> (L ` erase ` (set (distinctBy rs erase {})))) = \<Union> (L ` erase ` (set rs))"
- by (metis L_erase_dB_acc Un_commute Union_image_empty)
-
-lemma L_bsimp_erase:
- shows "L (erase r) = L (erase (bsimp r))"
- apply(induct r)
- apply(simp)
- apply(simp)
- apply(simp)
- apply(auto simp add: Sequ_def)[1]
- apply(subst L_bsimp_ASEQ[symmetric])
- apply(auto simp add: Sequ_def)[1]
- apply(subst (asm) L_bsimp_ASEQ[symmetric])
- apply(auto simp add: Sequ_def)[1]
- apply(simp)
- apply(subst L_bsimp_AALTs[symmetric])
- defer
- apply(simp)
- apply(subst (2)L_erase_AALTs)
- apply(subst L_erase_dB)
- apply(subst L_erase_flts)
- apply (simp add: L_erase_AALTs)
- done
-
-lemma L_bders_simp:
- shows "L (erase (bders_simp r s)) = L (erase (bders r s))"
- apply(induct s arbitrary: r rule: rev_induct)
- apply(simp)
- apply(simp)
- apply(simp add: ders_append)
- apply(simp add: bders_simp_append)
- apply(simp add: L_bsimp_erase[symmetric])
- by (simp add: der_correctness)
-
lemma bmkeps_fuse:
assumes "bnullable r"
@@ -668,17 +539,6 @@
apply(auto simp add: bmkeps_fuse bnullable_fuse)
done
-
-lemma b4:
- shows "bnullable (bders_simp r s) = bnullable (bders r s)"
-proof -
- have "L (erase (bders_simp r s)) = L (erase (bders r s))"
- using L_bders_simp by force
- then show "bnullable (bders_simp r s) = bnullable (bders r s)"
- using bnullable_correctness nullable_correctness by blast
-qed
-
-
lemma bder_fuse:
shows "bder c (fuse bs a) = fuse bs (bder c a)"
apply(induct a arbitrary: bs c)
@@ -860,7 +720,7 @@
apply(rule rrewrite_srewrite.ss6[simplified])
apply(simp add: erase_fuse)
done
- qed (auto intro: rrewrite_srewrite.intros)
+qed (auto intro: rrewrite_srewrite.intros)
lemma rewrites_fuse:
@@ -1173,7 +1033,7 @@
theorem main_blexer_simp:
shows "blexer r s = blexer_simp r s"
unfolding blexer_def blexer_simp_def
- using b4 main_aux by simp
+ by (metis central main_aux rewrites_bnullable_eq)
theorem blexersimp_correctness:
Binary file thys2/paper.pdf has changed