6 imports Regular_Exp |
6 imports Regular_Exp |
7 begin |
7 begin |
8 |
8 |
9 text{* This theory is based on work by Brzozowski \cite{Brzozowski64} and Antimirov \cite{Antimirov95}. *} |
9 text{* This theory is based on work by Brzozowski \cite{Brzozowski64} and Antimirov \cite{Antimirov95}. *} |
10 |
10 |
11 subsection {* Left-Quotients of languages *} |
|
12 |
|
13 definition Deriv :: "'a \<Rightarrow> 'a lang \<Rightarrow> 'a lang" |
|
14 where "Deriv x A = { xs. x#xs \<in> A }" |
|
15 |
|
16 definition Derivs :: "'a list \<Rightarrow> 'a lang \<Rightarrow> 'a lang" |
|
17 where "Derivs xs A = { ys. xs @ ys \<in> A }" |
|
18 |
|
19 abbreviation |
|
20 Derivss :: "'a list \<Rightarrow> 'a lang set \<Rightarrow> 'a lang" |
|
21 where |
|
22 "Derivss s As \<equiv> \<Union> (Derivs s) ` As" |
|
23 |
|
24 |
|
25 lemma Deriv_empty[simp]: "Deriv a {} = {}" |
|
26 and Deriv_epsilon[simp]: "Deriv a {[]} = {}" |
|
27 and Deriv_char[simp]: "Deriv a {[b]} = (if a = b then {[]} else {})" |
|
28 and Deriv_union[simp]: "Deriv a (A \<union> B) = Deriv a A \<union> Deriv a B" |
|
29 by (auto simp: Deriv_def) |
|
30 |
|
31 lemma Deriv_conc_subset: |
|
32 "Deriv a A @@ B \<subseteq> Deriv a (A @@ B)" (is "?L \<subseteq> ?R") |
|
33 proof |
|
34 fix w assume "w \<in> ?L" |
|
35 then obtain u v where "w = u @ v" "a # u \<in> A" "v \<in> B" |
|
36 by (auto simp: Deriv_def) |
|
37 then have "a # w \<in> A @@ B" |
|
38 by (auto intro: concI[of "a # u", simplified]) |
|
39 thus "w \<in> ?R" by (auto simp: Deriv_def) |
|
40 qed |
|
41 |
|
42 lemma Der_conc [simp]: |
|
43 shows "Deriv c (A @@ B) = (Deriv c A) @@ B \<union> (if [] \<in> A then Deriv c B else {})" |
|
44 unfolding Deriv_def conc_def |
|
45 by (auto simp add: Cons_eq_append_conv) |
|
46 |
|
47 lemma Deriv_star [simp]: |
|
48 shows "Deriv c (star A) = (Deriv c A) @@ star A" |
|
49 proof - |
|
50 have incl: "[] \<in> A \<Longrightarrow> Deriv c (star A) \<subseteq> (Deriv c A) @@ star A" |
|
51 unfolding Deriv_def conc_def |
|
52 apply(auto simp add: Cons_eq_append_conv) |
|
53 apply(drule star_decom) |
|
54 apply(auto simp add: Cons_eq_append_conv) |
|
55 done |
|
56 |
|
57 have "Deriv c (star A) = Deriv c (A @@ star A \<union> {[]})" |
|
58 by (simp only: star_unfold_left[symmetric]) |
|
59 also have "... = Deriv c (A @@ star A)" |
|
60 by (simp only: Deriv_union) (simp) |
|
61 also have "... = (Deriv c A) @@ (star A) \<union> (if [] \<in> A then Deriv c (star A) else {})" |
|
62 by simp |
|
63 also have "... = (Deriv c A) @@ star A" |
|
64 using incl by auto |
|
65 finally show "Deriv c (star A) = (Deriv c A) @@ star A" . |
|
66 qed |
|
67 |
|
68 lemma Derivs_simps [simp]: |
|
69 shows "Derivs [] A = A" |
|
70 and "Derivs (c # s) A = Derivs s (Deriv c A)" |
|
71 and "Derivs (s1 @ s2) A = Derivs s2 (Derivs s1 A)" |
|
72 unfolding Derivs_def Deriv_def by auto |
|
73 |
|
74 |
|
75 subsection {* Brzozowski's derivatives of regular expressions *} |
11 subsection {* Brzozowski's derivatives of regular expressions *} |
76 |
|
77 fun |
|
78 nullable :: "'a rexp \<Rightarrow> bool" |
|
79 where |
|
80 "nullable (Zero) = False" |
|
81 | "nullable (One) = True" |
|
82 | "nullable (Atom c) = False" |
|
83 | "nullable (Plus r1 r2) = (nullable r1 \<or> nullable r2)" |
|
84 | "nullable (Times r1 r2) = (nullable r1 \<and> nullable r2)" |
|
85 | "nullable (Star r) = True" |
|
86 |
12 |
87 fun |
13 fun |
88 deriv :: "'a \<Rightarrow> 'a rexp \<Rightarrow> 'a rexp" |
14 deriv :: "'a \<Rightarrow> 'a rexp \<Rightarrow> 'a rexp" |
89 where |
15 where |
90 "deriv c (Zero) = Zero" |
16 "deriv c (Zero) = Zero" |
100 where |
26 where |
101 "derivs [] r = r" |
27 "derivs [] r = r" |
102 | "derivs (c # s) r = derivs s (deriv c r)" |
28 | "derivs (c # s) r = derivs s (deriv c r)" |
103 |
29 |
104 |
30 |
105 lemma nullable_iff: |
31 lemma lang_deriv: "lang (deriv c r) = Deriv c (lang r)" |
106 shows "nullable r \<longleftrightarrow> [] \<in> lang r" |
|
107 by (induct r) (auto simp add: conc_def split: if_splits) |
|
108 |
|
109 lemma Deriv_deriv: |
|
110 shows "Deriv c (lang r) = lang (deriv c r)" |
|
111 by (induct r) (simp_all add: nullable_iff) |
32 by (induct r) (simp_all add: nullable_iff) |
112 |
33 |
113 lemma Derivs_derivs: |
34 lemma lang_derivs: "lang (derivs s r) = Derivs s (lang r)" |
114 shows "Derivs s (lang r) = lang (derivs s r)" |
35 by (induct s arbitrary: r) (simp_all add: lang_deriv) |
115 by (induct s arbitrary: r) (simp_all add: Deriv_deriv) |
36 |
|
37 text {* A regular expression matcher: *} |
|
38 |
|
39 definition matcher :: "'a rexp \<Rightarrow> 'a list \<Rightarrow> bool" where |
|
40 "matcher r s = nullable (derivs s r)" |
|
41 |
|
42 lemma matcher_correctness: "matcher r s \<longleftrightarrow> s \<in> lang r" |
|
43 by (induct s arbitrary: r) |
|
44 (simp_all add: nullable_iff lang_deriv matcher_def Deriv_def) |
116 |
45 |
117 |
46 |
118 subsection {* Antimirov's partial derivatives *} |
47 subsection {* Antimirov's partial derivatives *} |
119 |
48 |
120 abbreviation |
49 abbreviation |
121 "Timess rs r \<equiv> {Times r' r | r'. r' \<in> rs}" |
50 "Timess rs r \<equiv> (\<Union>r' \<in> rs. {Times r' r})" |
122 |
51 |
123 fun |
52 fun |
124 pderiv :: "'a \<Rightarrow> 'a rexp \<Rightarrow> 'a rexp set" |
53 pderiv :: "'a \<Rightarrow> 'a rexp \<Rightarrow> 'a rexp set" |
125 where |
54 where |
126 "pderiv c Zero = {}" |
55 "pderiv c Zero = {}" |
133 |
62 |
134 fun |
63 fun |
135 pderivs :: "'a list \<Rightarrow> 'a rexp \<Rightarrow> ('a rexp) set" |
64 pderivs :: "'a list \<Rightarrow> 'a rexp \<Rightarrow> ('a rexp) set" |
136 where |
65 where |
137 "pderivs [] r = {r}" |
66 "pderivs [] r = {r}" |
138 | "pderivs (c # s) r = \<Union> (pderivs s) ` (pderiv c r)" |
67 | "pderivs (c # s) r = \<Union> (pderivs s ` pderiv c r)" |
139 |
68 |
140 abbreviation |
69 abbreviation |
141 pderiv_set :: "'a \<Rightarrow> 'a rexp set \<Rightarrow> 'a rexp set" |
70 pderiv_set :: "'a \<Rightarrow> 'a rexp set \<Rightarrow> 'a rexp set" |
142 where |
71 where |
143 "pderiv_set c rs \<equiv> \<Union> pderiv c ` rs" |
72 "pderiv_set c rs \<equiv> \<Union> (pderiv c ` rs)" |
144 |
73 |
145 abbreviation |
74 abbreviation |
146 pderivs_set :: "'a list \<Rightarrow> 'a rexp set \<Rightarrow> 'a rexp set" |
75 pderivs_set :: "'a list \<Rightarrow> 'a rexp set \<Rightarrow> 'a rexp set" |
147 where |
76 where |
148 "pderivs_set s rs \<equiv> \<Union> (pderivs s) ` rs" |
77 "pderivs_set s rs \<equiv> \<Union> (pderivs s ` rs)" |
149 |
78 |
150 lemma pderivs_append: |
79 lemma pderivs_append: |
151 "pderivs (s1 @ s2) r = \<Union> (pderivs s2) ` (pderivs s1 r)" |
80 "pderivs (s1 @ s2) r = \<Union> (pderivs s2 ` pderivs s1 r)" |
152 by (induct s1 arbitrary: r) (simp_all) |
81 by (induct s1 arbitrary: r) (simp_all) |
153 |
82 |
154 lemma pderivs_snoc: |
83 lemma pderivs_snoc: |
155 shows "pderivs (s @ [c]) r = pderiv_set c (pderivs s r)" |
84 shows "pderivs (s @ [c]) r = pderiv_set c (pderivs s r)" |
156 by (simp add: pderivs_append) |
85 by (simp add: pderivs_append) |
166 by (induct s) (simp_all) |
95 by (induct s) (simp_all) |
167 |
96 |
168 subsection {* Relating left-quotients and partial derivatives *} |
97 subsection {* Relating left-quotients and partial derivatives *} |
169 |
98 |
170 lemma Deriv_pderiv: |
99 lemma Deriv_pderiv: |
171 shows "Deriv c (lang r) = \<Union> lang ` (pderiv c r)" |
100 shows "Deriv c (lang r) = \<Union> (lang ` pderiv c r)" |
172 by (induct r) (auto simp add: nullable_iff conc_UNION_distrib) |
101 by (induct r) (auto simp add: nullable_iff conc_UNION_distrib) |
173 |
102 |
174 lemma Derivs_pderivs: |
103 lemma Derivs_pderivs: |
175 shows "Derivs s (lang r) = \<Union> lang ` (pderivs s r)" |
104 shows "Derivs s (lang r) = \<Union> (lang ` pderivs s r)" |
176 proof (induct s arbitrary: r) |
105 proof (induct s arbitrary: r) |
177 case (Cons c s) |
106 case (Cons c s) |
178 have ih: "\<And>r. Derivs s (lang r) = \<Union> lang ` (pderivs s r)" by fact |
107 have ih: "\<And>r. Derivs s (lang r) = \<Union> (lang ` pderivs s r)" by fact |
179 have "Derivs (c # s) (lang r) = Derivs s (Deriv c (lang r))" by simp |
108 have "Derivs (c # s) (lang r) = Derivs s (Deriv c (lang r))" by simp |
180 also have "\<dots> = Derivs s (\<Union> lang ` (pderiv c r))" by (simp add: Deriv_pderiv) |
109 also have "\<dots> = Derivs s (\<Union> (lang ` pderiv c r))" by (simp add: Deriv_pderiv) |
181 also have "\<dots> = Derivss s (lang ` (pderiv c r))" |
110 also have "\<dots> = Derivss s (lang ` (pderiv c r))" |
182 by (auto simp add: Derivs_def) |
111 by (auto simp add: Derivs_def) |
183 also have "\<dots> = \<Union> lang ` (pderivs_set s (pderiv c r))" |
112 also have "\<dots> = \<Union> (lang ` (pderivs_set s (pderiv c r)))" |
184 using ih by auto |
113 using ih by auto |
185 also have "\<dots> = \<Union> lang ` (pderivs (c # s) r)" by simp |
114 also have "\<dots> = \<Union> (lang ` (pderivs (c # s) r))" by simp |
186 finally show "Derivs (c # s) (lang r) = \<Union> lang ` pderivs (c # s) r" . |
115 finally show "Derivs (c # s) (lang r) = \<Union> (lang ` pderivs (c # s) r)" . |
187 qed (simp add: Derivs_def) |
116 qed (simp add: Derivs_def) |
188 |
117 |
189 subsection {* Relating derivatives and partial derivatives *} |
118 subsection {* Relating derivatives and partial derivatives *} |
190 |
119 |
191 lemma deriv_pderiv: |
120 lemma deriv_pderiv: |
192 shows "(\<Union> lang ` (pderiv c r)) = lang (deriv c r)" |
121 shows "\<Union> (lang ` (pderiv c r)) = lang (deriv c r)" |
193 unfolding Deriv_deriv[symmetric] Deriv_pderiv by simp |
122 unfolding lang_deriv Deriv_pderiv by simp |
194 |
123 |
195 lemma derivs_pderivs: |
124 lemma derivs_pderivs: |
196 shows "(\<Union> lang ` (pderivs s r)) = lang (derivs s r)" |
125 shows "\<Union> (lang ` (pderivs s r)) = lang (derivs s r)" |
197 unfolding Derivs_derivs[symmetric] Derivs_pderivs by simp |
126 unfolding lang_derivs Derivs_pderivs by simp |
198 |
127 |
199 |
128 |
200 subsection {* Finiteness property of partial derivatives *} |
129 subsection {* Finiteness property of partial derivatives *} |
201 |
130 |
202 definition |
131 definition |
270 have ih: "pderivs s (Times r1 r2) \<subseteq> Timess (pderivs s r1) r2 \<union> (pderivs_lang (PSuf s) r2)" |
199 have ih: "pderivs s (Times r1 r2) \<subseteq> Timess (pderivs s r1) r2 \<union> (pderivs_lang (PSuf s) r2)" |
271 by fact |
200 by fact |
272 have "pderivs (s @ [c]) (Times r1 r2) = pderiv_set c (pderivs s (Times r1 r2))" |
201 have "pderivs (s @ [c]) (Times r1 r2) = pderiv_set c (pderivs s (Times r1 r2))" |
273 by (simp add: pderivs_snoc) |
202 by (simp add: pderivs_snoc) |
274 also have "\<dots> \<subseteq> pderiv_set c (Timess (pderivs s r1) r2 \<union> (pderivs_lang (PSuf s) r2))" |
203 also have "\<dots> \<subseteq> pderiv_set c (Timess (pderivs s r1) r2 \<union> (pderivs_lang (PSuf s) r2))" |
275 using ih by (auto) (blast) |
204 using ih by fast |
276 also have "\<dots> = pderiv_set c (Timess (pderivs s r1) r2) \<union> pderiv_set c (pderivs_lang (PSuf s) r2)" |
205 also have "\<dots> = pderiv_set c (Timess (pderivs s r1) r2) \<union> pderiv_set c (pderivs_lang (PSuf s) r2)" |
277 by (simp) |
206 by (simp) |
278 also have "\<dots> = pderiv_set c (Timess (pderivs s r1) r2) \<union> pderivs_lang (PSuf s @@ {[c]}) r2" |
207 also have "\<dots> = pderiv_set c (Timess (pderivs s r1) r2) \<union> pderivs_lang (PSuf s @@ {[c]}) r2" |
279 by (simp add: pderivs_lang_snoc) |
208 by (simp add: pderivs_lang_snoc) |
280 also |
209 also |
281 have "\<dots> \<subseteq> pderiv_set c (Timess (pderivs s r1) r2) \<union> pderiv c r2 \<union> pderivs_lang (PSuf s @@ {[c]}) r2" |
210 have "\<dots> \<subseteq> pderiv_set c (Timess (pderivs s r1) r2) \<union> pderiv c r2 \<union> pderivs_lang (PSuf s @@ {[c]}) r2" |
282 by auto |
211 by auto |
283 also |
212 also |
284 have "\<dots> \<subseteq> Timess (pderiv_set c (pderivs s r1)) r2 \<union> pderiv c r2 \<union> pderivs_lang (PSuf s @@ {[c]}) r2" |
213 have "\<dots> \<subseteq> Timess (pderiv_set c (pderivs s r1)) r2 \<union> pderiv c r2 \<union> pderivs_lang (PSuf s @@ {[c]}) r2" |
285 by (auto simp add: if_splits) (blast) |
214 by (auto simp add: if_splits) |
286 also have "\<dots> = Timess (pderivs (s @ [c]) r1) r2 \<union> pderiv c r2 \<union> pderivs_lang (PSuf s @@ {[c]}) r2" |
215 also have "\<dots> = Timess (pderivs (s @ [c]) r1) r2 \<union> pderiv c r2 \<union> pderivs_lang (PSuf s @@ {[c]}) r2" |
287 by (simp add: pderivs_snoc) |
216 by (simp add: pderivs_snoc) |
288 also have "\<dots> \<subseteq> Timess (pderivs (s @ [c]) r1) r2 \<union> pderivs_lang (PSuf (s @ [c])) r2" |
217 also have "\<dots> \<subseteq> Timess (pderivs (s @ [c]) r1) r2 \<union> pderivs_lang (PSuf (s @ [c])) r2" |
289 unfolding pderivs_lang_def by (auto simp add: PSuf_snoc) |
218 unfolding pderivs_lang_def by (auto simp add: PSuf_snoc) |
290 finally show ?case . |
219 finally show ?case . |
317 case (snoc c s) |
246 case (snoc c s) |
318 have ih: "s \<noteq> [] \<Longrightarrow> pderivs s (Star r) \<subseteq> Timess (pderivs_lang (PSuf s) r) (Star r)" by fact |
247 have ih: "s \<noteq> [] \<Longrightarrow> pderivs s (Star r) \<subseteq> Timess (pderivs_lang (PSuf s) r) (Star r)" by fact |
319 { assume asm: "s \<noteq> []" |
248 { assume asm: "s \<noteq> []" |
320 have "pderivs (s @ [c]) (Star r) = pderiv_set c (pderivs s (Star r))" by (simp add: pderivs_snoc) |
249 have "pderivs (s @ [c]) (Star r) = pderiv_set c (pderivs s (Star r))" by (simp add: pderivs_snoc) |
321 also have "\<dots> \<subseteq> pderiv_set c (Timess (pderivs_lang (PSuf s) r) (Star r))" |
250 also have "\<dots> \<subseteq> pderiv_set c (Timess (pderivs_lang (PSuf s) r) (Star r))" |
322 using ih[OF asm] by (auto) (blast) |
251 using ih[OF asm] by fast |
323 also have "\<dots> \<subseteq> Timess (pderiv_set c (pderivs_lang (PSuf s) r)) (Star r) \<union> pderiv c (Star r)" |
252 also have "\<dots> \<subseteq> Timess (pderiv_set c (pderivs_lang (PSuf s) r)) (Star r) \<union> pderiv c (Star r)" |
324 by (auto split: if_splits) (blast)+ |
253 by (auto split: if_splits) |
325 also have "\<dots> \<subseteq> Timess (pderivs_lang (PSuf (s @ [c])) r) (Star r) \<union> (Timess (pderiv c r) (Star r))" |
254 also have "\<dots> \<subseteq> Timess (pderivs_lang (PSuf (s @ [c])) r) (Star r) \<union> (Timess (pderiv c r) (Star r))" |
326 by (simp only: PSuf_snoc pderivs_lang_snoc pderivs_lang_union) |
255 by (simp only: PSuf_snoc pderivs_lang_snoc pderivs_lang_union) |
327 (auto simp add: pderivs_lang_def) |
256 (auto simp add: pderivs_lang_def) |
328 also have "\<dots> = Timess (pderivs_lang (PSuf (s @ [c])) r) (Star r)" |
257 also have "\<dots> = Timess (pderivs_lang (PSuf (s @ [c])) r) (Star r)" |
329 by (auto simp add: PSuf_snoc PSuf_Union pderivs_snoc pderivs_lang_def) |
258 by (auto simp add: PSuf_snoc PSuf_Union pderivs_snoc pderivs_lang_def) |
330 finally have ?case . |
259 finally have ?case . |
331 } |
260 } |
332 moreover |
261 moreover |
333 { assume asm: "s = []" |
262 { assume asm: "s = []" |
334 then have ?case |
263 then have ?case by (auto simp add: pderivs_lang_def pderivs_snoc PSuf_def) |
335 apply (auto simp add: pderivs_lang_def pderivs_snoc PSuf_def) |
|
336 apply(rule_tac x = "[c]" in exI) |
|
337 apply(auto) |
|
338 done |
|
339 } |
264 } |
340 ultimately show ?case by blast |
265 ultimately show ?case by blast |
341 qed (simp) |
266 qed (simp) |
342 |
267 |
343 lemma pderivs_lang_Star: |
268 lemma pderivs_lang_Star: |
375 |
300 |
376 lemma finite_pderivs_lang: |
301 lemma finite_pderivs_lang: |
377 shows "finite (pderivs_lang A r)" |
302 shows "finite (pderivs_lang A r)" |
378 by (metis finite_pderivs_lang_UNIV pderivs_lang_subset rev_finite_subset subset_UNIV) |
303 by (metis finite_pderivs_lang_UNIV pderivs_lang_subset rev_finite_subset subset_UNIV) |
379 |
304 |
380 |
|
381 subsection {* A regular expression matcher based on Brzozowski's derivatives *} |
|
382 |
|
383 fun |
|
384 matcher :: "'a rexp \<Rightarrow> 'a list \<Rightarrow> bool" |
|
385 where |
|
386 "matcher r s = nullable (derivs s r)" |
|
387 |
|
388 lemma matcher_correctness: |
|
389 shows "matcher r s \<longleftrightarrow> s \<in> lang r" |
|
390 by (induct s arbitrary: r) |
|
391 (simp_all add: nullable_iff Deriv_deriv[symmetric] Deriv_def) |
|
392 |
|
393 |
|
394 end |
305 end |