305 | "L_rexp (SEQ r1 r2) = (L_rexp r1) ;; (L_rexp r2)" |
305 | "L_rexp (SEQ r1 r2) = (L_rexp r1) ;; (L_rexp r2)" |
306 | "L_rexp (ALT r1 r2) = (L_rexp r1) \<union> (L_rexp r2)" |
306 | "L_rexp (ALT r1 r2) = (L_rexp r1) \<union> (L_rexp r2)" |
307 | "L_rexp (STAR r) = (L_rexp r)\<star>" |
307 | "L_rexp (STAR r) = (L_rexp r)\<star>" |
308 end |
308 end |
309 |
309 |
310 text {* |
310 |
311 To obtain equational system out of finite set of equivalent classes, a fold operation |
311 section {* Folds for Sets *} |
312 on finite set @{text "folds"} is defined. The use of @{text "SOME"} makes @{text "fold"} |
312 |
313 more robust than the @{text "fold"} in Isabelle library. The expression @{text "folds f"} |
313 text {* |
|
314 To obtain equational system out of finite set of equivalence classes, a fold operation |
|
315 on finite sets @{text "folds"} is defined. The use of @{text "SOME"} makes @{text "folds"} |
|
316 more robust than the @{text "fold"} in the Isabelle library. The expression @{text "folds f"} |
314 makes sense when @{text "f"} is not @{text "associative"} and @{text "commutitive"}, |
317 makes sense when @{text "f"} is not @{text "associative"} and @{text "commutitive"}, |
315 while @{text "fold f"} does not. |
318 while @{text "fold f"} does not. |
316 *} |
319 *} |
317 |
320 |
318 definition |
321 definition |
319 folds :: "('a \<Rightarrow> 'b \<Rightarrow> 'b) \<Rightarrow> 'b \<Rightarrow> 'a set \<Rightarrow> 'b" |
322 folds :: "('a \<Rightarrow> 'b \<Rightarrow> 'b) \<Rightarrow> 'b \<Rightarrow> 'a set \<Rightarrow> 'b" |
320 where |
323 where |
321 "folds f z S \<equiv> SOME x. fold_graph f z S x" |
324 "folds f z S \<equiv> SOME x. fold_graph f z S x" |
322 |
325 |
323 text {* |
326 text {* |
324 The following lemma assures that the arbitrary choice made by the @{text "SOME"} in @{text "folds"} |
327 The following lemma ensures that the arbitrary choice made by the |
325 does not affect the @{text "L"}-value of the resultant regular expression. |
328 @{text "SOME"} in @{text "folds"} does not affect the @{text "L"}-value |
326 *} |
329 of the resultant regular expression. |
|
330 *} |
|
331 |
327 lemma folds_alt_simp [simp]: |
332 lemma folds_alt_simp [simp]: |
328 "finite rs \<Longrightarrow> L (folds ALT NULL rs) = \<Union> (L ` rs)" |
333 assumes a: "finite rs" |
329 apply (rule set_eq_intro, simp add:folds_def) |
334 shows "L (folds ALT NULL rs) = \<Union> (L ` rs)" |
330 apply (rule someI2_ex, erule finite_imp_fold_graph) |
335 apply(rule set_eq_intro) |
331 by (erule fold_graph.induct, auto) |
336 apply(simp add: folds_def) |
332 |
337 apply(rule someI2_ex) |
333 (* Just a technical lemma. *) |
338 apply(rule_tac finite_imp_fold_graph[OF a]) |
|
339 apply(erule fold_graph.induct) |
|
340 apply(auto) |
|
341 done |
|
342 |
|
343 |
|
344 text {* Just a technical lemma for collections and pairs *} |
|
345 |
334 lemma [simp]: |
346 lemma [simp]: |
335 shows "(x, y) \<in> {(x, y). P x y} \<longleftrightarrow> P x y" |
347 shows "(x, y) \<in> {(x, y). P x y} \<longleftrightarrow> P x y" |
336 by simp |
348 by simp |
337 |
349 |
338 text {* |
350 text {* |
339 @{text "\<approx>L"} is an equivalent class defined by language @{text "Lang"}. |
351 @{text "\<approx>A"} is an equivalence class defined by language @{text "A"}. |
340 *} |
352 *} |
341 definition |
353 definition |
342 str_eq_rel ("\<approx>_" [100] 100) |
354 str_eq_rel ("\<approx>_" [100] 100) |
343 where |
355 where |
344 "\<approx>Lang \<equiv> {(x, y). (\<forall>z. x @ z \<in> Lang \<longleftrightarrow> y @ z \<in> Lang)}" |
356 "\<approx>A \<equiv> {(x, y). (\<forall>z. x @ z \<in> A \<longleftrightarrow> y @ z \<in> A)}" |
345 |
357 |
346 text {* |
358 text {* |
347 Among equivlant clases of @{text "\<approx>Lang"}, the set @{text "finals(Lang)"} singles out |
359 Among the equivalence clases of @{text "\<approx>A"}, the set @{text "finals A"} singles out |
348 those which contains strings from @{text "Lang"}. |
360 those which contains the strings from @{text "A"}. |
349 *} |
361 *} |
350 |
362 |
351 definition |
363 definition |
352 "finals Lang \<equiv> {\<approx>Lang `` {x} | x . x \<in> Lang}" |
364 "finals A \<equiv> {\<approx>A `` {x} | x . x \<in> A}" |
353 |
365 |
354 text {* |
366 text {* |
355 The following lemma show the relationshipt between @{text "finals(Lang)"} and @{text "Lang"}. |
367 The following lemma establishes the relationshipt between |
|
368 @{text "finals A"} and @{text "A"}. |
356 *} |
369 *} |
|
370 |
357 lemma lang_is_union_of_finals: |
371 lemma lang_is_union_of_finals: |
358 "Lang = \<Union> finals(Lang)" |
372 shows "A = \<Union> finals A" |
359 proof |
373 unfolding finals_def |
360 show "Lang \<subseteq> \<Union> (finals Lang)" |
374 unfolding Image_def |
361 proof |
375 unfolding str_eq_rel_def |
362 fix x |
376 apply(auto) |
363 assume "x \<in> Lang" |
377 apply(drule_tac x = "[]" in spec) |
364 thus "x \<in> \<Union> (finals Lang)" |
378 apply(auto) |
365 apply (simp add:finals_def, rule_tac x = "(\<approx>Lang) `` {x}" in exI) |
379 done |
366 by (auto simp:Image_def str_eq_rel_def) |
380 |
367 qed |
|
368 next |
|
369 show "\<Union> (finals Lang) \<subseteq> Lang" |
|
370 apply (clarsimp simp:finals_def str_eq_rel_def) |
|
371 by (drule_tac x = "[]" in spec, auto) |
|
372 qed |
|
373 |
381 |
374 section {* Direction @{text "finite partition \<Rightarrow> regular language"}*} |
382 section {* Direction @{text "finite partition \<Rightarrow> regular language"}*} |
375 |
383 |
376 text {* |
384 text {* |
377 The relationship between equivalent classes can be described by an |
385 The relationship between equivalent classes can be described by an |
378 equational system. |
386 equational system. For example, in equational system \eqref{example_eqns}, |
379 For example, in equational system \eqref{example_eqns}, $X_0, X_1$ are equivalent |
387 $X_0, X_1$ are equivalent classes. The first equation says every string in |
380 classes. The first equation says every string in $X_0$ is obtained either by |
388 $X_0$ is obtained either by appending one $b$ to a string in $X_0$ or by |
381 appending one $b$ to a string in $X_0$ or by appending one $a$ to a string in |
389 appending one $a$ to a string in $X_1$ or just be an empty string |
382 $X_1$ or just be an empty string (represented by the regular expression $\lambda$). Similary, |
390 (represented by the regular expression $\lambda$). Similary, the second |
383 the second equation tells how the strings inside $X_1$ are composed. |
391 equation tells how the strings inside $X_1$ are composed. |
|
392 |
384 \begin{equation}\label{example_eqns} |
393 \begin{equation}\label{example_eqns} |
385 \begin{aligned} |
394 \begin{aligned} |
386 X_0 & = X_0 b + X_1 a + \lambda \\ |
395 X_0 & = X_0 b + X_1 a + \lambda \\ |
387 X_1 & = X_0 a + X_1 b |
396 X_1 & = X_0 a + X_1 b |
388 \end{aligned} |
397 \end{aligned} |
389 \end{equation} |
398 \end{equation} |
390 The summands on the right hand side is represented by the following data type |
399 |
391 @{text "rhs_item"}, mnemonic for 'right hand side item'. |
400 \noindent |
392 Generally, there are two kinds of right hand side items, one kind corresponds to |
401 The summands on the right hand side is represented by the following data |
393 pure regular expressions, like the $\lambda$ in \eqref{example_eqns}, the other kind corresponds to |
402 type @{text "rhs_item"}, mnemonic for 'right hand side item'. Generally, |
394 transitions from one one equivalent class to another, like the $X_0 b, X_1 a$ etc. |
403 there are two kinds of right hand side items, one kind corresponds to pure |
395 *} |
404 regular expressions, like the $\lambda$ in \eqref{example_eqns}, the other |
|
405 kind corresponds to transitions from one one equivalent class to another, |
|
406 like the $X_0 b, X_1 a$ etc. |
|
407 |
|
408 *} |
396 |
409 |
397 datatype rhs_item = |
410 datatype rhs_item = |
398 Lam "rexp" (* Lambda *) |
411 Lam "rexp" (* Lambda *) |
399 | Trn "(string set)" "rexp" (* Transition *) |
412 | Trn "lang" "rexp" (* Transition *) |
|
413 |
400 |
414 |
401 text {* |
415 text {* |
402 In this formalization, pure regular expressions like $\lambda$ is |
416 In this formalization, pure regular expressions like $\lambda$ is |
403 repsented by @{text "Lam(EMPTY)"}, while transitions like $X_0 a$ is represented by $Trn~X_0~(CHAR~a)$. |
417 repsented by @{text "Lam(EMPTY)"}, while transitions like $X_0 a$ is |
404 *} |
418 represented by @{term "Trn X\<^isub>0 (CHAR a)"}. |
|
419 *} |
405 |
420 |
406 text {* |
421 text {* |
407 The functions @{text "the_r"} and @{text "the_Trn"} are used to extract |
422 The functions @{text "the_r"} and @{text "the_Trn"} are used to extract |
408 subcomponents from right hand side items. |
423 subcomponents from right hand side items. |
409 *} |
424 *} |
410 |
425 |
411 fun the_r :: "rhs_item \<Rightarrow> rexp" |
426 fun |
412 where "the_r (Lam r) = r" |
427 the_r :: "rhs_item \<Rightarrow> rexp" |
413 |
428 where |
414 fun the_Trn:: "rhs_item \<Rightarrow> (string set \<times> rexp)" |
429 "the_r (Lam r) = r" |
415 where "the_Trn (Trn Y r) = (Y, r)" |
430 |
416 |
431 fun |
417 text {* |
432 the_Trn:: "rhs_item \<Rightarrow> (lang \<times> rexp)" |
418 Every right hand side item @{text "itm"} defines a string set given |
433 where |
419 @{text "L(itm)"}, defined as: |
434 "the_Trn (Trn Y r) = (Y, r)" |
|
435 |
|
436 text {* |
|
437 Every right-hand side item @{text "itm"} defines a language given |
|
438 by @{text "L(itm)"}, defined as: |
420 *} |
439 *} |
421 overloading L_rhs_e \<equiv> "L:: rhs_item \<Rightarrow> string set" |
440 |
|
441 overloading L_rhs_e \<equiv> "L:: rhs_item \<Rightarrow> lang" |
422 begin |
442 begin |
423 fun L_rhs_e:: "rhs_item \<Rightarrow> string set" |
443 fun L_rhs_e:: "rhs_item \<Rightarrow> lang" |
424 where |
444 where |
425 "L_rhs_e (Lam r) = L r" | |
445 "L_rhs_e (Lam r) = L r" |
426 "L_rhs_e (Trn X r) = X ;; L r" |
446 | "L_rhs_e (Trn X r) = X ;; L r" |
427 end |
447 end |
428 |
448 |
429 text {* |
449 text {* |
430 The right hand side of every equation is represented by a set of |
450 The right hand side of every equation is represented by a set of |
431 items. The string set defined by such a set @{text "itms"} is given |
451 items. The string set defined by such a set @{text "itms"} is given |
432 by @{text "L(itms)"}, defined as: |
452 by @{text "L(itms)"}, defined as: |
433 *} |
453 *} |
434 |
454 |
435 overloading L_rhs \<equiv> "L:: rhs_item set \<Rightarrow> string set" |
455 overloading L_rhs \<equiv> "L:: rhs_item set \<Rightarrow> lang" |
436 begin |
456 begin |
437 fun L_rhs:: "rhs_item set \<Rightarrow> string set" |
457 fun L_rhs:: "rhs_item set \<Rightarrow> lang" |
438 where "L_rhs rhs = \<Union> (L ` rhs)" |
458 where |
|
459 "L_rhs rhs = \<Union> (L ` rhs)" |
439 end |
460 end |
440 |
461 |
441 text {* |
462 text {* |
442 Given a set of equivalent classses @{text "CS"} and one equivalent class @{text "X"} among |
463 Given a set of equivalence classes @{text "CS"} and one equivalence class @{text "X"} among |
443 @{text "CS"}, the term @{text "init_rhs CS X"} is used to extract the right hand side of |
464 @{text "CS"}, the term @{text "init_rhs CS X"} is used to extract the right hand side of |
444 the equation describing the formation of @{text "X"}. The definition of @{text "init_rhs"} |
465 the equation describing the formation of @{text "X"}. The definition of @{text "init_rhs"} |
445 is: |
466 is: |
446 *} |
467 *} |
447 |
468 |
1219 thus ?thesis using Inv_ES |
1248 thus ?thesis using Inv_ES |
1220 by (rule last_cl_exists_rexp) |
1249 by (rule last_cl_exists_rexp) |
1221 qed |
1250 qed |
1222 |
1251 |
1223 lemma finals_in_partitions: |
1252 lemma finals_in_partitions: |
1224 "finals Lang \<subseteq> (UNIV // (\<approx>Lang))" |
1253 shows "finals A \<subseteq> (UNIV // \<approx>A)" |
1225 by (auto simp:finals_def quotient_def) |
1254 unfolding finals_def |
|
1255 unfolding quotient_def |
|
1256 by auto |
1226 |
1257 |
1227 theorem hard_direction: |
1258 theorem hard_direction: |
1228 assumes finite_CS: "finite (UNIV // \<approx>Lang)" |
1259 assumes finite_CS: "finite (UNIV // \<approx>A)" |
1229 shows "\<exists> (r::rexp). Lang = L r" |
1260 shows "\<exists>r::rexp. A = L r" |
1230 proof - |
1261 proof - |
1231 have "\<forall> X \<in> (UNIV // (\<approx>Lang)). \<exists> (reg::rexp). X = L reg" |
1262 have "\<forall> X \<in> (UNIV // \<approx>A). \<exists>reg::rexp. X = L reg" |
1232 using finite_CS every_eqcl_has_reg by blast |
1263 using finite_CS every_eqcl_has_reg by blast |
1233 then obtain f |
1264 then obtain f |
1234 where f_prop: "\<forall> X \<in> (UNIV // (\<approx>Lang)). X = L ((f X)::rexp)" |
1265 where f_prop: "\<forall> X \<in> (UNIV // \<approx>A). X = L ((f X)::rexp)" |
1235 by (auto dest:bchoice) |
1266 by (auto dest: bchoice) |
1236 def rs \<equiv> "f ` (finals Lang)" |
1267 def rs \<equiv> "f ` (finals A)" |
1237 have "Lang = \<Union> (finals Lang)" using lang_is_union_of_finals by auto |
1268 have "A = \<Union> (finals A)" using lang_is_union_of_finals by auto |
1238 also have "\<dots> = L (folds ALT NULL rs)" |
1269 also have "\<dots> = L (folds ALT NULL rs)" |
1239 proof - |
1270 proof - |
1240 have "finite rs" |
1271 have "finite rs" |
1241 proof - |
1272 proof - |
1242 have "finite (finals Lang)" |
1273 have "finite (finals A)" |
1243 using finite_CS finals_in_partitions[of "Lang"] |
1274 using finite_CS finals_in_partitions[of "A"] |
1244 by (erule_tac finite_subset, simp) |
1275 by (erule_tac finite_subset, simp) |
1245 thus ?thesis using rs_def by auto |
1276 thus ?thesis using rs_def by auto |
1246 qed |
1277 qed |
1247 thus ?thesis |
1278 thus ?thesis |
1248 using f_prop rs_def finals_in_partitions[of "Lang"] by auto |
1279 using f_prop rs_def finals_in_partitions[of "A"] by auto |
1249 qed |
1280 qed |
1250 finally show ?thesis by blast |
1281 finally show ?thesis by blast |
1251 qed |
1282 qed |
1252 |
1283 |
1253 end |
1284 end |