|
1 theory Matcher |
|
2 imports "Main" |
|
3 begin |
|
4 |
|
5 |
|
6 section {* Sequential Composition of Sets *} |
|
7 |
|
8 fun |
|
9 lang_seq :: "string set \<Rightarrow> string set \<Rightarrow> string set" ("_ ; _" [100,100] 100) |
|
10 where |
|
11 "L1 ; L2 = {s1 @ s2 | s1 s2. s1 \<in> L1 \<and> s2 \<in> L2}" |
|
12 |
|
13 |
|
14 section {* Kleene Star for Sets *} |
|
15 |
|
16 inductive_set |
|
17 Star :: "string set \<Rightarrow> string set" ("_\<star>" [101] 102) |
|
18 for L :: "string set" |
|
19 where |
|
20 start[intro]: "[] \<in> L\<star>" |
|
21 | step[intro]: "\<lbrakk>s1 \<in> L; s2 \<in> L\<star>\<rbrakk> \<Longrightarrow> s1 @ s2 \<in> L\<star>" |
|
22 |
|
23 |
|
24 text {* A standard property of star *} |
|
25 |
|
26 lemma lang_star_cases: |
|
27 shows "L\<star> = {[]} \<union> L ; L\<star>" |
|
28 by (auto) (metis Star.simps) |
|
29 |
|
30 section {* Regular Expressions *} |
|
31 |
|
32 datatype rexp = |
|
33 NULL |
|
34 | EMPTY |
|
35 | CHAR char |
|
36 | SEQ rexp rexp |
|
37 | ALT rexp rexp |
|
38 | STAR rexp |
|
39 |
|
40 |
|
41 section {* Semantics of Regular Expressions *} |
|
42 |
|
43 fun |
|
44 L :: "rexp \<Rightarrow> string set" |
|
45 where |
|
46 "L (NULL) = {}" |
|
47 | "L (EMPTY) = {[]}" |
|
48 | "L (CHAR c) = {[c]}" |
|
49 | "L (SEQ r1 r2) = (L r1) ; (L r2)" |
|
50 | "L (ALT r1 r2) = (L r1) \<union> (L r2)" |
|
51 | "L (STAR r) = (L r)\<star>" |
|
52 |
|
53 |
|
54 section {* The Matcher *} |
|
55 |
|
56 fun |
|
57 nullable :: "rexp \<Rightarrow> bool" |
|
58 where |
|
59 "nullable (NULL) = False" |
|
60 | "nullable (EMPTY) = True" |
|
61 | "nullable (CHAR c) = False" |
|
62 | "nullable (ALT r1 r2) = (nullable r1 \<or> nullable r2)" |
|
63 | "nullable (SEQ r1 r2) = (nullable r1 \<and> nullable r2)" |
|
64 | "nullable (STAR r) = True" |
|
65 |
|
66 fun |
|
67 der :: "char \<Rightarrow> rexp \<Rightarrow> rexp" |
|
68 where |
|
69 "der c (NULL) = NULL" |
|
70 | "der c (EMPTY) = NULL" |
|
71 | "der c (CHAR c') = (if c=c' then EMPTY else NULL)" |
|
72 | "der c (ALT r1 r2) = ALT (der c r1) (der c r2)" |
|
73 | "der c (SEQ r1 r2) = ALT (SEQ (der c r1) r2) (if nullable r1 then der c r2 else NULL)" |
|
74 | "der c (STAR r) = SEQ (der c r) (STAR r)" |
|
75 |
|
76 fun |
|
77 derivative :: "string \<Rightarrow> rexp \<Rightarrow> rexp" |
|
78 where |
|
79 "derivative [] r = r" |
|
80 | "derivative (c#s) r = derivative s (der c r)" |
|
81 |
|
82 fun |
|
83 matches :: "rexp \<Rightarrow> string \<Rightarrow> bool" |
|
84 where |
|
85 "matches r s = nullable (derivative s r)" |
|
86 |
|
87 |
|
88 section {* Correctness Proof of the Matcher *} |
|
89 |
|
90 lemma nullable_correctness: |
|
91 shows "nullable r \<longleftrightarrow> [] \<in> L r" |
|
92 by (induct r) (auto) |
|
93 |
|
94 lemma der_correctness: |
|
95 shows "s \<in> L (der c r) \<longleftrightarrow> c#s \<in> L r" |
|
96 proof (induct r arbitrary: s) |
|
97 case (SEQ r1 r2 s) |
|
98 have ih1: "\<And>s. s \<in> L (der c r1) \<longleftrightarrow> c#s \<in> L r1" by fact |
|
99 have ih2: "\<And>s. s \<in> L (der c r2) \<longleftrightarrow> c#s \<in> L r2" by fact |
|
100 show "s \<in> L (der c (SEQ r1 r2)) \<longleftrightarrow> c#s \<in> L (SEQ r1 r2)" |
|
101 using ih1 ih2 by (auto simp add: nullable_correctness Cons_eq_append_conv) |
|
102 next |
|
103 case (STAR r s) |
|
104 have ih: "\<And>s. s \<in> L (der c r) \<longleftrightarrow> c#s \<in> L r" by fact |
|
105 show "s \<in> L (der c (STAR r)) \<longleftrightarrow> c#s \<in> L (STAR r)" |
|
106 proof |
|
107 assume "s \<in> L (der c (STAR r))" |
|
108 then have "s \<in> L (der c r) ; L r\<star>" by simp |
|
109 then have "c#s \<in> L r ; (L r)\<star>" |
|
110 by (auto simp add: ih Cons_eq_append_conv) |
|
111 then have "c#s \<in> (L r)\<star>" using lang_star_cases by auto |
|
112 then show "c#s \<in> L (STAR r)" by simp |
|
113 next |
|
114 assume "c#s \<in> L (STAR r)" |
|
115 then have "c#s \<in> (L r)\<star>" by simp |
|
116 then have "s \<in> L (der c r); (L r)\<star>" |
|
117 by (induct x\<equiv>"c#s" rule: Star.induct) |
|
118 (auto simp add: ih append_eq_Cons_conv) |
|
119 then show "s \<in> L (der c (STAR r))" by simp |
|
120 qed |
|
121 qed (simp_all) |
|
122 |
|
123 lemma matches_correctness: |
|
124 shows "matches r s \<longleftrightarrow> s \<in> L r" |
|
125 by (induct s arbitrary: r) |
|
126 (simp_all add: nullable_correctness der_correctness) |
|
127 |
|
128 section {* Examples *} |
|
129 |
|
130 value "matches NULL []" |
|
131 value "matches (CHAR (CHR ''a'')) [CHR ''a'']" |
|
132 value "matches (CHAR a) [a,a]" |
|
133 value "matches (STAR (CHAR a)) []" |
|
134 value "matches (STAR (CHAR a)) [a,a]" |
|
135 value "matches (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbbbc''" |
|
136 value "matches (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbcbbc''" |
|
137 |
|
138 section {* Incorrect Matcher - fun-definition rejected *} |
|
139 |
|
140 function |
|
141 match :: "rexp list \<Rightarrow> string \<Rightarrow> bool" |
|
142 where |
|
143 "match [] [] = True" |
|
144 | "match [] (c#s) = False" |
|
145 | "match (NULL#rs) s = False" |
|
146 | "match (EMPTY#rs) s = match rs s" |
|
147 | "match (CHAR c#rs) [] = False" |
|
148 | "match (CHAR c#rs) (d#s) = (if c = d then match rs s else False)" |
|
149 | "match (ALT r1 r2#rs) s = (match (r1#rs) s \<or> match (r2#rs) s)" |
|
150 | "match (SEQ r1 r2#rs) s = match (r1#r2#rs) s" |
|
151 | "match (STAR r#rs) s = (match rs s \<or> match (r#(STAR r)#rs) s)" |
|
152 apply(pat_completeness) |
|
153 apply(auto) |
|
154 done |
|
155 |
|
156 end |