61 section {* Semantics of Regular Expressions *} |
61 section {* Semantics of Regular Expressions *} |
62 |
62 |
63 fun |
63 fun |
64 L :: "rexp \<Rightarrow> string set" |
64 L :: "rexp \<Rightarrow> string set" |
65 where |
65 where |
66 "L (NULL) = {}" |
66 "L (ZERO) = {}" |
67 | "L (EMPTY) = {[]}" |
67 | "L (ONE) = {[]}" |
68 | "L (CHAR c) = {[c]}" |
68 | "L (CHAR c) = {[c]}" |
69 | "L (SEQ r1 r2) = (L r1) ;; (L r2)" |
69 | "L (SEQ r1 r2) = (L r1) ;; (L r2)" |
70 | "L (ALT r1 r2) = (L r1) \<union> (L r2)" |
70 | "L (ALT r1 r2) = (L r1) \<union> (L r2)" |
71 | "L (STAR r) = (L r)\<star>" |
71 | "L (STAR r) = (L r)\<star>" |
72 |
72 |
73 section {* The Matcher *} |
73 section {* The Matcher *} |
74 |
74 |
75 fun |
75 fun |
76 nullable :: "rexp \<Rightarrow> bool" |
76 nullable :: "rexp \<Rightarrow> bool" |
77 where |
77 where |
78 "nullable (NULL) = False" |
78 "nullable (ZERO) = False" |
79 | "nullable (EMPTY) = True" |
79 | "nullable (ONE) = True" |
80 | "nullable (CHAR c) = False" |
80 | "nullable (CHAR c) = False" |
81 | "nullable (ALT r1 r2) = (nullable r1 \<or> nullable r2)" |
81 | "nullable (ALT r1 r2) = (nullable r1 \<or> nullable r2)" |
82 | "nullable (SEQ r1 r2) = (nullable r1 \<and> nullable r2)" |
82 | "nullable (SEQ r1 r2) = (nullable r1 \<and> nullable r2)" |
83 | "nullable (STAR r) = True" |
83 | "nullable (STAR r) = True" |
84 |
84 |
85 fun |
|
86 noccurs :: "rexp \<Rightarrow> bool" |
|
87 where |
|
88 "noccurs (NULL) = True" |
|
89 | "noccurs (EMPTY) = False" |
|
90 | "noccurs (CHAR c) = False" |
|
91 | "noccurs (ALT r1 r2) = (noccurs r1 \<or> noccurs r2)" |
|
92 | "noccurs (SEQ r1 r2) = (noccurs r1 \<or> noccurs r2)" |
|
93 | "noccurs (STAR r) = (noccurs r)" |
|
94 |
85 |
95 lemma |
86 section {* Correctness Proof for Nullable *} |
96 "\<not> noccurs r \<Longrightarrow> L r \<noteq> {}" |
|
97 apply(induct r) |
|
98 apply(auto simp add: Seq_def) |
|
99 done |
|
100 |
|
101 lemma |
|
102 "L r = {} \<Longrightarrow> noccurs r" |
|
103 apply(induct r) |
|
104 apply(auto simp add: Seq_def) |
|
105 done |
|
106 |
|
107 lemma does_not_hold: |
|
108 "noccurs r \<Longrightarrow> L r = {}" |
|
109 apply(induct r) |
|
110 apply(auto simp add: Seq_def) |
|
111 oops |
|
112 |
|
113 fun |
|
114 der :: "char \<Rightarrow> rexp \<Rightarrow> rexp" |
|
115 where |
|
116 "der c (NULL) = NULL" |
|
117 | "der c (EMPTY) = NULL" |
|
118 | "der c (CHAR c') = (if c = c' then EMPTY else NULL)" |
|
119 | "der c (ALT r1 r2) = ALT (der c r1) (der c r2)" |
|
120 | "der c (SEQ r1 r2) = ALT (SEQ (der c r1) r2) (if nullable r1 then der c r2 else NULL)" |
|
121 | "der c (STAR r) = SEQ (der c r) (STAR r)" |
|
122 |
|
123 fun |
|
124 ders :: "string \<Rightarrow> rexp \<Rightarrow> rexp" |
|
125 where |
|
126 "ders [] r = r" |
|
127 | "ders (c # s) r = ders s (der c r)" |
|
128 |
|
129 fun |
|
130 matcher :: "rexp \<Rightarrow> string \<Rightarrow> bool" |
|
131 where |
|
132 "matcher r s = nullable (ders s r)" |
|
133 |
|
134 |
|
135 section {* Correctness Proof of the Matcher *} |
|
136 |
87 |
137 lemma nullable_correctness: |
88 lemma nullable_correctness: |
138 shows "nullable r \<longleftrightarrow> [] \<in> (L r)" |
89 shows "nullable r \<longleftrightarrow> [] \<in> (L r)" |
139 by (induct r) (auto simp add: Seq_def) |
90 apply(induct r) |
140 section {* Left-Quotient of a Set *} |
91 (* ZERO case *) |
|
92 apply(simp only: nullable.simps) |
|
93 apply(simp only: L.simps) |
|
94 apply(simp) |
|
95 (* ONE case *) |
|
96 apply(simp only: nullable.simps) |
|
97 apply(simp only: L.simps) |
|
98 apply(simp) |
|
99 (* CHAR case *) |
|
100 apply(simp only: nullable.simps) |
|
101 apply(simp only: L.simps) |
|
102 apply(simp) |
|
103 prefer 2 |
|
104 (* ALT case *) |
|
105 apply(simp (no_asm) only: nullable.simps) |
|
106 apply(simp only:) |
|
107 apply(simp only: L.simps) |
|
108 apply(simp) |
|
109 (* SEQ case *) |
|
110 oops |
141 |
111 |
142 fun |
112 lemma nullable_correctness: |
143 zeroable :: "rexp \<Rightarrow> bool" |
113 shows "nullable r \<longleftrightarrow> [] \<in> (L r)" |
144 where |
114 apply(induct r) |
145 "zeroable (NULL) = True" |
115 apply(simp_all) |
146 | "zeroable (EMPTY) = False" |
116 (* all easy subgoals are proved except the last 2 *) |
147 | "zeroable (CHAR c) = False" |
117 (* where the definition of Seq needs to be unfolded. *) |
148 | "zeroable (ALT r1 r2) = (zeroable r1 \<and> zeroable r2)" |
118 oops |
149 | "zeroable (SEQ r1 r2) = (zeroable r1 \<or> zeroable r2)" |
|
150 | "zeroable (STAR r) = False" |
|
151 |
119 |
|
120 lemma nullable_correctness: |
|
121 shows "nullable r \<longleftrightarrow> [] \<in> (L r)" |
|
122 apply(induct r) |
|
123 apply(simp_all add: Seq_def) |
|
124 (* except the star case every thing is proved *) |
|
125 (* we need to use the rule for Star.start *) |
|
126 oops |
152 |
127 |
153 lemma zeroable_correctness: |
128 lemma nullable_correctness: |
154 shows "zeroable r \<longleftrightarrow> (L r = {})" |
129 shows "nullable r \<longleftrightarrow> [] \<in> (L r)" |
155 apply(induct r) |
130 apply(induct r) |
156 apply(auto simp add: Seq_def)[6] |
131 apply(simp_all add: Seq_def Star.start) |
157 done |
132 done |
158 |
|
159 section {* Left-Quotient of a Set *} |
|
160 |
|
161 definition |
|
162 Der :: "char \<Rightarrow> string set \<Rightarrow> string set" |
|
163 where |
|
164 "Der c A \<equiv> {s. [c] @ s \<in> A}" |
|
165 |
|
166 lemma Der_null [simp]: |
|
167 shows "Der c {} = {}" |
|
168 unfolding Der_def |
|
169 by auto |
|
170 |
|
171 lemma Der_empty [simp]: |
|
172 shows "Der c {[]} = {}" |
|
173 unfolding Der_def |
|
174 by auto |
|
175 |
|
176 lemma Der_char [simp]: |
|
177 shows "Der c {[d]} = (if c = d then {[]} else {})" |
|
178 unfolding Der_def |
|
179 by auto |
|
180 |
|
181 lemma Der_union [simp]: |
|
182 shows "Der c (A \<union> B) = Der c A \<union> Der c B" |
|
183 unfolding Der_def |
|
184 by auto |
|
185 |
|
186 lemma Der_seq [simp]: |
|
187 shows "Der c (A ;; B) = (Der c A) ;; B \<union> (if [] \<in> A then Der c B else {})" |
|
188 unfolding Der_def Seq_def |
|
189 by (auto simp add: Cons_eq_append_conv) |
|
190 |
|
191 lemma Der_star [simp]: |
|
192 shows "Der c (A\<star>) = (Der c A) ;; A\<star>" |
|
193 proof - |
|
194 have "Der c (A\<star>) = Der c ({[]} \<union> A ;; A\<star>)" |
|
195 by (simp only: star_cases[symmetric]) |
|
196 also have "... = Der c (A ;; A\<star>)" |
|
197 by (simp only: Der_union Der_empty) (simp) |
|
198 also have "... = (Der c A) ;; A\<star> \<union> (if [] \<in> A then Der c (A\<star>) else {})" |
|
199 by simp |
|
200 also have "... = (Der c A) ;; A\<star>" |
|
201 unfolding Seq_def Der_def |
|
202 by (auto dest: star_decomp) |
|
203 finally show "Der c (A\<star>) = (Der c A) ;; A\<star>" . |
|
204 qed |
|
205 |
|
206 |
|
207 lemma der_correctness: |
|
208 shows "L (der c r) = Der c (L r)" |
|
209 by (induct r) |
|
210 (simp_all add: nullable_correctness) |
|
211 |
|
212 lemma matcher_correctness: |
|
213 shows "matcher r s \<longleftrightarrow> s \<in> L r" |
|
214 by (induct s arbitrary: r) |
|
215 (simp_all add: nullable_correctness der_correctness Der_def) |
|
216 |
|
217 section {* Examples *} |
|
218 |
|
219 definition |
|
220 "CHRA \<equiv> CHAR (CHR ''a'')" |
|
221 |
|
222 definition |
|
223 "ALT1 \<equiv> ALT CHRA EMPTY" |
|
224 |
|
225 definition |
|
226 "SEQ3 \<equiv> SEQ (SEQ ALT1 ALT1) ALT1" |
|
227 |
|
228 value "matcher SEQ3 ''aaa''" |
|
229 |
|
230 value "matcher NULL []" |
|
231 value "matcher (CHAR (CHR ''a'')) [CHR ''a'']" |
|
232 value "matcher (CHAR a) [a,a]" |
|
233 value "matcher (STAR (CHAR a)) []" |
|
234 value "matcher (STAR (CHAR a)) [a,a]" |
|
235 value "matcher (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbbbc''" |
|
236 value "matcher (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbcbbc''" |
|
237 |
|
238 section {* Incorrect Matcher - fun-definition rejected *} |
|
239 |
|
240 fun |
|
241 match :: "rexp list \<Rightarrow> string \<Rightarrow> bool" |
|
242 where |
|
243 "match [] [] = True" |
|
244 | "match [] (c # s) = False" |
|
245 | "match (NULL # rs) s = False" |
|
246 | "match (EMPTY # rs) s = match rs s" |
|
247 | "match (CHAR c # rs) [] = False" |
|
248 | "match (CHAR c # rs) (d # s) = (if c = d then match rs s else False)" |
|
249 | "match (ALT r1 r2 # rs) s = (match (r1 # rs) s \<or> match (r2 # rs) s)" |
|
250 | "match (SEQ r1 r2 # rs) s = match (r1 # r2 # rs) s" |
|
251 | "match (STAR r # rs) s = (match rs s \<or> match (r # (STAR r) # rs) s)" |
|
252 |
133 |
253 |
134 |
254 end |
135 end |