author | urbanc |
Sun, 20 Feb 2011 12:52:35 +0000 | |
changeset 129 | 9dd70b10d90c |
parent 103 | f460d5f75cb5 |
child 154 | 7c68b9ad4486 |
permissions | -rw-r--r-- |
3 | 1 |
theory Matcher |
2 |
imports "Main" |
|
3 |
begin |
|
4 |
||
5 |
||
6 |
section {* Sequential Composition of Sets *} |
|
7 |
||
8 |
fun |
|
103 | 9 |
lang_seq :: "string set \<Rightarrow> string set \<Rightarrow> string set" ("_ ;; _" [100,100] 100) |
3 | 10 |
where |
103 | 11 |
"L1 ;; L2 = {s1 @ s2 | s1 s2. s1 \<in> L1 \<and> s2 \<in> L2}" |
3 | 12 |
|
13 |
||
14 |
section {* Kleene Star for Sets *} |
|
15 |
||
16 |
inductive_set |
|
17 |
Star :: "string set \<Rightarrow> string set" ("_\<star>" [101] 102) |
|
18 |
for L :: "string set" |
|
19 |
where |
|
20 |
start[intro]: "[] \<in> L\<star>" |
|
21 |
| step[intro]: "\<lbrakk>s1 \<in> L; s2 \<in> L\<star>\<rbrakk> \<Longrightarrow> s1 @ s2 \<in> L\<star>" |
|
22 |
||
23 |
||
24 | 24 |
text {* A standard property of Star *} |
3 | 25 |
|
26 |
lemma lang_star_cases: |
|
103 | 27 |
shows "L\<star> = {[]} \<union> L ;; L\<star>" |
3 | 28 |
by (auto) (metis Star.simps) |
29 |
||
30 |
section {* Regular Expressions *} |
|
31 |
||
32 |
datatype rexp = |
|
33 |
NULL |
|
34 |
| EMPTY |
|
35 |
| CHAR char |
|
36 |
| SEQ rexp rexp |
|
37 |
| ALT rexp rexp |
|
38 |
| STAR rexp |
|
39 |
||
103 | 40 |
types lang = "string set" |
3 | 41 |
|
102 | 42 |
definition |
103 | 43 |
DERIV :: "string \<Rightarrow> lang \<Rightarrow> lang" |
44 |
where |
|
102 | 45 |
"DERIV s A \<equiv> {s'. s @ s' \<in> A}" |
46 |
||
47 |
definition |
|
103 | 48 |
delta :: "lang \<Rightarrow> lang" |
49 |
where |
|
102 | 50 |
"delta A = (if [] \<in> A then {[]} else {})" |
51 |
||
103 | 52 |
lemma |
53 |
"DERIV s (P ; Q) = \<Union>{(delta (DERIV s1 P)) ; (DERIV s2 Q) | s1 s2. s = s1 @ s2}" |
|
54 |
apply(auto) |
|
102 | 55 |
|
103 | 56 |
fun |
57 |
D1 :: "string \<Rightarrow> lang \<Rightarrow> lang" |
|
58 |
where |
|
59 |
"D1 [] A = A" |
|
60 |
| "D1 (c # s) A = DERIV [c] (D1 s A)" |
|
61 |
||
62 |
fun |
|
63 |
D2 :: "string \<Rightarrow> lang \<Rightarrow> lang" |
|
64 |
where |
|
65 |
"D2 [] A = A" |
|
66 |
| "D2 (c # s) A = DERIV [c] (D1 s A)" |
|
67 |
||
68 |
function |
|
69 |
D |
|
70 |
where |
|
71 |
"D s P Q = P ;; Q" |
|
72 |
| "D (c#s) = |
|
102 | 73 |
|
3 | 74 |
section {* Semantics of Regular Expressions *} |
75 |
||
76 |
fun |
|
77 |
L :: "rexp \<Rightarrow> string set" |
|
78 |
where |
|
79 |
"L (NULL) = {}" |
|
80 |
| "L (EMPTY) = {[]}" |
|
81 |
| "L (CHAR c) = {[c]}" |
|
82 |
| "L (SEQ r1 r2) = (L r1) ; (L r2)" |
|
83 |
| "L (ALT r1 r2) = (L r1) \<union> (L r2)" |
|
84 |
| "L (STAR r) = (L r)\<star>" |
|
85 |
||
86 |
||
87 |
section {* The Matcher *} |
|
88 |
||
89 |
fun |
|
90 |
nullable :: "rexp \<Rightarrow> bool" |
|
91 |
where |
|
92 |
"nullable (NULL) = False" |
|
93 |
| "nullable (EMPTY) = True" |
|
94 |
| "nullable (CHAR c) = False" |
|
95 |
| "nullable (ALT r1 r2) = (nullable r1 \<or> nullable r2)" |
|
96 |
| "nullable (SEQ r1 r2) = (nullable r1 \<and> nullable r2)" |
|
97 |
| "nullable (STAR r) = True" |
|
98 |
||
99 |
fun |
|
100 |
der :: "char \<Rightarrow> rexp \<Rightarrow> rexp" |
|
101 |
where |
|
102 |
"der c (NULL) = NULL" |
|
103 |
| "der c (EMPTY) = NULL" |
|
5
074d9a4b2bc9
added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents:
3
diff
changeset
|
104 |
| "der c (CHAR c') = (if c = c' then EMPTY else NULL)" |
3 | 105 |
| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)" |
106 |
| "der c (SEQ r1 r2) = ALT (SEQ (der c r1) r2) (if nullable r1 then der c r2 else NULL)" |
|
107 |
| "der c (STAR r) = SEQ (der c r) (STAR r)" |
|
108 |
||
109 |
fun |
|
110 |
derivative :: "string \<Rightarrow> rexp \<Rightarrow> rexp" |
|
111 |
where |
|
112 |
"derivative [] r = r" |
|
5
074d9a4b2bc9
added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents:
3
diff
changeset
|
113 |
| "derivative (c # s) r = derivative s (der c r)" |
3 | 114 |
|
115 |
fun |
|
116 |
matches :: "rexp \<Rightarrow> string \<Rightarrow> bool" |
|
117 |
where |
|
118 |
"matches r s = nullable (derivative s r)" |
|
119 |
||
120 |
||
121 |
section {* Correctness Proof of the Matcher *} |
|
122 |
||
123 |
lemma nullable_correctness: |
|
124 |
shows "nullable r \<longleftrightarrow> [] \<in> L r" |
|
125 |
by (induct r) (auto) |
|
126 |
||
127 |
lemma der_correctness: |
|
5
074d9a4b2bc9
added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents:
3
diff
changeset
|
128 |
shows "s \<in> L (der c r) \<longleftrightarrow> c # s \<in> L r" |
3 | 129 |
proof (induct r arbitrary: s) |
130 |
case (SEQ r1 r2 s) |
|
5
074d9a4b2bc9
added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents:
3
diff
changeset
|
131 |
have ih1: "\<And>s. s \<in> L (der c r1) \<longleftrightarrow> c # s \<in> L r1" by fact |
074d9a4b2bc9
added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents:
3
diff
changeset
|
132 |
have ih2: "\<And>s. s \<in> L (der c r2) \<longleftrightarrow> c # s \<in> L r2" by fact |
074d9a4b2bc9
added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents:
3
diff
changeset
|
133 |
show "s \<in> L (der c (SEQ r1 r2)) \<longleftrightarrow> c # s \<in> L (SEQ r1 r2)" |
3 | 134 |
using ih1 ih2 by (auto simp add: nullable_correctness Cons_eq_append_conv) |
135 |
next |
|
136 |
case (STAR r s) |
|
5
074d9a4b2bc9
added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents:
3
diff
changeset
|
137 |
have ih: "\<And>s. s \<in> L (der c r) \<longleftrightarrow> c # s \<in> L r" by fact |
074d9a4b2bc9
added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents:
3
diff
changeset
|
138 |
show "s \<in> L (der c (STAR r)) \<longleftrightarrow> c # s \<in> L (STAR r)" |
3 | 139 |
proof |
140 |
assume "s \<in> L (der c (STAR r))" |
|
141 |
then have "s \<in> L (der c r) ; L r\<star>" by simp |
|
5
074d9a4b2bc9
added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents:
3
diff
changeset
|
142 |
then have "c # s \<in> L r ; (L r)\<star>" |
3 | 143 |
by (auto simp add: ih Cons_eq_append_conv) |
5
074d9a4b2bc9
added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents:
3
diff
changeset
|
144 |
then have "c # s \<in> (L r)\<star>" using lang_star_cases by auto |
074d9a4b2bc9
added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents:
3
diff
changeset
|
145 |
then show "c # s \<in> L (STAR r)" by simp |
3 | 146 |
next |
5
074d9a4b2bc9
added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents:
3
diff
changeset
|
147 |
assume "c # s \<in> L (STAR r)" |
074d9a4b2bc9
added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents:
3
diff
changeset
|
148 |
then have "c # s \<in> (L r)\<star>" by simp |
3 | 149 |
then have "s \<in> L (der c r); (L r)\<star>" |
5
074d9a4b2bc9
added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents:
3
diff
changeset
|
150 |
by (induct x\<equiv>"c # s" rule: Star.induct) |
3 | 151 |
(auto simp add: ih append_eq_Cons_conv) |
152 |
then show "s \<in> L (der c (STAR r))" by simp |
|
153 |
qed |
|
154 |
qed (simp_all) |
|
155 |
||
156 |
lemma matches_correctness: |
|
157 |
shows "matches r s \<longleftrightarrow> s \<in> L r" |
|
158 |
by (induct s arbitrary: r) |
|
159 |
(simp_all add: nullable_correctness der_correctness) |
|
160 |
||
161 |
section {* Examples *} |
|
162 |
||
163 |
value "matches NULL []" |
|
164 |
value "matches (CHAR (CHR ''a'')) [CHR ''a'']" |
|
165 |
value "matches (CHAR a) [a,a]" |
|
166 |
value "matches (STAR (CHAR a)) []" |
|
167 |
value "matches (STAR (CHAR a)) [a,a]" |
|
168 |
value "matches (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbbbc''" |
|
169 |
value "matches (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbcbbc''" |
|
170 |
||
171 |
section {* Incorrect Matcher - fun-definition rejected *} |
|
172 |
||
173 |
function |
|
174 |
match :: "rexp list \<Rightarrow> string \<Rightarrow> bool" |
|
175 |
where |
|
176 |
"match [] [] = True" |
|
5
074d9a4b2bc9
added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents:
3
diff
changeset
|
177 |
| "match [] (c # s) = False" |
074d9a4b2bc9
added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents:
3
diff
changeset
|
178 |
| "match (NULL # rs) s = False" |
074d9a4b2bc9
added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents:
3
diff
changeset
|
179 |
| "match (EMPTY # rs) s = match rs s" |
074d9a4b2bc9
added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents:
3
diff
changeset
|
180 |
| "match (CHAR c # rs) [] = False" |
074d9a4b2bc9
added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents:
3
diff
changeset
|
181 |
| "match (CHAR c # rs) (d # s) = (if c = d then match rs s else False)" |
074d9a4b2bc9
added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents:
3
diff
changeset
|
182 |
| "match (ALT r1 r2 # rs) s = (match (r1 # rs) s \<or> match (r2 # rs) s)" |
074d9a4b2bc9
added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents:
3
diff
changeset
|
183 |
| "match (SEQ r1 r2 # rs) s = match (r1 # r2 # rs) s" |
074d9a4b2bc9
added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents:
3
diff
changeset
|
184 |
| "match (STAR r # rs) s = (match rs s \<or> match (r # (STAR r) # rs) s)" |
3 | 185 |
apply(pat_completeness) |
186 |
apply(auto) |
|
187 |
done |
|
188 |
||
189 |
end |