|
1 theory Matcher |
|
2 imports "Main" |
|
3 begin |
|
4 |
|
5 section {* Regular Expressions *} |
|
6 |
|
7 datatype rexp = |
|
8 NULL |
|
9 | EMPTY |
|
10 | CHAR char |
|
11 | SEQ rexp rexp |
|
12 | ALT rexp rexp |
|
13 | STAR rexp |
|
14 |
|
15 |
|
16 section {* Sequential Composition of Sets *} |
|
17 |
|
18 definition |
|
19 Seq :: "string set \<Rightarrow> string set \<Rightarrow> string set" ("_ ;; _" [100,100] 100) |
|
20 where |
|
21 "A ;; B = {s1 @ s2 | s1 s2. s1 \<in> A \<and> s2 \<in> B}" |
|
22 |
|
23 text {* Two Simple Properties about Sequential Composition *} |
|
24 |
|
25 lemma seq_empty [simp]: |
|
26 shows "A ;; {[]} = A" |
|
27 and "{[]} ;; A = A" |
|
28 by (simp_all add: Seq_def) |
|
29 |
|
30 lemma seq_null [simp]: |
|
31 shows "A ;; {} = {}" |
|
32 and "{} ;; A = {}" |
|
33 by (simp_all add: Seq_def) |
|
34 |
|
35 section {* Kleene Star for Sets *} |
|
36 |
|
37 inductive_set |
|
38 Star :: "string set \<Rightarrow> string set" ("_\<star>" [101] 102) |
|
39 for A :: "string set" |
|
40 where |
|
41 start[intro]: "[] \<in> A\<star>" |
|
42 | step[intro]: "\<lbrakk>s1 \<in> A; s2 \<in> A\<star>\<rbrakk> \<Longrightarrow> s1 @ s2 \<in> A\<star>" |
|
43 |
|
44 |
|
45 text {* A Standard Property of Star *} |
|
46 |
|
47 lemma star_cases: |
|
48 shows "A\<star> = {[]} \<union> A ;; A\<star>" |
|
49 unfolding Seq_def |
|
50 by (auto) (metis Star.simps) |
|
51 |
|
52 lemma star_decomp: |
|
53 assumes a: "c # x \<in> A\<star>" |
|
54 shows "\<exists>a b. x = a @ b \<and> c # a \<in> A \<and> b \<in> A\<star>" |
|
55 using a |
|
56 by (induct x\<equiv>"c # x" rule: Star.induct) |
|
57 (auto simp add: append_eq_Cons_conv) |
|
58 |
|
59 |
|
60 section {* Semantics of Regular Expressions *} |
|
61 |
|
62 fun |
|
63 L :: "rexp \<Rightarrow> string set" |
|
64 where |
|
65 "L (NULL) = {}" |
|
66 | "L (EMPTY) = {[]}" |
|
67 | "L (CHAR c) = {[c]}" |
|
68 | "L (SEQ r1 r2) = (L r1) ;; (L r2)" |
|
69 | "L (ALT r1 r2) = (L r1) \<union> (L r2)" |
|
70 | "L (STAR r) = (L r)\<star>" |
|
71 |
|
72 section {* The Matcher *} |
|
73 |
|
74 fun |
|
75 nullable :: "rexp \<Rightarrow> bool" |
|
76 where |
|
77 "nullable (NULL) = False" |
|
78 | "nullable (EMPTY) = True" |
|
79 | "nullable (CHAR c) = False" |
|
80 | "nullable (ALT r1 r2) = (nullable r1 \<or> nullable r2)" |
|
81 | "nullable (SEQ r1 r2) = (nullable r1 \<and> nullable r2)" |
|
82 | "nullable (STAR r) = True" |
|
83 |
|
84 fun |
|
85 der :: "char \<Rightarrow> rexp \<Rightarrow> rexp" |
|
86 where |
|
87 "der c (NULL) = NULL" |
|
88 | "der c (EMPTY) = NULL" |
|
89 | "der c (CHAR c') = (if c = c' then EMPTY else NULL)" |
|
90 | "der c (ALT r1 r2) = ALT (der c r1) (der c r2)" |
|
91 | "der c (SEQ r1 r2) = ALT (SEQ (der c r1) r2) (if nullable r1 then der c r2 else NULL)" |
|
92 | "der c (STAR r) = SEQ (der c r) (STAR r)" |
|
93 |
|
94 fun |
|
95 ders :: "string \<Rightarrow> rexp \<Rightarrow> rexp" |
|
96 where |
|
97 "ders [] r = r" |
|
98 | "ders (c # s) r = ders s (der c r)" |
|
99 |
|
100 fun |
|
101 matcher :: "rexp \<Rightarrow> string \<Rightarrow> bool" |
|
102 where |
|
103 "matcher r s = nullable (ders s r)" |
|
104 |
|
105 |
|
106 section {* Correctness Proof of the Matcher *} |
|
107 |
|
108 lemma nullable_correctness: |
|
109 shows "nullable r \<longleftrightarrow> [] \<in> (L r)" |
|
110 by (induct r) (auto simp add: Seq_def) |
|
111 section {* Left-Quotient of a Set *} |
|
112 |
|
113 definition |
|
114 Der :: "char \<Rightarrow> string set \<Rightarrow> string set" |
|
115 where |
|
116 "Der c A \<equiv> {s. [c] @ s \<in> A}" |
|
117 |
|
118 lemma Der_null [simp]: |
|
119 shows "Der c {} = {}" |
|
120 unfolding Der_def |
|
121 by auto |
|
122 |
|
123 lemma Der_empty [simp]: |
|
124 shows "Der c {[]} = {}" |
|
125 unfolding Der_def |
|
126 by auto |
|
127 |
|
128 lemma Der_char [simp]: |
|
129 shows "Der c {[d]} = (if c = d then {[]} else {})" |
|
130 unfolding Der_def |
|
131 by auto |
|
132 |
|
133 lemma Der_union [simp]: |
|
134 shows "Der c (A \<union> B) = Der c A \<union> Der c B" |
|
135 unfolding Der_def |
|
136 by auto |
|
137 |
|
138 lemma Der_seq [simp]: |
|
139 shows "Der c (A ;; B) = (Der c A) ;; B \<union> (if [] \<in> A then Der c B else {})" |
|
140 unfolding Der_def Seq_def |
|
141 by (auto simp add: Cons_eq_append_conv) |
|
142 |
|
143 lemma Der_star [simp]: |
|
144 shows "Der c (A\<star>) = (Der c A) ;; A\<star>" |
|
145 proof - |
|
146 have "Der c (A\<star>) = Der c ({[]} \<union> A ;; A\<star>)" |
|
147 by (simp only: star_cases[symmetric]) |
|
148 also have "... = Der c (A ;; A\<star>)" |
|
149 by (simp only: Der_union Der_empty) (simp) |
|
150 also have "... = (Der c A) ;; A\<star> \<union> (if [] \<in> A then Der c (A\<star>) else {})" |
|
151 by simp |
|
152 also have "... = (Der c A) ;; A\<star>" |
|
153 unfolding Seq_def Der_def |
|
154 by (auto dest: star_decomp) |
|
155 finally show "Der c (A\<star>) = (Der c A) ;; A\<star>" . |
|
156 qed |
|
157 |
|
158 |
|
159 lemma der_correctness: |
|
160 shows "L (der c r) = Der c (L r)" |
|
161 by (induct r) |
|
162 (simp_all add: nullable_correctness) |
|
163 |
|
164 lemma matcher_correctness: |
|
165 shows "matcher r s \<longleftrightarrow> s \<in> L r" |
|
166 by (induct s arbitrary: r) |
|
167 (simp_all add: nullable_correctness der_correctness Der_def) |
|
168 |
|
169 section {* Examples *} |
|
170 |
|
171 definition |
|
172 "CHRA \<equiv> CHAR (CHR ''a'')" |
|
173 |
|
174 definition |
|
175 "ALT1 \<equiv> ALT CHRA EMPTY" |
|
176 |
|
177 definition |
|
178 "SEQ3 \<equiv> SEQ (SEQ ALT1 ALT1) ALT1" |
|
179 |
|
180 value "matcher SEQ3 ''aaa''" |
|
181 |
|
182 value "matcher NULL []" |
|
183 value "matcher (CHAR (CHR ''a'')) [CHR ''a'']" |
|
184 value "matcher (CHAR a) [a,a]" |
|
185 value "matcher (STAR (CHAR a)) []" |
|
186 value "matcher (STAR (CHAR a)) [a,a]" |
|
187 value "matcher (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbbbc''" |
|
188 value "matcher (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbcbbc''" |
|
189 |
|
190 section {* Incorrect Matcher - fun-definition rejected *} |
|
191 |
|
192 fun |
|
193 match :: "rexp list \<Rightarrow> string \<Rightarrow> bool" |
|
194 where |
|
195 "match [] [] = True" |
|
196 | "match [] (c # s) = False" |
|
197 | "match (NULL # rs) s = False" |
|
198 | "match (EMPTY # rs) s = match rs s" |
|
199 | "match (CHAR c # rs) [] = False" |
|
200 | "match (CHAR c # rs) (d # s) = (if c = d then match rs s else False)" |
|
201 | "match (ALT r1 r2 # rs) s = (match (r1 # rs) s \<or> match (r2 # rs) s)" |
|
202 | "match (SEQ r1 r2 # rs) s = match (r1 # r2 # rs) s" |
|
203 | "match (STAR r # rs) s = (match rs s \<or> match (r # (STAR r) # rs) s)" |
|
204 |
|
205 |
|
206 end |