progs/Matcher.thy
changeset 167 cfba674a8fdf
child 208 bd5a8a6b3871
equal deleted inserted replaced
166:ef48e378c44e 167:cfba674a8fdf
       
     1 theory Matcher
       
     2   imports "Main" 
       
     3 begin
       
     4 
       
     5 section {* Regular Expressions *}
       
     6 
       
     7 datatype rexp =
       
     8   NULL
       
     9 | EMPTY
       
    10 | CHAR char
       
    11 | SEQ rexp rexp
       
    12 | ALT rexp rexp
       
    13 | STAR rexp
       
    14 
       
    15 
       
    16 section {* Sequential Composition of Sets *}
       
    17 
       
    18 definition
       
    19   Seq :: "string set \<Rightarrow> string set \<Rightarrow> string set" ("_ ;; _" [100,100] 100)
       
    20 where 
       
    21   "A ;; B = {s1 @ s2 | s1 s2. s1 \<in> A \<and> s2 \<in> B}"
       
    22 
       
    23 text {* Two Simple Properties about Sequential Composition *}
       
    24 
       
    25 lemma seq_empty [simp]:
       
    26   shows "A ;; {[]} = A"
       
    27   and   "{[]} ;; A = A"
       
    28 by (simp_all add: Seq_def)
       
    29 
       
    30 lemma seq_null [simp]:
       
    31   shows "A ;; {} = {}"
       
    32   and   "{} ;; A = {}"
       
    33 by (simp_all add: Seq_def)
       
    34 
       
    35 section {* Kleene Star for Sets *}
       
    36 
       
    37 inductive_set
       
    38   Star :: "string set \<Rightarrow> string set" ("_\<star>" [101] 102)
       
    39   for A :: "string set"
       
    40 where
       
    41   start[intro]: "[] \<in> A\<star>"
       
    42 | step[intro]:  "\<lbrakk>s1 \<in> A; s2 \<in> A\<star>\<rbrakk> \<Longrightarrow> s1 @ s2 \<in> A\<star>"
       
    43 
       
    44 
       
    45 text {* A Standard Property of Star *}
       
    46 
       
    47 lemma star_cases:
       
    48   shows "A\<star> = {[]} \<union> A ;; A\<star>"
       
    49 unfolding Seq_def
       
    50 by (auto) (metis Star.simps)
       
    51 
       
    52 lemma star_decomp: 
       
    53   assumes a: "c # x \<in> A\<star>" 
       
    54   shows "\<exists>a b. x = a @ b \<and> c # a \<in> A \<and> b \<in> A\<star>"
       
    55 using a
       
    56 by (induct x\<equiv>"c # x" rule: Star.induct) 
       
    57    (auto simp add: append_eq_Cons_conv)
       
    58 
       
    59 
       
    60 section {* Semantics of Regular Expressions *}
       
    61  
       
    62 fun
       
    63   L :: "rexp \<Rightarrow> string set"
       
    64 where
       
    65   "L (NULL) = {}"
       
    66 | "L (EMPTY) = {[]}"
       
    67 | "L (CHAR c) = {[c]}"
       
    68 | "L (SEQ r1 r2) = (L r1) ;; (L r2)"
       
    69 | "L (ALT r1 r2) = (L r1) \<union> (L r2)"
       
    70 | "L (STAR r) = (L r)\<star>"
       
    71 
       
    72 section {* The Matcher *}
       
    73 
       
    74 fun
       
    75  nullable :: "rexp \<Rightarrow> bool"
       
    76 where
       
    77   "nullable (NULL) = False"
       
    78 | "nullable (EMPTY) = True"
       
    79 | "nullable (CHAR c) = False"
       
    80 | "nullable (ALT r1 r2) = (nullable r1 \<or> nullable r2)"
       
    81 | "nullable (SEQ r1 r2) = (nullable r1 \<and> nullable r2)"
       
    82 | "nullable (STAR r) = True"
       
    83 
       
    84 fun
       
    85  der :: "char \<Rightarrow> rexp \<Rightarrow> rexp"
       
    86 where
       
    87   "der c (NULL) = NULL"
       
    88 | "der c (EMPTY) = NULL"
       
    89 | "der c (CHAR c') = (if c = c' then EMPTY else NULL)"
       
    90 | "der c (ALT r1 r2) = ALT (der c r1) (der c r2)"
       
    91 | "der c (SEQ r1 r2) = ALT (SEQ (der c r1) r2) (if nullable r1 then der c r2 else NULL)"
       
    92 | "der c (STAR r) = SEQ (der c r) (STAR r)"
       
    93 
       
    94 fun 
       
    95  ders :: "string \<Rightarrow> rexp \<Rightarrow> rexp"
       
    96 where
       
    97   "ders [] r = r"
       
    98 | "ders (c # s) r = ders s (der c r)"
       
    99 
       
   100 fun
       
   101   matcher :: "rexp \<Rightarrow> string \<Rightarrow> bool"
       
   102 where
       
   103   "matcher r s = nullable (ders s r)"
       
   104 
       
   105 
       
   106 section {* Correctness Proof of the Matcher *}
       
   107 
       
   108 lemma nullable_correctness:
       
   109   shows "nullable r  \<longleftrightarrow> [] \<in> (L r)"
       
   110 by (induct r) (auto simp add: Seq_def) 
       
   111 section {* Left-Quotient of a Set *}
       
   112 
       
   113 definition
       
   114   Der :: "char \<Rightarrow> string set \<Rightarrow> string set"
       
   115 where
       
   116   "Der c A \<equiv> {s. [c] @ s \<in> A}"
       
   117 
       
   118 lemma Der_null [simp]:
       
   119   shows "Der c {} = {}"
       
   120 unfolding Der_def
       
   121 by auto
       
   122 
       
   123 lemma Der_empty [simp]:
       
   124   shows "Der c {[]} = {}"
       
   125 unfolding Der_def
       
   126 by auto
       
   127 
       
   128 lemma Der_char [simp]:
       
   129   shows "Der c {[d]} = (if c = d then {[]} else {})"
       
   130 unfolding Der_def
       
   131 by auto
       
   132 
       
   133 lemma Der_union [simp]:
       
   134   shows "Der c (A \<union> B) = Der c A \<union> Der c B"
       
   135 unfolding Der_def
       
   136 by auto
       
   137 
       
   138 lemma Der_seq [simp]:
       
   139   shows "Der c (A ;; B) = (Der c A) ;; B \<union> (if [] \<in> A then Der c B else {})"
       
   140 unfolding Der_def Seq_def
       
   141 by (auto simp add: Cons_eq_append_conv)
       
   142 
       
   143 lemma Der_star [simp]:
       
   144   shows "Der c (A\<star>) = (Der c A) ;; A\<star>"
       
   145 proof -    
       
   146   have "Der c (A\<star>) = Der c ({[]} \<union> A ;; A\<star>)"
       
   147     by (simp only: star_cases[symmetric])
       
   148   also have "... = Der c (A ;; A\<star>)"
       
   149     by (simp only: Der_union Der_empty) (simp)
       
   150   also have "... = (Der c A) ;; A\<star> \<union> (if [] \<in> A then Der c (A\<star>) else {})"
       
   151     by simp
       
   152   also have "... =  (Der c A) ;; A\<star>"
       
   153     unfolding Seq_def Der_def
       
   154     by (auto dest: star_decomp)
       
   155   finally show "Der c (A\<star>) = (Der c A) ;; A\<star>" .
       
   156 qed
       
   157 
       
   158 
       
   159 lemma der_correctness:
       
   160   shows "L (der c r) = Der c (L r)"
       
   161 by (induct r) 
       
   162    (simp_all add: nullable_correctness)
       
   163 
       
   164 lemma matcher_correctness:
       
   165   shows "matcher r s \<longleftrightarrow> s \<in> L r"
       
   166 by (induct s arbitrary: r)
       
   167    (simp_all add: nullable_correctness der_correctness Der_def)
       
   168 
       
   169 section {* Examples *}
       
   170 
       
   171 definition 
       
   172   "CHRA \<equiv> CHAR (CHR ''a'')"
       
   173 
       
   174 definition 
       
   175   "ALT1 \<equiv> ALT CHRA EMPTY"
       
   176 
       
   177 definition 
       
   178   "SEQ3 \<equiv> SEQ (SEQ ALT1 ALT1) ALT1"
       
   179 
       
   180 value "matcher SEQ3 ''aaa''"
       
   181 
       
   182 value "matcher NULL []"
       
   183 value "matcher (CHAR (CHR ''a'')) [CHR ''a'']"
       
   184 value "matcher (CHAR a) [a,a]"
       
   185 value "matcher (STAR (CHAR a)) []"
       
   186 value "matcher (STAR (CHAR a))  [a,a]"
       
   187 value "matcher (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbbbc''"
       
   188 value "matcher (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbcbbc''"
       
   189 
       
   190 section {* Incorrect Matcher - fun-definition rejected *}
       
   191 
       
   192 fun
       
   193   match :: "rexp list \<Rightarrow> string \<Rightarrow> bool"
       
   194 where
       
   195   "match [] [] = True"
       
   196 | "match [] (c # s) = False"
       
   197 | "match (NULL # rs) s = False"  
       
   198 | "match (EMPTY # rs) s = match rs s"
       
   199 | "match (CHAR c # rs) [] = False"
       
   200 | "match (CHAR c # rs) (d # s) = (if c = d then match rs s else False)"         
       
   201 | "match (ALT r1 r2 # rs) s = (match (r1 # rs) s \<or> match (r2 # rs) s)" 
       
   202 | "match (SEQ r1 r2 # rs) s = match (r1 # r2 # rs) s"
       
   203 | "match (STAR r # rs) s = (match rs s \<or> match (r # (STAR r) # rs) s)"
       
   204 
       
   205 
       
   206 end