progs/Matcher.thy
changeset 495 7d9d86dc7aa0
parent 208 bd5a8a6b3871
child 882 5fcad75ade92
equal deleted inserted replaced
494:d0fc671bcbbf 495:7d9d86dc7aa0
     4 
     4 
     5 
     5 
     6 section {* Regular Expressions *}
     6 section {* Regular Expressions *}
     7 
     7 
     8 datatype rexp =
     8 datatype rexp =
     9   NULL
     9   ZERO
    10 | EMPTY
    10 | ONE
    11 | CHAR char
    11 | CHAR char
    12 | SEQ rexp rexp
    12 | SEQ rexp rexp
    13 | ALT rexp rexp
    13 | ALT rexp rexp
    14 | STAR rexp
    14 | STAR rexp
    15 
    15 
    61 section {* Semantics of Regular Expressions *}
    61 section {* Semantics of Regular Expressions *}
    62  
    62  
    63 fun
    63 fun
    64   L :: "rexp \<Rightarrow> string set"
    64   L :: "rexp \<Rightarrow> string set"
    65 where
    65 where
    66   "L (NULL) = {}"
    66   "L (ZERO) = {}"
    67 | "L (EMPTY) = {[]}"
    67 | "L (ONE) = {[]}"
    68 | "L (CHAR c) = {[c]}"
    68 | "L (CHAR c) = {[c]}"
    69 | "L (SEQ r1 r2) = (L r1) ;; (L r2)"
    69 | "L (SEQ r1 r2) = (L r1) ;; (L r2)"
    70 | "L (ALT r1 r2) = (L r1) \<union> (L r2)"
    70 | "L (ALT r1 r2) = (L r1) \<union> (L r2)"
    71 | "L (STAR r) = (L r)\<star>"
    71 | "L (STAR r) = (L r)\<star>"
    72 
    72 
    73 section {* The Matcher *}
    73 section {* The Matcher *}
    74 
    74 
    75 fun
    75 fun
    76  nullable :: "rexp \<Rightarrow> bool"
    76  nullable :: "rexp \<Rightarrow> bool"
    77 where
    77 where
    78   "nullable (NULL) = False"
    78   "nullable (ZERO) = False"
    79 | "nullable (EMPTY) = True"
    79 | "nullable (ONE) = True"
    80 | "nullable (CHAR c) = False"
    80 | "nullable (CHAR c) = False"
    81 | "nullable (ALT r1 r2) = (nullable r1 \<or> nullable r2)"
    81 | "nullable (ALT r1 r2) = (nullable r1 \<or> nullable r2)"
    82 | "nullable (SEQ r1 r2) = (nullable r1 \<and> nullable r2)"
    82 | "nullable (SEQ r1 r2) = (nullable r1 \<and> nullable r2)"
    83 | "nullable (STAR r) = True"
    83 | "nullable (STAR r) = True"
    84 
    84 
    85 fun
       
    86  noccurs :: "rexp \<Rightarrow> bool"
       
    87 where
       
    88   "noccurs (NULL) = True"
       
    89 | "noccurs (EMPTY) = False"
       
    90 | "noccurs (CHAR c) = False"
       
    91 | "noccurs (ALT r1 r2) = (noccurs r1 \<or> noccurs r2)"
       
    92 | "noccurs (SEQ r1 r2) = (noccurs r1 \<or> noccurs r2)"
       
    93 | "noccurs (STAR r) = (noccurs r)"
       
    94 
    85 
    95 lemma
    86 section {* Correctness Proof for Nullable *}
    96   "\<not> noccurs r \<Longrightarrow> L r \<noteq> {}"
       
    97 apply(induct r)
       
    98 apply(auto simp add: Seq_def)
       
    99 done
       
   100 
       
   101 lemma
       
   102   "L r = {} \<Longrightarrow> noccurs r"
       
   103 apply(induct r)
       
   104 apply(auto simp add: Seq_def)
       
   105 done
       
   106 
       
   107 lemma does_not_hold:
       
   108   "noccurs r \<Longrightarrow> L r = {}"
       
   109 apply(induct r)
       
   110 apply(auto simp add: Seq_def)
       
   111 oops
       
   112 
       
   113 fun
       
   114  der :: "char \<Rightarrow> rexp \<Rightarrow> rexp"
       
   115 where
       
   116   "der c (NULL) = NULL"
       
   117 | "der c (EMPTY) = NULL"
       
   118 | "der c (CHAR c') = (if c = c' then EMPTY else NULL)"
       
   119 | "der c (ALT r1 r2) = ALT (der c r1) (der c r2)"
       
   120 | "der c (SEQ r1 r2) = ALT (SEQ (der c r1) r2) (if nullable r1 then der c r2 else NULL)"
       
   121 | "der c (STAR r) = SEQ (der c r) (STAR r)"
       
   122 
       
   123 fun 
       
   124  ders :: "string \<Rightarrow> rexp \<Rightarrow> rexp"
       
   125 where
       
   126   "ders [] r = r"
       
   127 | "ders (c # s) r = ders s (der c r)"
       
   128 
       
   129 fun
       
   130   matcher :: "rexp \<Rightarrow> string \<Rightarrow> bool"
       
   131 where
       
   132   "matcher r s = nullable (ders s r)"
       
   133 
       
   134 
       
   135 section {* Correctness Proof of the Matcher *}
       
   136 
    87 
   137 lemma nullable_correctness:
    88 lemma nullable_correctness:
   138   shows "nullable r  \<longleftrightarrow> [] \<in> (L r)"
    89   shows "nullable r  \<longleftrightarrow> [] \<in> (L r)"
   139 by (induct r) (auto simp add: Seq_def) 
    90 apply(induct r)
   140 section {* Left-Quotient of a Set *}
    91 (* ZERO case *)
       
    92 apply(simp only: nullable.simps)
       
    93 apply(simp only: L.simps)
       
    94 apply(simp)
       
    95 (* ONE case *)
       
    96 apply(simp only: nullable.simps)
       
    97 apply(simp only: L.simps)
       
    98 apply(simp)
       
    99 (* CHAR case *)
       
   100 apply(simp only: nullable.simps)
       
   101 apply(simp only: L.simps)
       
   102 apply(simp)
       
   103 prefer 2
       
   104 (* ALT case *)
       
   105 apply(simp (no_asm) only: nullable.simps)
       
   106 apply(simp only:)
       
   107 apply(simp only: L.simps)
       
   108 apply(simp)
       
   109 (* SEQ case *)
       
   110 oops
   141 
   111 
   142 fun
   112 lemma nullable_correctness:
   143  zeroable :: "rexp \<Rightarrow> bool"
   113   shows "nullable r  \<longleftrightarrow> [] \<in> (L r)"
   144 where
   114 apply(induct r)
   145   "zeroable (NULL) = True"
   115 apply(simp_all)
   146 | "zeroable (EMPTY) = False"
   116 (* all easy subgoals are proved except the last 2 *)
   147 | "zeroable (CHAR c) = False"
   117 (* where the definition of Seq needs to be unfolded. *)
   148 | "zeroable (ALT r1 r2) = (zeroable r1 \<and> zeroable r2)"
   118 oops
   149 | "zeroable (SEQ r1 r2) = (zeroable r1 \<or> zeroable r2)"
       
   150 | "zeroable (STAR r) = False"
       
   151 
   119 
       
   120 lemma nullable_correctness:
       
   121   shows "nullable r  \<longleftrightarrow> [] \<in> (L r)"
       
   122 apply(induct r)
       
   123 apply(simp_all add: Seq_def)
       
   124 (* except the star case every thing is proved *)
       
   125 (* we need to use the rule for Star.start *)
       
   126 oops
   152 
   127 
   153 lemma zeroable_correctness:
   128 lemma nullable_correctness:
   154   shows "zeroable r  \<longleftrightarrow>  (L r = {})"
   129   shows "nullable r  \<longleftrightarrow> [] \<in> (L r)"
   155 apply(induct r)
   130 apply(induct r)
   156 apply(auto simp add: Seq_def)[6]
   131 apply(simp_all add: Seq_def Star.start)
   157 done
   132 done
   158 
       
   159 section {* Left-Quotient of a Set *}
       
   160 
       
   161 definition
       
   162   Der :: "char \<Rightarrow> string set \<Rightarrow> string set"
       
   163 where
       
   164   "Der c A \<equiv> {s. [c] @ s \<in> A}"
       
   165 
       
   166 lemma Der_null [simp]:
       
   167   shows "Der c {} = {}"
       
   168 unfolding Der_def
       
   169 by auto
       
   170 
       
   171 lemma Der_empty [simp]:
       
   172   shows "Der c {[]} = {}"
       
   173 unfolding Der_def
       
   174 by auto
       
   175 
       
   176 lemma Der_char [simp]:
       
   177   shows "Der c {[d]} = (if c = d then {[]} else {})"
       
   178 unfolding Der_def
       
   179 by auto
       
   180 
       
   181 lemma Der_union [simp]:
       
   182   shows "Der c (A \<union> B) = Der c A \<union> Der c B"
       
   183 unfolding Der_def
       
   184 by auto
       
   185 
       
   186 lemma Der_seq [simp]:
       
   187   shows "Der c (A ;; B) = (Der c A) ;; B \<union> (if [] \<in> A then Der c B else {})"
       
   188 unfolding Der_def Seq_def
       
   189 by (auto simp add: Cons_eq_append_conv)
       
   190 
       
   191 lemma Der_star [simp]:
       
   192   shows "Der c (A\<star>) = (Der c A) ;; A\<star>"
       
   193 proof -    
       
   194   have "Der c (A\<star>) = Der c ({[]} \<union> A ;; A\<star>)"
       
   195     by (simp only: star_cases[symmetric])
       
   196   also have "... = Der c (A ;; A\<star>)"
       
   197     by (simp only: Der_union Der_empty) (simp)
       
   198   also have "... = (Der c A) ;; A\<star> \<union> (if [] \<in> A then Der c (A\<star>) else {})"
       
   199     by simp
       
   200   also have "... =  (Der c A) ;; A\<star>"
       
   201     unfolding Seq_def Der_def
       
   202     by (auto dest: star_decomp)
       
   203   finally show "Der c (A\<star>) = (Der c A) ;; A\<star>" .
       
   204 qed
       
   205 
       
   206 
       
   207 lemma der_correctness:
       
   208   shows "L (der c r) = Der c (L r)"
       
   209 by (induct r) 
       
   210    (simp_all add: nullable_correctness)
       
   211 
       
   212 lemma matcher_correctness:
       
   213   shows "matcher r s \<longleftrightarrow> s \<in> L r"
       
   214 by (induct s arbitrary: r)
       
   215    (simp_all add: nullable_correctness der_correctness Der_def)
       
   216 
       
   217 section {* Examples *}
       
   218 
       
   219 definition 
       
   220   "CHRA \<equiv> CHAR (CHR ''a'')"
       
   221 
       
   222 definition 
       
   223   "ALT1 \<equiv> ALT CHRA EMPTY"
       
   224 
       
   225 definition 
       
   226   "SEQ3 \<equiv> SEQ (SEQ ALT1 ALT1) ALT1"
       
   227 
       
   228 value "matcher SEQ3 ''aaa''"
       
   229 
       
   230 value "matcher NULL []"
       
   231 value "matcher (CHAR (CHR ''a'')) [CHR ''a'']"
       
   232 value "matcher (CHAR a) [a,a]"
       
   233 value "matcher (STAR (CHAR a)) []"
       
   234 value "matcher (STAR (CHAR a))  [a,a]"
       
   235 value "matcher (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbbbc''"
       
   236 value "matcher (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbcbbc''"
       
   237 
       
   238 section {* Incorrect Matcher - fun-definition rejected *}
       
   239 
       
   240 fun
       
   241   match :: "rexp list \<Rightarrow> string \<Rightarrow> bool"
       
   242 where
       
   243   "match [] [] = True"
       
   244 | "match [] (c # s) = False"
       
   245 | "match (NULL # rs) s = False"  
       
   246 | "match (EMPTY # rs) s = match rs s"
       
   247 | "match (CHAR c # rs) [] = False"
       
   248 | "match (CHAR c # rs) (d # s) = (if c = d then match rs s else False)"         
       
   249 | "match (ALT r1 r2 # rs) s = (match (r1 # rs) s \<or> match (r2 # rs) s)" 
       
   250 | "match (SEQ r1 r2 # rs) s = match (r1 # r2 # rs) s"
       
   251 | "match (STAR r # rs) s = (match rs s \<or> match (r # (STAR r) # rs) s)"
       
   252 
   133 
   253 
   134 
   254 end    
   135 end