Matcher.thy
author zhang
Thu, 27 Jan 2011 11:50:58 +0000
changeset 41 dbbc7989e753
parent 24 f72c82bf59e5
child 102 5fed809d0fc1
permissions -rw-r--r--
a newer version
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
3
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
     1
theory Matcher
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
     2
  imports "Main" 
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
     3
begin
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
     4
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
     5
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
     6
section {* Sequential Composition of Sets *}
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
     7
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
     8
fun
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
     9
  lang_seq :: "string set \<Rightarrow> string set \<Rightarrow> string set" ("_ ; _" [100,100] 100)
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    10
where 
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    11
  "L1 ; L2 = {s1 @ s2 | s1 s2. s1 \<in> L1 \<and> s2 \<in> L2}"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    12
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    13
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    14
section {* Kleene Star for Sets *}
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    15
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    16
inductive_set
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    17
  Star :: "string set \<Rightarrow> string set" ("_\<star>" [101] 102)
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    18
  for L :: "string set"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    19
where
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    20
  start[intro]: "[] \<in> L\<star>"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    21
| step[intro]:  "\<lbrakk>s1 \<in> L; s2 \<in> L\<star>\<rbrakk> \<Longrightarrow> s1 @ s2 \<in> L\<star>"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    22
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    23
24
f72c82bf59e5 added paper
urbanc
parents: 5
diff changeset
    24
text {* A standard property of Star *}
3
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    25
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    26
lemma lang_star_cases:
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    27
  shows "L\<star> =  {[]} \<union> L ; L\<star>"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    28
by (auto) (metis Star.simps)
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    29
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    30
section {* Regular Expressions *}
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    31
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    32
datatype rexp =
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    33
  NULL
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    34
| EMPTY
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    35
| CHAR char
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    36
| SEQ rexp rexp
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    37
| ALT rexp rexp
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    38
| STAR rexp
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    39
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    40
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    41
section {* Semantics of Regular Expressions *}
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    42
 
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    43
fun
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    44
  L :: "rexp \<Rightarrow> string set"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    45
where
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    46
  "L (NULL) = {}"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    47
| "L (EMPTY) = {[]}"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    48
| "L (CHAR c) = {[c]}"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    49
| "L (SEQ r1 r2) = (L r1) ; (L r2)"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    50
| "L (ALT r1 r2) = (L r1) \<union> (L r2)"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    51
| "L (STAR r) = (L r)\<star>"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    52
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    53
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    54
section {* The Matcher *}
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    55
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    56
fun
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    57
 nullable :: "rexp \<Rightarrow> bool"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    58
where
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    59
  "nullable (NULL) = False"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    60
| "nullable (EMPTY) = True"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    61
| "nullable (CHAR c) = False"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    62
| "nullable (ALT r1 r2) = (nullable r1 \<or> nullable r2)"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    63
| "nullable (SEQ r1 r2) = (nullable r1 \<and> nullable r2)"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    64
| "nullable (STAR r) = True"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    65
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    66
fun
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    67
 der :: "char \<Rightarrow> rexp \<Rightarrow> rexp"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    68
where
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    69
  "der c (NULL) = NULL"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    70
| "der c (EMPTY) = NULL"
5
074d9a4b2bc9 added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents: 3
diff changeset
    71
| "der c (CHAR c') = (if c = c' then EMPTY else NULL)"
3
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    72
| "der c (ALT r1 r2) = ALT (der c r1) (der c r2)"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    73
| "der c (SEQ r1 r2) = ALT (SEQ (der c r1) r2) (if nullable r1 then der c r2 else NULL)"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    74
| "der c (STAR r) = SEQ (der c r) (STAR r)"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    75
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    76
fun 
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    77
 derivative :: "string \<Rightarrow> rexp \<Rightarrow> rexp"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    78
where
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    79
  "derivative [] r = r"
5
074d9a4b2bc9 added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents: 3
diff changeset
    80
| "derivative (c # s) r = derivative s (der c r)"
3
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    81
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    82
fun
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    83
  matches :: "rexp \<Rightarrow> string \<Rightarrow> bool"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    84
where
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    85
  "matches r s = nullable (derivative s r)"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    86
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    87
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    88
section {* Correctness Proof of the Matcher *}
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    89
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    90
lemma nullable_correctness:
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    91
  shows "nullable r \<longleftrightarrow> [] \<in> L r"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    92
by (induct r) (auto) 
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    93
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    94
lemma der_correctness:
5
074d9a4b2bc9 added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents: 3
diff changeset
    95
  shows "s \<in> L (der c r) \<longleftrightarrow> c # s \<in> L r"
3
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    96
proof (induct r arbitrary: s)
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
    97
  case (SEQ r1 r2 s)
5
074d9a4b2bc9 added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents: 3
diff changeset
    98
  have ih1: "\<And>s. s \<in> L (der c r1) \<longleftrightarrow> c # s \<in> L r1" by fact
074d9a4b2bc9 added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents: 3
diff changeset
    99
  have ih2: "\<And>s. s \<in> L (der c r2) \<longleftrightarrow> c # s \<in> L r2" by fact
074d9a4b2bc9 added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents: 3
diff changeset
   100
  show "s \<in> L (der c (SEQ r1 r2)) \<longleftrightarrow> c # s \<in> L (SEQ r1 r2)" 
3
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   101
    using ih1 ih2 by (auto simp add: nullable_correctness Cons_eq_append_conv)
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   102
next
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   103
  case (STAR r s)
5
074d9a4b2bc9 added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents: 3
diff changeset
   104
  have ih: "\<And>s. s \<in> L (der c r) \<longleftrightarrow> c # s \<in> L r" by fact
074d9a4b2bc9 added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents: 3
diff changeset
   105
  show "s \<in> L (der c (STAR r)) \<longleftrightarrow> c # s \<in> L (STAR r)"
3
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   106
  proof
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   107
    assume "s \<in> L (der c (STAR r))"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   108
    then have "s \<in> L (der c r) ; L r\<star>" by simp
5
074d9a4b2bc9 added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents: 3
diff changeset
   109
    then have "c # s \<in> L r ; (L r)\<star>" 
3
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   110
      by (auto simp add: ih Cons_eq_append_conv)
5
074d9a4b2bc9 added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents: 3
diff changeset
   111
    then have "c # s \<in> (L r)\<star>" using lang_star_cases by auto
074d9a4b2bc9 added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents: 3
diff changeset
   112
    then show "c # s \<in> L (STAR r)" by simp
3
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   113
  next
5
074d9a4b2bc9 added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents: 3
diff changeset
   114
    assume "c # s \<in> L (STAR r)"
074d9a4b2bc9 added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents: 3
diff changeset
   115
    then have "c # s \<in> (L r)\<star>" by simp
3
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   116
    then have "s \<in> L (der c r); (L r)\<star>"
5
074d9a4b2bc9 added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents: 3
diff changeset
   117
      by (induct x\<equiv>"c # s" rule: Star.induct)
3
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   118
         (auto simp add: ih append_eq_Cons_conv)
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   119
    then show "s \<in> L (der c (STAR r))" by simp  
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   120
  qed
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   121
qed (simp_all)
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   122
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   123
lemma matches_correctness:
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   124
  shows "matches r s \<longleftrightarrow> s \<in> L r"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   125
by (induct s arbitrary: r)
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   126
   (simp_all add: nullable_correctness der_correctness)
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   127
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   128
section {* Examples *}
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   129
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   130
value "matches NULL []"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   131
value "matches (CHAR (CHR ''a'')) [CHR ''a'']"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   132
value "matches (CHAR a) [a,a]"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   133
value "matches (STAR (CHAR a)) []"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   134
value "matches (STAR (CHAR a))  [a,a]"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   135
value "matches (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbbbc''"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   136
value "matches (SEQ (CHAR (CHR ''a'')) (SEQ (STAR (CHAR (CHR ''b''))) (CHAR (CHR ''c'')))) ''abbcbbc''"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   137
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   138
section {* Incorrect Matcher - fun-definition rejected *}
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   139
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   140
function 
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   141
  match :: "rexp list \<Rightarrow> string \<Rightarrow> bool"
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   142
where
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   143
  "match [] [] = True"
5
074d9a4b2bc9 added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents: 3
diff changeset
   144
| "match [] (c # s) = False"
074d9a4b2bc9 added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents: 3
diff changeset
   145
| "match (NULL # rs) s = False"  
074d9a4b2bc9 added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents: 3
diff changeset
   146
| "match (EMPTY # rs) s = match rs s"
074d9a4b2bc9 added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents: 3
diff changeset
   147
| "match (CHAR c # rs) [] = False"
074d9a4b2bc9 added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents: 3
diff changeset
   148
| "match (CHAR c # rs) (d # s) = (if c = d then match rs s else False)"         
074d9a4b2bc9 added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents: 3
diff changeset
   149
| "match (ALT r1 r2 # rs) s = (match (r1 # rs) s \<or> match (r2 # rs) s)" 
074d9a4b2bc9 added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents: 3
diff changeset
   150
| "match (SEQ r1 r2 # rs) s = match (r1 # r2 # rs) s"
074d9a4b2bc9 added a file about the easy closure properties of regular sets (the difficult parts, like complement, are missing)
urbanc
parents: 3
diff changeset
   151
| "match (STAR r # rs) s = (match rs s \<or> match (r # (STAR r) # rs) s)"
3
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   152
apply(pat_completeness)
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   153
apply(auto)
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   154
done
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   155
8d657fa3ba2e added simple regexp matcher from Slind et al
urbanc
parents:
diff changeset
   156
end