author | Christian Urban <christian dot urban at kcl dot ac dot uk> |
Mon, 07 Mar 2016 18:56:41 +0000 | |
changeset 121 | 4c85af262ee7 |
parent 120 | d74bfa11802c |
child 122 | 7c6c907660d8 |
permissions | -rw-r--r-- |
95
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
1 |
(*<*) |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
2 |
theory Paper |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
3 |
imports "../ReStar" "~~/src/HOL/Library/LaTeXsugar" |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
4 |
begin |
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
5 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
6 |
declare [[show_question_marks = false]] |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
7 |
|
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
8 |
abbreviation |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
9 |
"der_syn r c \<equiv> der c r" |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
10 |
|
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
11 |
abbreviation |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
12 |
"ders_syn r s \<equiv> ders s r" |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
13 |
|
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
14 |
notation (latex output) |
111
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
15 |
If ("(\<^raw:\textrm{>if\<^raw:}> (_)/ \<^raw:\textrm{>then\<^raw:}> (_)/ \<^raw:\textrm{>else\<^raw:}> (_))" 10) and |
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
16 |
Cons ("_\<^raw:\mbox{$\,$}>::\<^raw:\mbox{$\,$}>_" [75,73] 73) and |
118
79efc0bcfc96
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
117
diff
changeset
|
17 |
|
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
18 |
ZERO ("\<^bold>0" 78) and |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
19 |
ONE ("\<^bold>1" 78) and |
109
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
20 |
CHAR ("_" [1000] 80) and |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
21 |
ALT ("_ + _" [77,77] 78) and |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
22 |
SEQ ("_ \<cdot> _" [77,77] 78) and |
107
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
23 |
STAR ("_\<^sup>\<star>" [1000] 78) and |
111
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
24 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
25 |
val.Void ("'(')" 79) and |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
26 |
val.Char ("Char _" [1000] 79) and |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
27 |
val.Left ("Left _" [79] 78) and |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
28 |
val.Right ("Right _" [79] 78) and |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
29 |
val.Seq ("Seq _ _" [79,79] 78) and |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
30 |
val.Stars ("Stars _" [79] 78) and |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
31 |
|
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
32 |
L ("L'(_')" [10] 78) and |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
33 |
der_syn ("_\\_" [79, 1000] 76) and |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
34 |
ders_syn ("_\\_" [79, 1000] 76) and |
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
35 |
flat ("|_|" [75] 74) and |
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
36 |
Sequ ("_ @ _" [78,77] 63) and |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
37 |
injval ("inj _ _ _" [79,77,79] 76) and |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
38 |
mkeps ("mkeps _" [79] 76) and |
102
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
39 |
projval ("proj _ _ _" [1000,77,1000] 77) and |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
40 |
length ("len _" [78] 73) and |
118
79efc0bcfc96
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
117
diff
changeset
|
41 |
matcher ("lexer _ _" [78,78] 77) and |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
42 |
|
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
43 |
Prf ("_ : _" [75,75] 75) and |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
44 |
PMatch ("'(_, _') \<rightarrow> _" [63,75,75] 75) |
105
80218dddbb15
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
103
diff
changeset
|
45 |
(* and ValOrd ("_ \<succeq>\<^bsub>_\<^esub> _" [78,77,77] 73) *) |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
46 |
|
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
47 |
definition |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
48 |
"match r s \<equiv> nullable (ders s r)" |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
49 |
|
95
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
50 |
(*>*) |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
51 |
|
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
52 |
section {* Introduction *} |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
53 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
54 |
|
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
55 |
text {* |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
56 |
|
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
57 |
Brzozowski \cite{Brzozowski1964} introduced the notion of the {\em |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
58 |
derivative} @{term "der c r"} of a regular expression @{text r} w.r.t.\ a |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
59 |
character~@{text c}, and showed that it gave a simple solution to the |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
60 |
problem of matching a string @{term s} with a regular expression @{term r}: |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
61 |
if the derivative of @{term r} w.r.t.\ (in succession) all the characters of |
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
62 |
the string matches the empty string, then @{term r} matches @{term s} (and |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
63 |
{\em vice versa}). The derivative has the property (which may almost be |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
64 |
regarded as its specification) that, for every string @{term s} and regular |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
65 |
expression @{term r} and character @{term c}, one has @{term "cs \<in> L(r)"} if |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
66 |
and only if \mbox{@{term "s \<in> L(der c r)"}}. The beauty of Brzozowski's |
109
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
67 |
derivatives is that they are neatly expressible in any functional language, |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
68 |
and easily definable and reasoned about in theorem provers---the definitions |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
69 |
just consist of inductive datatypes and simple recursive functions. A |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
70 |
completely formalised correctness proof of this matcher in for example HOL4 |
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
71 |
has been mentioned in~\cite{Owens2008}. Another one in Isabelle/HOL is part |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
72 |
of the work in \cite{Krauss2011}. |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
73 |
|
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
74 |
One limitation of Brzozowski's matcher is that it only generates a YES/NO |
109
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
75 |
answer for whether a string is being matched by a regular expression. |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
76 |
Sulzmann and Lu \cite{Sulzmann2014} extended this matcher to allow |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
77 |
generation not just of a YES/NO answer but of an actual matching, called a |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
78 |
[lexical] {\em value}. They give a simple algorithm to calculate a value |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
79 |
that appears to be the value associated with POSIX matching |
109
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
80 |
\cite{Kuklewicz,Vansummeren2006}. The challenge then is to specify that |
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
81 |
value, in an algorithm-independent fashion, and to show that Sulzmann and |
109
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
82 |
Lu's derivative-based algorithm does indeed calculate a value that is |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
83 |
correct according to the specification. |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
84 |
|
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
85 |
The answer given by Sulzmann and Lu \cite{Sulzmann2014} is to define a |
121
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
86 |
relation (called an ``order relation'') on the set of values of @{term r}, |
109
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
87 |
and to show that (once a string to be matched is chosen) there is a maximum |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
88 |
element and that it is computed by their derivative-based algorithm. This |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
89 |
proof idea is inspired by work of Frisch and Cardelli \cite{Frisch2004} on a |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
90 |
GREEDY regular expression matching algorithm. Beginning with our |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
91 |
observations that, without evidence that it is transitive, it cannot be |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
92 |
called an ``order relation'', and that the relation is called a ``total |
115
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
93 |
order'' despite being evidently not total\footnote{The relation @{text |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
94 |
"\<ge>\<^bsub>r\<^esub>"} defined in \cite{Sulzmann2014} is a relation on the |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
95 |
values for the regular expression @{term r}; but it only holds between |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
96 |
@{term v} and @{term "v'"} in cases where @{term v} and @{term "v'"} have |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
97 |
the same flattening (underlying string). So a counterexample to totality is |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
98 |
given by taking two values @{term v} and @{term "v'"} for @{term r} that |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
99 |
have different flattenings (see Section~\ref{posixsec}). A different |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
100 |
relation @{text "\<ge>\<^bsub>r,s\<^esub>"} on the set of values for @{term r} |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
101 |
with flattening @{term s} is definable by the same approach, and is indeed |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
102 |
total; but that is not what Proposition 1 of \cite{Sulzmann2014} does.}, we |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
103 |
identify problems with this approach (of which some of the proofs are not |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
104 |
published in \cite{Sulzmann2014}); perhaps more importantly, we give a |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
105 |
simple inductive (and algorithm-independent) definition of what we call |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
106 |
being a {\em POSIX value} for a regular expression @{term r} and a string |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
107 |
@{term s}; we show that the algorithm computes such a value and that such a |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
108 |
value is unique. Proofs are both done by hand and checked in Isabelle/HOL. |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
109 |
The experience of doing our proofs has been that this mechanical checking |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
110 |
was absolutely essential: this subject area has hidden snares. This was also |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
111 |
noted by Kuklewitz \cite{Kuklewicz} who found that nearly all POSIX matching |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
112 |
implementations are ``buggy'' \cite[Page 203]{Sulzmann2014}. |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
113 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
114 |
If a regular expression matches a string, then in general there is more than |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
115 |
one way of how the string is matched. There are two commonly used |
110
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
116 |
disambiguation strategies to generate a unique answer: one is called GREEDY |
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
117 |
matching \cite{Frisch2004} and the other is POSIX |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
118 |
matching~\cite{Kuklewicz,Sulzmann2014,Vansummeren2006}. For example consider |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
119 |
the string @{term xy} and the regular expression \mbox{@{term "STAR (ALT |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
120 |
(ALT x y) xy)"}}. Either the string can be matched in two `iterations' by |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
121 |
the single letter-regular expressions @{term x} and @{term y}, or directly |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
122 |
in one iteration by @{term xy}. The first case corresponds to GREEDY |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
123 |
matching, which first matches with the left-most symbol and only matches the |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
124 |
next symbol in case of a mismatch (this is greedy in the sense of preferring |
110
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
125 |
instant gratification to delayed repletion). The second case is POSIX |
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
126 |
matching, which prefers the longest match. |
109
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
127 |
|
112
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
128 |
In the context of lexing, where an input string needs to be split up into a |
110
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
129 |
sequence of tokens, POSIX is the more natural disambiguation strategy for |
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
130 |
what programmers consider basic syntactic building blocks in their programs. |
112
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
131 |
These building blocks are often specified by some regular expressions, say |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
132 |
@{text "r\<^bsub>key\<^esub>"} and @{text "r\<^bsub>id\<^esub>"} for recognising keywords and |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
133 |
identifiers, respectively. There are two underlying (informal) rules behind |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
134 |
tokenising a string in a POSIX fashion: |
109
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
135 |
|
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
136 |
\begin{itemize} |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
137 |
\item[$\bullet$] \underline{The Longest Match Rule (or ``maximal munch rule''):} |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
138 |
|
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
139 |
The longest initial substring matched by any regular expression is taken as |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
140 |
next token.\smallskip |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
141 |
|
119
71e26f43c896
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
118
diff
changeset
|
142 |
\item[$\bullet$] \underline{Priority Rule:} |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
143 |
|
109
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
144 |
For a particular longest initial substring, the first regular expression |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
145 |
that can match determines the token. |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
146 |
\end{itemize} |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
147 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
148 |
\noindent Consider for example @{text "r\<^bsub>key\<^esub>"} recognising keywords such as |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
149 |
@{text "if"}, @{text "then"} and so on; and @{text "r\<^bsub>id\<^esub>"} recognising |
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
150 |
identifiers (say, a single character followed by characters or numbers). |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
151 |
Then we can form the regular expression @{text "(r\<^bsub>key\<^esub> + r\<^bsub>id\<^esub>)\<^sup>\<star>"} and use |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
152 |
POSIX matching to tokenise strings, say @{text "iffoo"} and @{text "if"}. |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
153 |
For @{text "iffoo"} we obtain by the longest match rule a single identifier |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
154 |
token, not a keyword followed by an identifier. For @{text "if"} we obtain by |
121
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
155 |
the priority rule a keyword token, not an identifier token---even if @{text |
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
156 |
"r\<^bsub>id\<^esub>"} matches also.\bigskip |
109
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
157 |
|
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
158 |
\noindent {\bf Contributions:} (NOT DONE YET) We have implemented in |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
159 |
Isabelle/HOL the derivative-based regular expression matching algorithm as |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
160 |
described by Sulzmann and Lu \cite{Sulzmann2014}. We have proved the |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
161 |
correctness of this algorithm according to our specification of what a POSIX |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
162 |
value is. Sulzmann and Lu sketch in \cite{Sulzmann2014} an informal |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
163 |
correctness proof: but to us it contains unfillable gaps. |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
164 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
165 |
informal correctness proof given in \cite{Sulzmann2014} is in final |
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
166 |
form\footnote{} and to us contains unfillable gaps. |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
167 |
|
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
168 |
Our specification of a POSIX value consists of a simple inductive definition |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
169 |
that given a string and a regular expression uniquely determines this value. |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
170 |
Derivatives as calculated by Brzozowski's method are usually more complex |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
171 |
regular expressions than the initial one; various optimisations are |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
172 |
possible, such as the simplifications of @{term "ALT ZERO r"}, @{term "ALT r |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
173 |
ZERO"}, @{term "SEQ ONE r"} and @{term "SEQ r ONE"} to @{term r}. One of the |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
174 |
advantages of having a simple specification and correctness proof is that |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
175 |
the latter can be refined to allow for such optimisations and simple |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
176 |
correctness proof. |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
177 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
178 |
An extended version of \cite{Sulzmann2014} is available at the website of |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
179 |
its first author; this includes some ``proofs'', claimed in |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
180 |
\cite{Sulzmann2014} to be ``rigorous''. Since these are evidently not in |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
181 |
final form, we make no comment thereon, preferring to give general reasons |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
182 |
for our belief that the approach of \cite{Sulzmann2014} is problematic |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
183 |
rather than to discuss details of unpublished work. |
107
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
184 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
185 |
*} |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
186 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
187 |
section {* Preliminaries *} |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
188 |
|
111
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
189 |
text {* \noindent Strings in Isabelle/HOL are lists of characters with the |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
190 |
empty string being represented by the empty list, written @{term "[]"}, and |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
191 |
list-cons being written as @{term "DUMMY # DUMMY"}. Often we use the usual |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
192 |
bracket notation for lists also for strings; for example a string consisting |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
193 |
of just a single character @{term c} is written @{term "[c]"}. By using the |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
194 |
type @{type char} for characters we have a supply of finitely many |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
195 |
characters roughly corresponding to the ASCII character set. Regular |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
196 |
expressions are defined as usual as the elements of the following inductive |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
197 |
datatype: |
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
198 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
199 |
\begin{center} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
200 |
@{text "r :="} |
107
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
201 |
@{const "ZERO"} $\mid$ |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
202 |
@{const "ONE"} $\mid$ |
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
203 |
@{term "CHAR c"} $\mid$ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
204 |
@{term "ALT r\<^sub>1 r\<^sub>2"} $\mid$ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
205 |
@{term "SEQ r\<^sub>1 r\<^sub>2"} $\mid$ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
206 |
@{term "STAR r"} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
207 |
\end{center} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
208 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
209 |
\noindent where @{const ZERO} stands for the regular expression that does |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
210 |
not match any string, @{const ONE} for the regular expression that matches |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
211 |
only the empty string and @{term c} for matching a character literal. The |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
212 |
language of a regular expression is also defined as usual by the |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
213 |
recursive function @{term L} with the clauses: |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
214 |
|
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
215 |
\begin{center} |
121
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
216 |
\begin{tabular}{l@ {\hspace{5mm}}rcl} |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
217 |
(1) & @{thm (lhs) L.simps(1)} & $\dn$ & @{thm (rhs) L.simps(1)}\\ |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
218 |
(2) & @{thm (lhs) L.simps(2)} & $\dn$ & @{thm (rhs) L.simps(2)}\\ |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
219 |
(3) & @{thm (lhs) L.simps(3)} & $\dn$ & @{thm (rhs) L.simps(3)}\\ |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
220 |
(4) & @{thm (lhs) L.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) L.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
221 |
(5) & @{thm (lhs) L.simps(5)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) L.simps(5)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
222 |
(6) & @{thm (lhs) L.simps(6)} & $\dn$ & @{thm (rhs) L.simps(6)}\\ |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
223 |
\end{tabular} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
224 |
\end{center} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
225 |
|
121
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
226 |
\noindent In clause (4) we use the operation @{term "DUMMY ;; |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
227 |
DUMMY"} for the concatenation of two languages (it is also list-append for |
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
228 |
strings). We use the star-notation for regular expressions and for |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
229 |
languages (in the last clause above). The star for languages is defined |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
230 |
inductively by two clauses: @{text "(i)"} the empty string being in |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
231 |
the star of a language and @{text "(ii)"} if @{term "s\<^sub>1"} is in a |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
232 |
language and @{term "s\<^sub>2"} in the star of this language, then also @{term |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
233 |
"s\<^sub>1 @ s\<^sub>2"} is in the star of this language. It will also be convenient |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
234 |
to use the following notion of a \emph{semantic derivative} (or \emph{left |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
235 |
quotient}) of a language defined as: |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
236 |
|
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
237 |
\begin{center} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
238 |
\begin{tabular}{lcl} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
239 |
@{thm (lhs) Der_def} & $\dn$ & @{thm (rhs) Der_def}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
240 |
\end{tabular} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
241 |
\end{center} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
242 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
243 |
\noindent |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
244 |
For semantic derivatives we have the following equations (for example |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
245 |
mechanically proved in \cite{Krauss2011}): |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
246 |
|
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
247 |
\begin{equation}\label{SemDer} |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
248 |
\begin{array}{lcl} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
249 |
@{thm (lhs) Der_null} & \dn & @{thm (rhs) Der_null}\\ |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
250 |
@{thm (lhs) Der_empty} & \dn & @{thm (rhs) Der_empty}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
251 |
@{thm (lhs) Der_char} & \dn & @{thm (rhs) Der_char}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
252 |
@{thm (lhs) Der_union} & \dn & @{thm (rhs) Der_union}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
253 |
@{thm (lhs) Der_Sequ} & \dn & @{thm (rhs) Der_Sequ}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
254 |
@{thm (lhs) Der_star} & \dn & @{thm (rhs) Der_star} |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
255 |
\end{array} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
256 |
\end{equation} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
257 |
|
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
258 |
|
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
259 |
\noindent \emph{\Brz's derivatives} of regular expressions |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
260 |
\cite{Brzozowski1964} can be easily defined by two recursive functions: |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
261 |
the first is from regular expressions to booleans (implementing a test |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
262 |
when a regular expression can match the empty string), and the second |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
263 |
takes a regular expression and a character to a (derivative) regular |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
264 |
expression: |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
265 |
|
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
266 |
\begin{center} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
267 |
\begin{tabular}{lcl} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
268 |
@{thm (lhs) nullable.simps(1)} & $\dn$ & @{thm (rhs) nullable.simps(1)}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
269 |
@{thm (lhs) nullable.simps(2)} & $\dn$ & @{thm (rhs) nullable.simps(2)}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
270 |
@{thm (lhs) nullable.simps(3)} & $\dn$ & @{thm (rhs) nullable.simps(3)}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
271 |
@{thm (lhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
272 |
@{thm (lhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
273 |
@{thm (lhs) nullable.simps(6)} & $\dn$ & @{thm (rhs) nullable.simps(6)}\medskip\\ |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
274 |
@{thm (lhs) der.simps(1)} & $\dn$ & @{thm (rhs) der.simps(1)}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
275 |
@{thm (lhs) der.simps(2)} & $\dn$ & @{thm (rhs) der.simps(2)}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
276 |
@{thm (lhs) der.simps(3)} & $\dn$ & @{thm (rhs) der.simps(3)}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
277 |
@{thm (lhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
278 |
@{thm (lhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]}\\ |
110
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
279 |
@{thm (lhs) der.simps(6)} & $\dn$ & @{thm (rhs) der.simps(6)} |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
280 |
\end{tabular} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
281 |
\end{center} |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
282 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
283 |
\noindent |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
284 |
We may extend this definition to give derivatives w.r.t.~strings: |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
285 |
|
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
286 |
\begin{center} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
287 |
\begin{tabular}{lcl} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
288 |
@{thm (lhs) ders.simps(1)} & $\dn$ & @{thm (rhs) ders.simps(1)}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
289 |
@{thm (lhs) ders.simps(2)} & $\dn$ & @{thm (rhs) ders.simps(2)}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
290 |
\end{tabular} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
291 |
\end{center} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
292 |
|
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
293 |
\noindent Given the equations in \eqref{SemDer}, it is a relatively easy |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
294 |
exercise in mechanical reasoning to establish that |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
295 |
|
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
296 |
\begin{proposition}\mbox{}\\ |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
297 |
\begin{tabular}{ll} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
298 |
@{text "(1)"} & @{thm (lhs) nullable_correctness} if and only if |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
299 |
@{thm (rhs) nullable_correctness}, and \\ |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
300 |
@{text "(2)"} & @{thm[mode=IfThen] der_correctness}. |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
301 |
\end{tabular} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
302 |
\end{proposition} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
303 |
|
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
304 |
\noindent With this in place it is also very routine to prove that the |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
305 |
regular expression matcher defined as |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
306 |
|
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
307 |
\begin{center} |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
308 |
@{thm match_def} |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
309 |
\end{center} |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
310 |
|
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
311 |
\noindent gives a positive answer if and only if @{term "s \<in> L r"}. |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
312 |
Consequently, this regular expression matching algorithm satisfies the |
121
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
313 |
usual specification for regular expression matching. While the matcher |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
314 |
above calculates a provably correct YES/NO answer for whether a regular |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
315 |
expression matches a string or not, the novel idea of Sulzmann and Lu |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
316 |
\cite{Sulzmann2014} is to append another phase to this algorithm in order |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
317 |
to calculate a [lexical] value. We will explain the details next. |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
318 |
|
107
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
319 |
*} |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
320 |
|
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
321 |
section {* POSIX Regular Expression Matching\label{posixsec} *} |
107
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
322 |
|
111
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
323 |
text {* |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
324 |
|
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
325 |
The clever idea in \cite{Sulzmann2014} is to introduce values for encoding |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
326 |
\emph{how} a regular expression matches a string and then define a |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
327 |
function on values that mirrors (but inverts) the construction of the |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
328 |
derivative on regular expressions. \emph{Values} are defined as the |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
329 |
inductive datatype |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
330 |
|
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
331 |
\begin{center} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
332 |
@{text "v :="} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
333 |
@{const "Void"} $\mid$ |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
334 |
@{term "val.Char c"} $\mid$ |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
335 |
@{term "Left v"} $\mid$ |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
336 |
@{term "Right v"} $\mid$ |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
337 |
@{term "Seq v\<^sub>1 v\<^sub>2"} $\mid$ |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
338 |
@{term "Stars vs"} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
339 |
\end{center} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
340 |
|
121
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
341 |
\noindent where we use @{term vs} to stand for a list of values. (This is |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
342 |
similar to the approach taken by Frisch and Cardelli for GREEDY matching |
121
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
343 |
\cite{Frisch2004}, and Sulzmann and Lu \cite{Sulzmann2014} for POSIX |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
344 |
matching). The string underlying a value can be calculated by the @{const |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
345 |
flat} function, written @{term "flat DUMMY"} and defined as: |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
346 |
|
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
347 |
\begin{center} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
348 |
\begin{tabular}{lcl} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
349 |
@{thm (lhs) flat.simps(1)} & $\dn$ & @{thm (rhs) flat.simps(1)}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
350 |
@{thm (lhs) flat.simps(2)} & $\dn$ & @{thm (rhs) flat.simps(2)}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
351 |
@{thm (lhs) flat.simps(3)} & $\dn$ & @{thm (rhs) flat.simps(3)}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
352 |
@{thm (lhs) flat.simps(4)} & $\dn$ & @{thm (rhs) flat.simps(4)}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
353 |
@{thm (lhs) flat.simps(5)[of "v\<^sub>1" "v\<^sub>2"]} & $\dn$ & @{thm (rhs) flat.simps(5)[of "v\<^sub>1" "v\<^sub>2"]}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
354 |
@{thm (lhs) flat.simps(6)} & $\dn$ & @{thm (rhs) flat.simps(6)}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
355 |
@{thm (lhs) flat.simps(7)} & $\dn$ & @{thm (rhs) flat.simps(7)}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
356 |
\end{tabular} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
357 |
\end{center} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
358 |
|
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
359 |
\noindent Sulzmann and Lu also define inductively an inhabitation relation |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
360 |
that associates values to regular expressions: |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
361 |
|
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
362 |
\begin{center} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
363 |
\begin{tabular}{c} |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
364 |
@{thm[mode=Axiom] Prf.intros(4)} \qquad |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
365 |
@{thm[mode=Axiom] Prf.intros(5)[of "c"]}\medskip\\ |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
366 |
@{thm[mode=Rule] Prf.intros(2)[of "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]} \qquad |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
367 |
@{thm[mode=Rule] Prf.intros(3)[of "v\<^sub>2" "r\<^sub>1" "r\<^sub>2"]}\medskip\\ |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
368 |
@{thm[mode=Rule] Prf.intros(1)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]}\medskip\\ |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
369 |
@{thm[mode=Axiom] Prf.intros(6)[of "r"]} \qquad |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
370 |
@{thm[mode=Rule] Prf.intros(7)[of "v" "r" "vs"]}\medskip\\ |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
371 |
\end{tabular} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
372 |
\end{center} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
373 |
|
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
374 |
\noindent Note that no values are associated with the regular expression |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
375 |
@{term ZERO}, and that the only value associated with the regular |
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
376 |
expression @{term ONE} is @{term Void}, pronounced (if one must) as @{text |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
377 |
"Void"}. It is routine to establish how values ``inhabiting'' a regular |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
378 |
expression correspond to the language of a regular expression, namely |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
379 |
|
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
380 |
\begin{proposition} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
381 |
@{thm L_flat_Prf} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
382 |
\end{proposition} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
383 |
|
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
384 |
In general there is more than one value associated with a regular |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
385 |
expression. In case of POSIX matching the problem is to calculate the |
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
386 |
unique value that satisfies the (informal) POSIX rules from the |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
387 |
Introduction. Graphically the POSIX value calculation algorithm by |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
388 |
Sulzmann and Lu can be illustrated by the picture in Figure~\ref{Sulz} |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
389 |
where the path from the left to the right involving @{const der}/@{const |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
390 |
nullable} is the first phase of the algorithm (calculating successive |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
391 |
\Brz's derivatives) and @{const mkeps}/@{text inj}, the path from right to |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
392 |
left, the second phase. This picture shows the steps required when a |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
393 |
regular expression, say @{text "r\<^sub>1"}, matches the string @{term |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
394 |
"[a,b,c]"}. We first build the three derivatives (according to @{term a}, |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
395 |
@{term b} and @{term c}). We then use @{const nullable} to find out |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
396 |
whether the resulting derivative regular expression @{term "r\<^sub>4"} |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
397 |
can match the empty string. If yes, we call the function @{const mkeps} |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
398 |
that produces a value @{term "v\<^sub>4"} for how @{term "r\<^sub>4"} can |
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
399 |
match the empty string (taking into account the POSIX rules in case |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
400 |
there are several ways). This functions is defined by the clauses: |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
401 |
|
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
402 |
\begin{figure}[t] |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
403 |
\begin{center} |
115
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
404 |
\begin{tikzpicture}[scale=2,node distance=1.3cm, |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
405 |
every node/.style={minimum size=7mm}] |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
406 |
\node (r1) {@{term "r\<^sub>1"}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
407 |
\node (r2) [right=of r1]{@{term "r\<^sub>2"}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
408 |
\draw[->,line width=1mm](r1)--(r2) node[above,midway] {@{term "der a DUMMY"}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
409 |
\node (r3) [right=of r2]{@{term "r\<^sub>3"}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
410 |
\draw[->,line width=1mm](r2)--(r3) node[above,midway] {@{term "der b DUMMY"}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
411 |
\node (r4) [right=of r3]{@{term "r\<^sub>4"}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
412 |
\draw[->,line width=1mm](r3)--(r4) node[above,midway] {@{term "der c DUMMY"}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
413 |
\draw (r4) node[anchor=west] {\;\raisebox{3mm}{@{term nullable}}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
414 |
\node (v4) [below=of r4]{@{term "v\<^sub>4"}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
415 |
\draw[->,line width=1mm](r4) -- (v4); |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
416 |
\node (v3) [left=of v4] {@{term "v\<^sub>3"}}; |
115
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
417 |
\draw[->,line width=1mm](v4)--(v3) node[below,midway] {@{text "inj r\<^sub>3 c"}}; |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
418 |
\node (v2) [left=of v3]{@{term "v\<^sub>2"}}; |
115
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
419 |
\draw[->,line width=1mm](v3)--(v2) node[below,midway] {@{text "inj r\<^sub>2 b"}}; |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
420 |
\node (v1) [left=of v2] {@{term "v\<^sub>1"}}; |
115
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
421 |
\draw[->,line width=1mm](v2)--(v1) node[below,midway] {@{text "inj r\<^sub>1 a"}}; |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
422 |
\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{@{term "mkeps"}}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
423 |
\end{tikzpicture} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
424 |
\end{center} |
121
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
425 |
\caption{The two phases of the algorithm by Sulzmann \& Lu \cite{Sulzmann2014}, |
115
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
426 |
matching the string @{term "[a,b,c]"}. The first phase (the arrows from |
121
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
427 |
left to right) is \Brz's matcher building succesive derivatives. If the |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
428 |
last regular expression is @{term nullable}, then the functions of the |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
429 |
second phase are called (the top-down and right-to-left arrows): first |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
430 |
@{term mkeps} calculates a value witnessing |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
431 |
how the empty string has been recognised by @{term "r\<^sub>4"}. After |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
432 |
that the function @{term inj} `injects back' the characters of the string into |
121
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
433 |
the values. |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
434 |
\label{Sulz}} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
435 |
\end{figure} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
436 |
|
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
437 |
\begin{center} |
111
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
438 |
\begin{tabular}{lcl} |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
439 |
@{thm (lhs) mkeps.simps(1)} & $\dn$ & @{thm (rhs) mkeps.simps(1)}\\ |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
440 |
@{thm (lhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
441 |
@{thm (lhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
442 |
@{thm (lhs) mkeps.simps(4)} & $\dn$ & @{thm (rhs) mkeps.simps(4)}\\ |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
443 |
\end{tabular} |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
444 |
\end{center} |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
445 |
|
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
446 |
\noindent Note that this function needs only to be partially defined, |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
447 |
namely only for regular expressions that are nullable. In case @{const |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
448 |
nullable} fails, the string @{term "[a,b,c]"} cannot be matched by @{term |
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
449 |
"r\<^sub>1"} and an error is raised instead. Note also how this function |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
450 |
makes some subtle choices leading to a POSIX value: for example if an |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
451 |
alternative regular expression, say @{term "ALT r\<^sub>1 r\<^sub>2"}, can |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
452 |
match the empty string and furthermore @{term "r\<^sub>1"} can match the |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
453 |
empty string, then we return a @{text Left}-value. The @{text |
121
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
454 |
Right}-value will only be returned if @{term "r\<^sub>1"} cannot match the empty |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
455 |
string. |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
456 |
|
121
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
457 |
The most interesting idea from Sulzmann and Lu \cite{Sulzmann2014} is |
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
458 |
the construction of a value for how @{term "r\<^sub>1"} can match the |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
459 |
string @{term "[a,b,c]"} from the value how the last derivative, @{term |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
460 |
"r\<^sub>4"} in Fig~\ref{Sulz}, can match the empty string. Sulzmann and |
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
461 |
Lu achieve this by stepwise ``injecting back'' the characters into the |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
462 |
values thus inverting the operation of building derivatives on the level |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
463 |
of values. The corresponding function, called @{term inj}, takes three |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
464 |
arguments, a regular expression, a character and a value. For example in |
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
465 |
the first (or right-most) @{term inj}-step in Fig~\ref{Sulz} the regular |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
466 |
expression @{term "r\<^sub>3"}, the character @{term c} from the last |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
467 |
derivative step and @{term "v\<^sub>4"}, which is the value corresponding |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
468 |
to the derivative regular expression @{term "r\<^sub>4"}. The result is |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
469 |
the new value @{term "v\<^sub>3"}. The final result of the algorithm is |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
470 |
the value @{term "v\<^sub>1"} corresponding to the input regular |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
471 |
expression. The @{term inj} function is by recursion on the regular |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
472 |
expressions and by analysing the shape of values (corresponding to |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
473 |
the derivative regular expressions). |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
474 |
|
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
475 |
\begin{center} |
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
476 |
\begin{tabular}{l@ {\hspace{5mm}}lcl} |
118
79efc0bcfc96
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
117
diff
changeset
|
477 |
(1) & @{thm (lhs) injval.simps(1)} & $\dn$ & @{thm (rhs) injval.simps(1)}\\ |
79efc0bcfc96
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
117
diff
changeset
|
478 |
(2) & @{thm (lhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]} & $\dn$ & |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
479 |
@{thm (rhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]}\\ |
118
79efc0bcfc96
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
117
diff
changeset
|
480 |
(3) & @{thm (lhs) injval.simps(3)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]} & $\dn$ & |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
481 |
@{thm (rhs) injval.simps(3)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]}\\ |
118
79efc0bcfc96
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
117
diff
changeset
|
482 |
(4) & @{thm (lhs) injval.simps(4)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]} & $\dn$ |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
483 |
& @{thm (rhs) injval.simps(4)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]}\\ |
118
79efc0bcfc96
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
117
diff
changeset
|
484 |
(5) & @{thm (lhs) injval.simps(5)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]} & $\dn$ |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
485 |
& @{thm (rhs) injval.simps(5)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]}\\ |
118
79efc0bcfc96
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
117
diff
changeset
|
486 |
(6) & @{thm (lhs) injval.simps(6)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]} & $\dn$ |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
487 |
& @{thm (rhs) injval.simps(6)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]}\\ |
118
79efc0bcfc96
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
117
diff
changeset
|
488 |
(7) & @{thm (lhs) injval.simps(7)[of "r" "c" "v" "vs"]} & $\dn$ |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
489 |
& @{thm (rhs) injval.simps(7)[of "r" "c" "v" "vs"]}\\ |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
490 |
\end{tabular} |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
491 |
\end{center} |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
492 |
|
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
493 |
\noindent To better understand what is going on in this definition it |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
494 |
might be instructive to look first at the three sequence cases (clauses |
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
495 |
(4)--(6)). In each case we need to construct an ``injected value'' for |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
496 |
@{term "SEQ r\<^sub>1 r\<^sub>2"}. This must be a value of the form @{term |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
497 |
"Seq DUMMY DUMMY"}. Recall the clause of the @{const der}-function |
117
2c4ffcc95399
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
116
diff
changeset
|
498 |
for sequence regular expressions: |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
499 |
|
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
500 |
\begin{center} |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
501 |
@{thm (lhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]} $\dn$ @{thm (rhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]} |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
502 |
\end{center} |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
503 |
|
117
2c4ffcc95399
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
116
diff
changeset
|
504 |
\noindent Consider first the else-branch where the derivative is @{term |
2c4ffcc95399
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
116
diff
changeset
|
505 |
"SEQ (der c r\<^sub>1) r\<^sub>2"}. The corresponding value must therefore |
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
506 |
be the form @{term "Seq v\<^sub>1 v\<^sub>2"}, which matches the left-hand |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
507 |
side in clause (4) of @{term inj}. In the if-branch the derivative is an |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
508 |
alternative, namely @{term "ALT (SEQ (der c r\<^sub>1) r\<^sub>2) (der c |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
509 |
r\<^sub>2)"}. This means we either have to consider a @{text Left}- or |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
510 |
@{text Right}-value. In case of the @{text Left}-value we know further it |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
511 |
must be a value for a sequence regular expression. Therefore the pattern |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
512 |
we match in the clause (5) is @{term "Left (Seq v\<^sub>1 v\<^sub>2)"}, |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
513 |
while in (6) it is just @{term "Right v\<^sub>2"}. One more interesting |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
514 |
point is in the right-hand side of clause (6): since in this case the |
121
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
515 |
regular expression @{text "r\<^sub>1"} does not ``contribute'' to |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
516 |
matching the string, that means it only matches the empty string, we need to |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
517 |
call @{const mkeps} in order to construct a value for how @{term "r\<^sub>1"} |
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
518 |
can match this empty string. A similar argument applies for why we can |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
519 |
expect in the left-hand side of clause (7) that the value is of the form |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
520 |
@{term "Seq v (Stars vs)"}---the derivative of a star is @{term "SEQ r |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
521 |
(STAR r)"}. Finally, the reason for why we can ignore the second argument |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
522 |
in clause (1) of @{term inj} is that it will only ever be called in cases |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
523 |
where @{term "c=d"}, but the usual linearity restrictions in patterns do |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
524 |
not allow is to build this constraint explicitly into our function |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
525 |
definition.\footnote{Sulzmann and Lu state this clause as @{thm (lhs) |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
526 |
injval.simps(1)[of "c" "c"]} $\dn$ @{thm (rhs) injval.simps(1)[of "c"]}, |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
527 |
but our deviation is harmless.} |
118
79efc0bcfc96
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
117
diff
changeset
|
528 |
|
121
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
529 |
The idea of @{term inj} to ``inject back'' a character into a value can |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
530 |
be made precise by the first part of the following lemma; the second |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
531 |
part shows that the underlying string of an @{const mkeps}-value is |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
532 |
the empty string. |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
533 |
|
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
534 |
\begin{lemma}\mbox{}\\\label{Prf_injval_flat} |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
535 |
\begin{tabular}{ll} |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
536 |
(1) & @{thm[mode=IfThen] Prf_injval_flat}\\ |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
537 |
(2) & @{thm[mode=IfThen] mkeps_flat} |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
538 |
\end{tabular} |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
539 |
\end{lemma} |
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
540 |
|
118
79efc0bcfc96
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
117
diff
changeset
|
541 |
Having defined the @{const mkeps} and @{text inj} function we can extend |
79efc0bcfc96
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
117
diff
changeset
|
542 |
\Brz's matcher so that a [lexical] value is constructed (assuming the |
79efc0bcfc96
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
117
diff
changeset
|
543 |
regular expression matches the string). The clauses of the lexer are |
79efc0bcfc96
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
117
diff
changeset
|
544 |
|
79efc0bcfc96
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
117
diff
changeset
|
545 |
\begin{center} |
79efc0bcfc96
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
117
diff
changeset
|
546 |
\begin{tabular}{lcl} |
79efc0bcfc96
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
117
diff
changeset
|
547 |
@{thm (lhs) matcher.simps(1)} & $\dn$ & @{thm (rhs) matcher.simps(1)}\\ |
79efc0bcfc96
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
117
diff
changeset
|
548 |
@{thm (lhs) matcher.simps(2)} & $\dn$ & @{text "case"} @{term "matcher (der c r) s"} @{text of}\\ |
79efc0bcfc96
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
117
diff
changeset
|
549 |
& & \phantom{$|$} @{term "None"} @{text "\<Rightarrow>"} @{term None}\\ |
79efc0bcfc96
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
117
diff
changeset
|
550 |
& & $|$ @{term "Some v"} @{text "\<Rightarrow>"} @{term "Some (injval r c v)"} |
79efc0bcfc96
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
117
diff
changeset
|
551 |
\end{tabular} |
79efc0bcfc96
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
117
diff
changeset
|
552 |
\end{center} |
79efc0bcfc96
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
117
diff
changeset
|
553 |
|
119
71e26f43c896
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
118
diff
changeset
|
554 |
\noindent If the regular expression does not match, @{const None} is |
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
555 |
returned, indicating an error is raised. If the regular expression does |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
556 |
match the string, then @{const Some} value is returned. One important |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
557 |
virtue of this algorithm is that it can be implemented with ease in a |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
558 |
functional programming language and also in Isabelle/HOL. In the remaining |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
559 |
part of this section we prove that this algorithm is correct. |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
560 |
|
119
71e26f43c896
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
118
diff
changeset
|
561 |
The well-known idea of POSIX matching is informally defined by the longest |
71e26f43c896
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
118
diff
changeset
|
562 |
match and priority rule; as correctly argued in \cite{Sulzmann2014}, this |
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
563 |
needs formal specification. Sulzmann and Lu define a \emph{dominance} |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
564 |
relation\footnote{Sulzmann and Lu call it an ordering relation, but |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
565 |
without giving evidence that it is transitive.} between values and argue that |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
566 |
there is a maximum value, as given by the derivative-based algorithm. In |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
567 |
contrast, we shall next introduce a simple inductive definition to specify |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
568 |
what a \emph{POSIX value} is, incorporating the POSIX-specific choices |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
569 |
into the side-conditions of our rules. Our definition is inspired by the |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
570 |
matching relation given in \cite{Vansummeren2006}. The relation we define |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
571 |
is ternary and written as \mbox{@{term "s \<in> r \<rightarrow> v"}}, relating strings, |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
572 |
regular expressions and values. |
112
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
573 |
|
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
574 |
\begin{center} |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
575 |
\begin{tabular}{c} |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
576 |
@{thm[mode=Axiom] PMatch.intros(1)} \qquad |
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
577 |
@{thm[mode=Axiom] PMatch.intros(2)}\bigskip\\ |
112
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
578 |
@{thm[mode=Rule] PMatch.intros(3)[of "s" "r\<^sub>1" "v" "r\<^sub>2"]}\qquad |
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
579 |
@{thm[mode=Rule] PMatch.intros(4)[of "s" "r\<^sub>2" "v" "r\<^sub>1"]}\bigskip\\ |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
580 |
$\mprset{flushleft} |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
581 |
\inferrule |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
582 |
{@{thm (prem 1) PMatch.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \qquad |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
583 |
@{thm (prem 2) PMatch.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \\\\ |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
584 |
@{thm (prem 3) PMatch.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}} |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
585 |
{@{thm (concl) PMatch.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}$\\ |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
586 |
@{thm[mode=Axiom] PMatch.intros(7)}\bigskip\\ |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
587 |
$\mprset{flushleft} |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
588 |
\inferrule |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
589 |
{@{thm (prem 1) PMatch.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
590 |
@{thm (prem 2) PMatch.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
591 |
@{thm (prem 3) PMatch.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \\\\ |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
592 |
@{thm (prem 4) PMatch.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}} |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
593 |
{@{thm (concl) PMatch.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}$ |
112
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
594 |
\end{tabular} |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
595 |
\end{center} |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
596 |
|
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
597 |
\noindent We claim that this relation captures the idea behind the two |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
598 |
informal POSIX rules shown in the Introduction: Consider the second line |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
599 |
where the POSIX values for an alternative regular expression is |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
600 |
specified---it is always a @{text "Left"}-value, \emph{except} when the |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
601 |
string to be matched is not in the language of @{term "r\<^sub>1"}; only then it |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
602 |
is a @{text Right}-value (see the side-condition in the rule on the |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
603 |
right). Interesting is also the rule for sequence regular expressions |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
604 |
(third line). The first two premises state that @{term "v\<^sub>1"} and |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
605 |
@{term "v\<^sub>2"} are the POSIX values for @{term "(s\<^sub>1, |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
606 |
r\<^sub>1)"} and @{term "(s\<^sub>2, r\<^sub>2)"} respectively. |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
607 |
|
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
608 |
\begin{theorem} |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
609 |
@{thm[mode=IfThen] PMatch_determ(1)[of _ _ "v\<^sub>1" "v\<^sub>2"]} |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
610 |
\end{theorem} |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
611 |
|
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
612 |
\begin{lemma} |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
613 |
@{thm[mode=IfThen] PMatch_mkeps} |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
614 |
\end{lemma} |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
615 |
|
121
4c85af262ee7
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
120
diff
changeset
|
616 |
|
120
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
617 |
|
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
618 |
\begin{lemma} |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
619 |
@{thm[mode=IfThen] PMatch2_roy_version} |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
620 |
\end{lemma} |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
621 |
|
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
622 |
\begin{theorem}\mbox{}\smallskip\\ |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
623 |
\begin{tabular}{ll} |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
624 |
(1) & @{thm (lhs) lex_correct1a} if and only if @{thm (rhs) lex_correct1a}\\ |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
625 |
(2) & @{thm (lhs) lex_correct3a} if and only if @{thm (rhs) lex_correct3a}\\ |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
626 |
\end{tabular} |
d74bfa11802c
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
119
diff
changeset
|
627 |
\end{theorem} |
111
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
628 |
*} |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
629 |
|
107
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
630 |
section {* The Argument by Sulzmmann and Lu *} |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
631 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
632 |
section {* Conclusion *} |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
633 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
634 |
text {* |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
635 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
636 |
Nipkow lexer from 2000 |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
637 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
638 |
*} |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
639 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
640 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
641 |
text {* |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
642 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
643 |
|
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
644 |
\noindent |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
645 |
We have also introduced a slightly restricted version of this relation |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
646 |
where the last rule is restricted so that @{term "flat v \<noteq> []"}. |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
647 |
This relation for \emph{non-problematic} is written @{term "\<Turnstile> v : r"}. |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
648 |
\bigskip |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
649 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
650 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
651 |
\noindent |
112
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
652 |
|
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
653 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
654 |
\noindent |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
655 |
Our version of Sulzmann's ordering relation |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
656 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
657 |
\begin{center} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
658 |
\begin{tabular}{c} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
659 |
@{thm[mode=Rule] ValOrd.intros(2)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>1'" "v\<^sub>2" "r\<^sub>2" "v\<^sub>2'"]} \qquad |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
660 |
@{thm[mode=Rule] ValOrd.intros(1)[of "v\<^sub>2" "r\<^sub>2" "v\<^sub>2'" "v\<^sub>1" "r\<^sub>1"]}\medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
661 |
@{thm[mode=Rule] ValOrd.intros(3)[of "v\<^sub>1" "v\<^sub>2" "r\<^sub>1" "r\<^sub>2"]} \qquad |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
662 |
@{thm[mode=Rule] ValOrd.intros(4)[of "v\<^sub>2" "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]}\medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
663 |
@{thm[mode=Rule] ValOrd.intros(5)[of "v\<^sub>2" "r\<^sub>2" "v\<^sub>2'" "r\<^sub>1"]} \qquad |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
664 |
@{thm[mode=Rule] ValOrd.intros(6)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>1'" "r\<^sub>2"]} \medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
665 |
@{thm[mode=Axiom] ValOrd.intros(7)}\qquad |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
666 |
@{thm[mode=Axiom] ValOrd.intros(8)[of "c"]}\medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
667 |
@{thm[mode=Rule] ValOrd.intros(9)[of "v" "vs" "r"]}\qquad |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
668 |
@{thm[mode=Rule] ValOrd.intros(10)[of "v" "vs" "r"]}\medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
669 |
@{thm[mode=Rule] ValOrd.intros(11)[of "v\<^sub>1" "r" "v\<^sub>2" "vs\<^sub>1" "vs\<^sub>2"]}\medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
670 |
@{thm[mode=Rule] ValOrd.intros(12)[of "vs\<^sub>1" "r" "vs\<^sub>2" "v"]}\qquad |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
671 |
@{thm[mode=Axiom] ValOrd.intros(13)[of "r"]}\medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
672 |
\end{tabular} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
673 |
\end{center} |
98
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
674 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
675 |
\noindent |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
676 |
A prefix of a string s |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
677 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
678 |
\begin{center} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
679 |
\begin{tabular}{c} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
680 |
@{thm prefix_def[of "s\<^sub>1" "s\<^sub>2"]} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
681 |
\end{tabular} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
682 |
\end{center} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
683 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
684 |
\noindent |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
685 |
Values and non-problematic values |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
686 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
687 |
\begin{center} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
688 |
\begin{tabular}{c} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
689 |
@{thm Values_def}\medskip\\ |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
690 |
\end{tabular} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
691 |
\end{center} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
692 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
693 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
694 |
\noindent |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
695 |
The point is that for a given @{text s} and @{text r} there are only finitely many |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
696 |
non-problematic values. |
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
697 |
*} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
698 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
699 |
text {* |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
700 |
\noindent |
98
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
701 |
Some lemmas we have proved:\bigskip |
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
702 |
|
98
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
703 |
@{thm L_flat_Prf} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
704 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
705 |
@{thm L_flat_NPrf} |
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
706 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
707 |
@{thm[mode=IfThen] mkeps_nullable} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
708 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
709 |
@{thm[mode=IfThen] mkeps_flat} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
710 |
|
107
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
711 |
@{thm[mode=IfThen] Prf_injval} |
98
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
712 |
|
107
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
713 |
@{thm[mode=IfThen] Prf_injval_flat} |
98
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
714 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
715 |
@{thm[mode=IfThen] PMatch_mkeps} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
716 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
717 |
@{thm[mode=IfThen] PMatch1(2)} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
718 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
719 |
@{thm[mode=IfThen] PMatch1N} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
720 |
|
100
8b919b3d753e
strengthened PMatch to get determ
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
99
diff
changeset
|
721 |
@{thm[mode=IfThen] PMatch_determ(1)[of "s" "r" "v\<^sub>1" "v\<^sub>2"]} |
8b919b3d753e
strengthened PMatch to get determ
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
99
diff
changeset
|
722 |
|
8b919b3d753e
strengthened PMatch to get determ
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
99
diff
changeset
|
723 |
\medskip |
98
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
724 |
\noindent |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
725 |
This is the main theorem that lets us prove that the algorithm is correct according to |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
726 |
@{term "s \<in> r \<rightarrow> v"}: |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
727 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
728 |
@{thm[mode=IfThen] PMatch2} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
729 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
730 |
\mbox{}\bigskip |
102
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
731 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
732 |
\noindent {\bf Proof} The proof is by induction on the definition of |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
733 |
@{const der}. Other inductions would go through as well. The |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
734 |
interesting case is for @{term "SEQ r\<^sub>1 r\<^sub>2"}. First we analyse the |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
735 |
case where @{term "nullable r\<^sub>1"}. We have by induction hypothesis |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
736 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
737 |
\[ |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
738 |
\begin{array}{l} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
739 |
(IH1)\quad @{text "\<forall>s v."} \text{\;if\;} @{term "s \<in> der c r\<^sub>1 \<rightarrow> v"} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
740 |
\text{\;then\;} @{term "(c # s) \<in> r\<^sub>1 \<rightarrow> injval r\<^sub>1 c v"}\\ |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
741 |
(IH2)\quad @{text "\<forall>s v."} \text{\;if\;} @{term "s \<in> der c r\<^sub>2 \<rightarrow> v"} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
742 |
\text{\;then\;} @{term "(c # s) \<in> r\<^sub>2 \<rightarrow> injval r\<^sub>2 c v"} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
743 |
\end{array} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
744 |
\] |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
745 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
746 |
\noindent |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
747 |
and have |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
748 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
749 |
\[ |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
750 |
@{term "s \<in> ALT (SEQ (der c r\<^sub>1) r\<^sub>2) (der c r\<^sub>2) \<rightarrow> v"} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
751 |
\] |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
752 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
753 |
\noindent |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
754 |
There are two cases what @{term v} can be: (1) @{term "Left v'"} and (2) @{term "Right v'"}. |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
755 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
756 |
\begin{itemize} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
757 |
\item[(1)] We know @{term "s \<in> SEQ (der c r\<^sub>1) r\<^sub>2 \<rightarrow> v'"} holds, from which we |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
758 |
can infer that there are @{text "s\<^sub>1"}, @{term "s\<^sub>2"}, @{text "v\<^sub>1"}, @{term "v\<^sub>2"} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
759 |
with |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
760 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
761 |
\[ |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
762 |
@{term "s\<^sub>1 \<in> der c r\<^sub>1 \<rightarrow> v\<^sub>1"} \qquad\text{and}\qquad @{term "s\<^sub>2 \<in> r\<^sub>2 \<rightarrow> v\<^sub>2"} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
763 |
\] |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
764 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
765 |
and also |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
766 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
767 |
\[ |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
768 |
@{term "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s\<^sub>2 \<and> s\<^sub>1 @ s\<^sub>3 \<in> L (der c r\<^sub>1) \<and> s\<^sub>4 \<in> L r\<^sub>2)"} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
769 |
\] |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
770 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
771 |
\noindent |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
772 |
and have to prove |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
773 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
774 |
\[ |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
775 |
@{term "((c # s\<^sub>1) @ s\<^sub>2) \<in> SEQ r\<^sub>1 r\<^sub>2 \<rightarrow> Seq (injval r\<^sub>1 c v\<^sub>1) v\<^sub>2"} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
776 |
\] |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
777 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
778 |
\noindent |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
779 |
The two requirements @{term "(c # s\<^sub>1) \<in> r\<^sub>1 \<rightarrow> injval r\<^sub>1 c v\<^sub>1"} and |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
780 |
@{term "s\<^sub>2 \<in> r\<^sub>2 \<rightarrow> v\<^sub>2"} can be proved by the induction hypothese (IH1) and the |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
781 |
fact above. |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
782 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
783 |
\noindent |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
784 |
This leaves to prove |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
785 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
786 |
\[ |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
787 |
@{term "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s\<^sub>2 \<and> (c # s\<^sub>1) @ s\<^sub>3 \<in> L r\<^sub>1 \<and> s\<^sub>4 \<in> L r\<^sub>2)"} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
788 |
\] |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
789 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
790 |
\noindent |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
791 |
which holds because @{term "(c # s\<^sub>1) @ s\<^sub>3 \<in> L r\<^sub>1 "} implies @{term "s\<^sub>1 @ s\<^sub>3 \<in> L (der c r\<^sub>1) "} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
792 |
|
103
ffe5d850df62
added some slides
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
102
diff
changeset
|
793 |
\item[(2)] This case is similar. |
102
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
794 |
\end{itemize} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
795 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
796 |
\noindent |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
797 |
The final case is that @{term " \<not> nullable r\<^sub>1"} holds. This case again similar |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
798 |
to the cases above. |
98
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
799 |
*} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
800 |
|
95
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
801 |
|
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
802 |
text {* |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
803 |
%\noindent |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
804 |
%{\bf Acknowledgements:} |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
805 |
%We are grateful for the comments we received from anonymous |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
806 |
%referees. |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
807 |
|
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
808 |
\bibliographystyle{plain} |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
809 |
\bibliography{root} |
101
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
810 |
|
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
811 |
\section{Roy's Rules} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
812 |
|
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
813 |
\newcommand{\abs}[1]{\mid\!\! #1\!\! \mid} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
814 |
%%\newcommand{\mts}{\textit{``''} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
815 |
\newcommand{\tl}{\ \triangleleft\ } |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
816 |
$$\inferrule[]{Void \tl \epsilon}{} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
817 |
\quad\quad |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
818 |
\inferrule[]{Char\ c \tl Lit\ c}{} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
819 |
$$ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
820 |
$$\inferrule |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
821 |
{v_1 \tl r_1} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
822 |
{Left\ v_1 \tl r_1 + r_2} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
823 |
\quad\quad |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
824 |
\inferrule[] |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
825 |
{ v_2 \tl r_2 \\ \abs{v_2}\ \not\in\ L(r_1)} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
826 |
{Right\ v_2 \tl r_1 + r_2} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
827 |
$$ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
828 |
$$ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
829 |
\inferrule |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
830 |
{v_1 \tl r_1\\ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
831 |
v_2 \tl r_2\\ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
832 |
s \in\ L(r_1\backslash\! \abs{v_1}) \ \land\ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
833 |
\abs{v_2}\!\backslash s\ \epsilon\ L(r_2) |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
834 |
\ \Rightarrow\ s = [] |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
835 |
} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
836 |
{(v_1, v_2) \tl r_1 \cdot r_2} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
837 |
$$ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
838 |
$$\inferrule |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
839 |
{ v \tl r \\ vs \tl r^* \\ \abs{v}\ \not=\ []} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
840 |
{ (v :: vs) \tl r^* } |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
841 |
\quad\quad |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
842 |
\inferrule{} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
843 |
{ [] \tl r^* } |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
844 |
$$ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
845 |
|
95
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
846 |
*} |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
847 |
|
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
848 |
|
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
849 |
(*<*) |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
850 |
end |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
851 |
(*>*) |