author | Christian Urban <christian dot urban at kcl dot ac dot uk> |
Sat, 05 Mar 2016 12:10:01 +0000 | |
changeset 115 | 15ef2af1a6f2 |
parent 114 | 8b41d01b5e5d |
child 116 | 022503caa187 |
permissions | -rw-r--r-- |
95
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
1 |
(*<*) |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
2 |
theory Paper |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
3 |
imports "../ReStar" "~~/src/HOL/Library/LaTeXsugar" |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
4 |
begin |
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
5 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
6 |
declare [[show_question_marks = false]] |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
7 |
|
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
8 |
abbreviation |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
9 |
"der_syn r c \<equiv> der c r" |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
10 |
|
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
11 |
abbreviation |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
12 |
"ders_syn r s \<equiv> ders s r" |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
13 |
|
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
14 |
notation (latex output) |
111
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
15 |
If ("(\<^raw:\textrm{>if\<^raw:}> (_)/ \<^raw:\textrm{>then\<^raw:}> (_)/ \<^raw:\textrm{>else\<^raw:}> (_))" 10) and |
107
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
16 |
Cons ("_\<^raw:\mbox{$\,$}>::\<^raw:\mbox{$\,$}>_" [78,77] 73) and |
111
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
17 |
|
109
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
18 |
ZERO ("\<^bold>0" 80) and |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
19 |
ONE ("\<^bold>1" 80) and |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
20 |
CHAR ("_" [1000] 80) and |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
21 |
ALT ("_ + _" [77,77] 78) and |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
22 |
SEQ ("_ \<cdot> _" [77,77] 78) and |
107
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
23 |
STAR ("_\<^sup>\<star>" [1000] 78) and |
111
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
24 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
25 |
val.Void ("'(')" 79) and |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
26 |
val.Char ("Char _" [1000] 79) and |
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
27 |
val.Left ("Left _" [1000] 78) and |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
28 |
val.Right ("Right _" [1000] 78) and |
111
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
29 |
|
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
30 |
L ("L'(_')" [10] 78) and |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
31 |
der_syn ("_\\_" [79, 1000] 76) and |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
32 |
ders_syn ("_\\_" [79, 1000] 76) and |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
33 |
flat ("|_|" [75] 73) and |
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
34 |
Sequ ("_ @ _" [78,77] 63) and |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
35 |
injval ("inj _ _ _" [78,77,78] 77) and |
102
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
36 |
projval ("proj _ _ _" [1000,77,1000] 77) and |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
37 |
length ("len _" [78] 73) and |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
38 |
|
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
39 |
Prf ("\<triangleright> _ : _" [75,75] 75) and |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
40 |
PMatch ("_ : _ \<rightarrow> _" [75,75,75] 75) |
105
80218dddbb15
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
103
diff
changeset
|
41 |
(* and ValOrd ("_ \<succeq>\<^bsub>_\<^esub> _" [78,77,77] 73) *) |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
42 |
|
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
43 |
definition |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
44 |
"match r s \<equiv> nullable (ders s r)" |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
45 |
|
95
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
46 |
(*>*) |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
47 |
|
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
48 |
section {* Introduction *} |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
49 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
50 |
|
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
51 |
text {* |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
52 |
|
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
53 |
Brzozowski \cite{Brzozowski1964} introduced the notion of the {\em |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
54 |
derivative} @{term "der c r"} of a regular expression @{text r} w.r.t.\ a |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
55 |
character~@{text c}, and showed that it gave a simple solution to the |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
56 |
problem of matching a string @{term s} with a regular expression @{term r}: |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
57 |
if the derivative of @{term r} w.r.t.\ (in succession) all the characters of |
110
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
58 |
the string matches the empty string, then @{term r} matches @{term s} |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
59 |
(and {\em vice versa}). The derivative has the property (which may be |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
60 |
regarded as its specification) that, for every string @{term s} and regular |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
61 |
expression @{term r} and character @{term c}, one has @{term "cs \<in> L(r)"} if |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
62 |
and only if \mbox{@{term "s \<in> L(der c r)"}}. The beauty of Brzozowski's |
109
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
63 |
derivatives is that they are neatly expressible in any functional language, |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
64 |
and easily definable and reasoned about in theorem provers---the definitions |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
65 |
just consist of inductive datatypes and simple recursive functions. A |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
66 |
completely formalised correctness proof of this matcher in for example HOL4 |
115
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
67 |
has been mentioned in~\cite{Owens2008}. |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
68 |
|
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
69 |
One limitation of Brzozowski's matcher is that it only generates a YES/NO |
109
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
70 |
answer for whether a string is being matched by a regular expression. |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
71 |
Sulzmann and Lu \cite{Sulzmann2014} extended this matcher to allow |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
72 |
generation not just of a YES/NO answer but of an actual matching, called a |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
73 |
[lexical] {\em value}. They give a simple algorithm to calculate a value |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
74 |
that appears to be the value associated with POSIX matching |
109
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
75 |
\cite{Kuklewicz,Vansummeren2006}. The challenge then is to specify that |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
76 |
value, in an algorithm-independent fashion, and to show that Sulzamann and |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
77 |
Lu's derivative-based algorithm does indeed calculate a value that is |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
78 |
correct according to the specification. |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
79 |
|
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
80 |
The answer given by Sulzmann and Lu \cite{Sulzmann2014} is to define a |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
81 |
relation (called an ``Order Relation'') on the set of values of @{term r}, |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
82 |
and to show that (once a string to be matched is chosen) there is a maximum |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
83 |
element and that it is computed by their derivative-based algorithm. This |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
84 |
proof idea is inspired by work of Frisch and Cardelli \cite{Frisch2004} on a |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
85 |
GREEDY regular expression matching algorithm. Beginning with our |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
86 |
observations that, without evidence that it is transitive, it cannot be |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
87 |
called an ``order relation'', and that the relation is called a ``total |
115
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
88 |
order'' despite being evidently not total\footnote{The relation @{text |
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
89 |
"\<ge>\<^bsub>r\<^esub>"} defined in \cite{Sulzmann2014} is a relation on the values for |
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
90 |
the regular expression @{term r}; but it only holds between @{term v} and |
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
91 |
@{term "v'"} in cases where @{term v} and @{term "v'"} have the same |
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
92 |
flattening (underlying string). So a counterexample to totality is given by |
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
93 |
taking two values @{term v} and @{term "v'"} for @{term r} that have |
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
94 |
different flattenings. A different relation @{text "\<ge>\<^bsub>r,s\<^esub>"} |
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
95 |
on the set of values for @{term r} with flattening @{term s} is definable by |
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
96 |
the same approach, and is indeed total; but that is not what Proposition 1 |
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
97 |
of \cite{Sulzmann2014} does.}, we identify problems with this approach (of |
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
98 |
which some of the proofs are not published in \cite{Sulzmann2014}); perhaps |
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
99 |
more importantly, we give a simple inductive (and algorithm-independent) |
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
100 |
definition of what we call being a {\em POSIX value} for a regular |
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
101 |
expression @{term r} and a string @{term s}; we show that the algorithm |
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
102 |
computes such a value and that such a value is unique. Proofs are both done |
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
103 |
by hand and checked in Isabelle/HOL. The experience of doing our proofs has |
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
104 |
been that this mechanical checking was absolutely essential: this subject |
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
105 |
area has hidden snares. This was also noted by Kuklewitz \cite{Kuklewicz} |
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
106 |
who found that nearly all POSIX matching implementations are ``buggy'' |
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
107 |
\cite[Page 203]{Sulzmann2014}. |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
108 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
109 |
If a regular expression matches a string, then in general there is more than |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
110 |
one way of how the string is matched. There are two commonly used |
110
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
111 |
disambiguation strategies to generate a unique answer: one is called GREEDY |
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
112 |
matching \cite{Frisch2004} and the other is POSIX |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
113 |
matching~\cite{Kuklewicz,Sulzmann2014,Vansummeren2006}. For example consider |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
114 |
the string @{term xy} and the regular expression \mbox{@{term "STAR (ALT |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
115 |
(ALT x y) xy)"}}. Either the string can be matched in two `iterations' by |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
116 |
the single letter-regular expressions @{term x} and @{term y}, or directly |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
117 |
in one iteration by @{term xy}. The first case corresponds to GREEDY |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
118 |
matching, which first matches with the left-most symbol and only matches the |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
119 |
next symbol in case of a mismatch (this is greedy in the sense of preferring |
110
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
120 |
instant gratification to delayed repletion). The second case is POSIX |
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
121 |
matching, which prefers the longest match. |
109
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
122 |
|
112
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
123 |
In the context of lexing, where an input string needs to be split up into a |
110
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
124 |
sequence of tokens, POSIX is the more natural disambiguation strategy for |
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
125 |
what programmers consider basic syntactic building blocks in their programs. |
112
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
126 |
These building blocks are often specified by some regular expressions, say |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
127 |
@{text "r\<^bsub>key\<^esub>"} and @{text "r\<^bsub>id\<^esub>"} for recognising keywords and |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
128 |
identifiers, respectively. There are two underlying (informal) rules behind |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
129 |
tokenising a string in a POSIX fashion: |
109
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
130 |
|
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
131 |
\begin{itemize} |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
132 |
\item[$\bullet$] \underline{The Longest Match Rule (or ``maximal munch rule''):} |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
133 |
|
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
134 |
The longest initial substring matched by any regular expression is taken as |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
135 |
next token.\smallskip |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
136 |
|
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
137 |
\item[$\bullet$] \underline{Rule Priority:} |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
138 |
|
109
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
139 |
For a particular longest initial substring, the first regular expression |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
140 |
that can match determines the token. |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
141 |
\end{itemize} |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
142 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
143 |
\noindent Consider for example @{text "r\<^bsub>key\<^esub>"} recognising keywords such as |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
144 |
@{text "if"}, @{text "then"} and so on; and @{text "r\<^bsub>id\<^esub>"} recognising |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
145 |
identifiers (a single character followed by characters or numbers). Then we |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
146 |
can form the regular expression @{text "(r\<^bsub>key\<^esub> + r\<^bsub>id\<^esub>)\<^sup>\<star>"} and use POSIX |
110
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
147 |
matching to tokenise strings, say @{text "iffoo"} and @{text "if"}. In the |
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
148 |
first case we obtain by the longest match rule a single identifier token, |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
149 |
not a keyword followed by an identifier. In the second case we obtain by rule |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
150 |
priority a keyword token, not an identifier token---even if @{text "r\<^bsub>id\<^esub>"} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
151 |
matches also.\bigskip |
109
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
152 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
153 |
\noindent {\bf Contributions:} We have implemented in Isabelle/HOL the |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
154 |
derivative-based regular expression matching algorithm as described by |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
155 |
Sulzmann and Lu \cite{Sulzmann2014}. We have proved the correctness of this |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
156 |
algorithms according to our specification what a POSIX value is. The |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
157 |
informal correctness proof given in \cite{Sulzmann2014} is in final |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
158 |
form\footnote{} and to us contains unfillable gaps. Our specification of a |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
159 |
POSIX value consists of a simple inductive definition that given a string |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
160 |
and a regular expression uniquely determines this value. Derivatives as |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
161 |
calculated by Brzozowski's method are usually more complex regular |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
162 |
expressions than the initial one; various optimisations are possible, such |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
163 |
as the simplifications of @{term "ALT ZERO r"}, @{term "ALT r ZERO"}, @{term |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
164 |
"SEQ ONE r"} and @{term "SEQ r ONE"} to @{term r}. One of the advantages of |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
165 |
having a simple specification and correctness proof is that the latter can |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
166 |
be refined to allow for such optimisations and simple correctness proof. |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
167 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
168 |
An extended version of \cite{Sulzmann2014} is available at the website of |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
169 |
its first author; this includes some ``proofs'', claimed in |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
170 |
\cite{Sulzmann2014} to be ``rigorous''. Since these are evidently not in |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
171 |
final form, we make no comment thereon, preferring to give general reasons |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
172 |
for our belief that the approach of \cite{Sulzmann2014} is problematic |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
173 |
rather than to discuss details of unpublished work. |
107
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
174 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
175 |
*} |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
176 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
177 |
section {* Preliminaries *} |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
178 |
|
111
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
179 |
text {* \noindent Strings in Isabelle/HOL are lists of characters with the |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
180 |
empty string being represented by the empty list, written @{term "[]"}, and |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
181 |
list-cons being written as @{term "DUMMY # DUMMY"}. Often we use the usual |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
182 |
bracket notation for lists also for strings; for example a string consisting |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
183 |
of just a single character @{term c} is written @{term "[c]"}. By using the |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
184 |
type @{type char} for characters we have a supply of finitely many |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
185 |
characters roughly corresponding to the ASCII character set. Regular |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
186 |
expressions are defined as usual as the following inductive datatype: |
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
187 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
188 |
\begin{center} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
189 |
@{text "r :="} |
107
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
190 |
@{const "ZERO"} $\mid$ |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
191 |
@{const "ONE"} $\mid$ |
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
192 |
@{term "CHAR c"} $\mid$ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
193 |
@{term "ALT r\<^sub>1 r\<^sub>2"} $\mid$ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
194 |
@{term "SEQ r\<^sub>1 r\<^sub>2"} $\mid$ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
195 |
@{term "STAR r"} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
196 |
\end{center} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
197 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
198 |
\noindent where @{const ZERO} stands for the regular expression that does |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
199 |
not match any string, @{const ONE} for the regular expression that matches |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
200 |
only the empty string and @{term c} for matching a character literal. The |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
201 |
language of a regular expression is again defined as usual by the |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
202 |
recursive function @{term L} with the clauses: |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
203 |
|
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
204 |
\begin{center} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
205 |
\begin{tabular}{rcl} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
206 |
@{thm (lhs) L.simps(1)} & $\dn$ & @{thm (rhs) L.simps(1)}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
207 |
@{thm (lhs) L.simps(2)} & $\dn$ & @{thm (rhs) L.simps(2)}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
208 |
@{thm (lhs) L.simps(3)} & $\dn$ & @{thm (rhs) L.simps(3)}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
209 |
@{thm (lhs) L.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) L.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
210 |
@{thm (lhs) L.simps(5)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) L.simps(5)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
211 |
@{thm (lhs) L.simps(6)} & $\dn$ & @{thm (rhs) L.simps(6)}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
212 |
\end{tabular} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
213 |
\end{center} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
214 |
|
110
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
215 |
\noindent In the fourth clause we use @{term "DUMMY ;; DUMMY"} for the |
112
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
216 |
concatenation of two languages (it is also list-append for strings). We |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
217 |
use the star-notation for regular expressions and languages (in the last |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
218 |
clause above). The star on languages is defined inductively by two |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
219 |
clauses: @{text "(i)"} for the empty string being in the star of a |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
220 |
language and @{text "(ii)"} if @{term "s\<^sub>1"} is in a language and @{term |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
221 |
"s\<^sub>2"} in the star of this language, then also @{term "s\<^sub>1 @ s\<^sub>2"} is in |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
222 |
the star of this language. It will also be convenient to use the following |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
223 |
notion of a \emph{semantic derivative} (or left quotient) of a language, |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
224 |
say @{text A}, defined as: |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
225 |
|
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
226 |
\begin{center} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
227 |
\begin{tabular}{lcl} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
228 |
@{thm (lhs) Der_def} & $\dn$ & @{thm (rhs) Der_def}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
229 |
\end{tabular} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
230 |
\end{center} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
231 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
232 |
\noindent |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
233 |
For semantic derivatives we have the following equations (for example |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
234 |
\cite{Krauss2011}): |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
235 |
|
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
236 |
\begin{equation}\label{SemDer} |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
237 |
\begin{array}{lcl} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
238 |
@{thm (lhs) Der_null} & \dn & @{thm (rhs) Der_null}\\ |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
239 |
@{thm (lhs) Der_empty} & \dn & @{thm (rhs) Der_empty}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
240 |
@{thm (lhs) Der_char} & \dn & @{thm (rhs) Der_char}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
241 |
@{thm (lhs) Der_union} & \dn & @{thm (rhs) Der_union}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
242 |
@{thm (lhs) Der_Sequ} & \dn & @{thm (rhs) Der_Sequ}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
243 |
@{thm (lhs) Der_star} & \dn & @{thm (rhs) Der_star} |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
244 |
\end{array} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
245 |
\end{equation} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
246 |
|
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
247 |
|
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
248 |
\noindent \emph{\Brz's derivatives} of regular expressions |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
249 |
\cite{Brzozowski1964} can be easily defined by two recursive functions: |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
250 |
the first is from regular expressions to booleans (implementing a test |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
251 |
when a regular expression can match the empty string), and the second |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
252 |
takes a regular expression and a character to a (derivative) regular |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
253 |
expression: |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
254 |
|
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
255 |
\begin{center} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
256 |
\begin{tabular}{lcl} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
257 |
@{thm (lhs) nullable.simps(1)} & $\dn$ & @{thm (rhs) nullable.simps(1)}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
258 |
@{thm (lhs) nullable.simps(2)} & $\dn$ & @{thm (rhs) nullable.simps(2)}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
259 |
@{thm (lhs) nullable.simps(3)} & $\dn$ & @{thm (rhs) nullable.simps(3)}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
260 |
@{thm (lhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
261 |
@{thm (lhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
262 |
@{thm (lhs) nullable.simps(6)} & $\dn$ & @{thm (rhs) nullable.simps(6)}\medskip\\ |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
263 |
@{thm (lhs) der.simps(1)} & $\dn$ & @{thm (rhs) der.simps(1)}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
264 |
@{thm (lhs) der.simps(2)} & $\dn$ & @{thm (rhs) der.simps(2)}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
265 |
@{thm (lhs) der.simps(3)} & $\dn$ & @{thm (rhs) der.simps(3)}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
266 |
@{thm (lhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
267 |
@{thm (lhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]}\\ |
110
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
268 |
@{thm (lhs) der.simps(6)} & $\dn$ & @{thm (rhs) der.simps(6)} |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
269 |
\end{tabular} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
270 |
\end{center} |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
271 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
272 |
\noindent |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
273 |
We may extend this definition to give derivatives w.r.t.~strings: |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
274 |
|
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
275 |
\begin{center} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
276 |
\begin{tabular}{lcl} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
277 |
@{thm (lhs) ders.simps(1)} & $\dn$ & @{thm (rhs) ders.simps(1)}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
278 |
@{thm (lhs) ders.simps(2)} & $\dn$ & @{thm (rhs) ders.simps(2)}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
279 |
\end{tabular} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
280 |
\end{center} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
281 |
|
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
282 |
\noindent Given the equations in \eqref{SemDer}, it is a relatively easy |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
283 |
exercise in mechanical reasoning to establish that |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
284 |
|
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
285 |
\begin{proposition}\mbox{}\\ |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
286 |
\begin{tabular}{ll} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
287 |
@{text "(1)"} & @{thm (lhs) nullable_correctness} if and only if |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
288 |
@{thm (rhs) nullable_correctness} \\ |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
289 |
@{text "(2)"} & @{thm[mode=IfThen] der_correctness} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
290 |
\end{tabular} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
291 |
\end{proposition} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
292 |
|
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
293 |
\noindent With this in place it is also very routine to prove that the |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
294 |
regular expression matcher defined as |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
295 |
|
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
296 |
\begin{center} |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
297 |
@{thm match_def} |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
298 |
\end{center} |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
299 |
|
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
300 |
\noindent gives a positive answer if and only if @{term "s \<in> L r"}. While |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
301 |
the matcher above calculates a provably correct a YES/NO answer for |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
302 |
whether a regular expression matches a string, the novel idea of Sulzmann |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
303 |
and Lu \cite{Sulzmann2014} is to append another phase to calculate a |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
304 |
value. We will explain the details next. |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
305 |
|
107
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
306 |
*} |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
307 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
308 |
section {* POSIX Regular Expression Matching *} |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
309 |
|
111
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
310 |
text {* |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
311 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
312 |
The clever idea in \cite{Sulzmann2014} is to introduce values for encoding |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
313 |
\emph{how} a regular expression matches a string and then define a function |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
314 |
on values that mirrors (but inverts) the construction of the derivative on |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
315 |
regular expressions. Values are defined as the inductive datatype |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
316 |
|
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
317 |
\begin{center} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
318 |
@{text "v :="} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
319 |
@{const "Void"} $\mid$ |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
320 |
@{term "val.Char c"} $\mid$ |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
321 |
@{term "Left v"} $\mid$ |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
322 |
@{term "Right v"} $\mid$ |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
323 |
@{term "Seq v\<^sub>1 v\<^sub>2"} $\mid$ |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
324 |
@{term "Stars vs"} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
325 |
\end{center} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
326 |
|
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
327 |
\noindent where we use @{term vs} standing for a list of values. The string |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
328 |
underlying a values can be calculated by the @{const flat} function, written |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
329 |
@{term "flat DUMMY"} and defined as: |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
330 |
|
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
331 |
\begin{center} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
332 |
\begin{tabular}{lcl} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
333 |
@{thm (lhs) flat.simps(1)} & $\dn$ & @{thm (rhs) flat.simps(1)}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
334 |
@{thm (lhs) flat.simps(2)} & $\dn$ & @{thm (rhs) flat.simps(2)}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
335 |
@{thm (lhs) flat.simps(3)} & $\dn$ & @{thm (rhs) flat.simps(3)}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
336 |
@{thm (lhs) flat.simps(4)} & $\dn$ & @{thm (rhs) flat.simps(4)}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
337 |
@{thm (lhs) flat.simps(5)[of "v\<^sub>1" "v\<^sub>2"]} & $\dn$ & @{thm (rhs) flat.simps(5)[of "v\<^sub>1" "v\<^sub>2"]}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
338 |
@{thm (lhs) flat.simps(6)} & $\dn$ & @{thm (rhs) flat.simps(6)}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
339 |
@{thm (lhs) flat.simps(7)} & $\dn$ & @{thm (rhs) flat.simps(7)}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
340 |
\end{tabular} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
341 |
\end{center} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
342 |
|
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
343 |
\noindent Sulzmann and Lu also define inductively an inhabitation relation |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
344 |
that associates values to regular expressions: |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
345 |
|
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
346 |
\begin{center} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
347 |
\begin{tabular}{c} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
348 |
@{thm[mode=Rule] Prf.intros(1)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]}\medskip\\ |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
349 |
@{thm[mode=Rule] Prf.intros(2)[of "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]} \qquad |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
350 |
@{thm[mode=Rule] Prf.intros(3)[of "v\<^sub>2" "r\<^sub>1" "r\<^sub>2"]}\medskip\\ |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
351 |
@{thm[mode=Axiom] Prf.intros(4)} \qquad |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
352 |
@{thm[mode=Axiom] Prf.intros(5)[of "c"]}\medskip\\ |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
353 |
@{thm[mode=Axiom] Prf.intros(6)[of "r"]} \qquad |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
354 |
@{thm[mode=Rule] Prf.intros(7)[of "v" "r" "vs"]}\medskip\\ |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
355 |
\end{tabular} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
356 |
\end{center} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
357 |
|
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
358 |
\noindent Note that no values are associated with the regular expression |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
359 |
@{term ZERO}, and that the only value associated with the regular |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
360 |
expression @{term ONE} is @{term Void}, pronounced (if one must) as {\em |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
361 |
``Void''}. It is routine to stablish how values `inhabitated' by a regular |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
362 |
expression correspond to the language of a regular expression, namely |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
363 |
|
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
364 |
\begin{proposition} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
365 |
@{thm L_flat_Prf} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
366 |
\end{proposition} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
367 |
|
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
368 |
Graphically the algorithm by Sulzmann \& Lu can be illustrated by the |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
369 |
picture in Figure~\ref{Sulz} where the path from the left to the right |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
370 |
involving $der/nullable$ is the first phase of the algorithm and |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
371 |
$mkeps/inj$, the path from right to left, the second phase. This picture |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
372 |
shows the steps required when a regular expression, say $r_1$, matches the |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
373 |
string $abc$. We first build the three derivatives (according to $a$, $b$ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
374 |
and $c$). We then use $nullable$ to find out whether the resulting regular |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
375 |
expression can match the empty string. If yes, we call the function |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
376 |
$mkeps$. |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
377 |
|
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
378 |
\begin{figure}[t] |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
379 |
\begin{center} |
115
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
380 |
\begin{tikzpicture}[scale=2,node distance=1.3cm, |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
381 |
every node/.style={minimum size=7mm}] |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
382 |
\node (r1) {@{term "r\<^sub>1"}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
383 |
\node (r2) [right=of r1]{@{term "r\<^sub>2"}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
384 |
\draw[->,line width=1mm](r1)--(r2) node[above,midway] {@{term "der a DUMMY"}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
385 |
\node (r3) [right=of r2]{@{term "r\<^sub>3"}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
386 |
\draw[->,line width=1mm](r2)--(r3) node[above,midway] {@{term "der b DUMMY"}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
387 |
\node (r4) [right=of r3]{@{term "r\<^sub>4"}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
388 |
\draw[->,line width=1mm](r3)--(r4) node[above,midway] {@{term "der c DUMMY"}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
389 |
\draw (r4) node[anchor=west] {\;\raisebox{3mm}{@{term nullable}}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
390 |
\node (v4) [below=of r4]{@{term "v\<^sub>4"}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
391 |
\draw[->,line width=1mm](r4) -- (v4); |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
392 |
\node (v3) [left=of v4] {@{term "v\<^sub>3"}}; |
115
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
393 |
\draw[->,line width=1mm](v4)--(v3) node[below,midway] {@{text "inj r\<^sub>3 c"}}; |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
394 |
\node (v2) [left=of v3]{@{term "v\<^sub>2"}}; |
115
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
395 |
\draw[->,line width=1mm](v3)--(v2) node[below,midway] {@{text "inj r\<^sub>2 b"}}; |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
396 |
\node (v1) [left=of v2] {@{term "v\<^sub>1"}}; |
115
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
397 |
\draw[->,line width=1mm](v2)--(v1) node[below,midway] {@{text "inj r\<^sub>1 a"}}; |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
398 |
\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{@{term "mkeps"}}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
399 |
\end{tikzpicture} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
400 |
\end{center} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
401 |
\caption{The two phases of the algorithm by Sulzmann \& Lu \cite{Sulzmann2014} |
115
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
402 |
matching the string @{term "[a,b,c]"}. The first phase (the arrows from |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
403 |
left to right) is \Brz's matcher building succesive derivatives. If at the |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
404 |
last regular expression is @{term nullable}, then functions of the |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
405 |
second phase are called: first @{term mkeps} calculates a value witnessing |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
406 |
how the empty string has been recognised by @{term "r\<^sub>4"}. After |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
407 |
that the function @{term inj} `injects back' the characters of the string into |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
408 |
the values (the arrows from right to left). |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
409 |
\label{Sulz}} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
410 |
\end{figure} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
411 |
|
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
412 |
NOT DONE YET |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
413 |
|
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
414 |
We begin with the case of a nullable regular expression: from the |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
415 |
nullability we need to construct a value that witnesses the nullability. |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
416 |
The @{const mkeps} function (from \cite{Sulzmann2014}) is a partial (but |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
417 |
unambiguous) function from regular expressions to values, total on exactly |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
418 |
the set of nullable regular expressions. |
111
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
419 |
|
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
420 |
\begin{center} |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
421 |
\begin{tabular}{lcl} |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
422 |
@{thm (lhs) mkeps.simps(1)} & $\dn$ & @{thm (rhs) mkeps.simps(1)}\\ |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
423 |
@{thm (lhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
424 |
@{thm (lhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
425 |
@{thm (lhs) mkeps.simps(4)} & $\dn$ & @{thm (rhs) mkeps.simps(4)}\\ |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
426 |
\end{tabular} |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
427 |
\end{center} |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
428 |
|
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
429 |
The well-known idea of POSIX lexing is informally defined in (for example) |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
430 |
\cite{posix}; as correctly argued in \cite{Sulzmann2014}, this needs formal |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
431 |
specification. The rough idea is that, in contrast to the so-called GREEDY |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
432 |
algorithm, POSIX lexing chooses to match more deeply and using left choices |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
433 |
rather than a right choices. For example, note that to match the string |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
434 |
@{term "[a, b]"} with the regular expression $(a + \mts)\circ (b+ab)$ the matching |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
435 |
will return $( Void, Right(ab))$ rather than $(Left\ a, Left\ b)$. [The |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
436 |
regular expression $ab$ is short for $(Lit\ a) \circ (Lit\ b)$.] Similarly, |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
437 |
to match {\em ``a''} with $(a+a)$ the leftmost $a$ will be chosen. |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
438 |
|
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
439 |
We use a simple inductive definition to specify this notion, incorporating |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
440 |
the POSIX-specific choices into the side-conditions for the rules $R tl |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
441 |
+_2$, $R tl\circ$ and $R tl*$ (as they are now called). By contrast, |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
442 |
\cite{Sulzmann2014} defines a relation between values and argues that there is a |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
443 |
maximum value, as given by the derivative-based algorithm yet to be spelt |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
444 |
out. The relation we define is ternary, relating strings, values and regular |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
445 |
expressions. |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
446 |
|
112
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
447 |
Our Posix relation @{term "s \<in> r \<rightarrow> v"} |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
448 |
|
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
449 |
\begin{center} |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
450 |
\begin{tabular}{c} |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
451 |
@{thm[mode=Axiom] PMatch.intros(1)} \qquad |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
452 |
@{thm[mode=Axiom] PMatch.intros(2)}\medskip\\ |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
453 |
@{thm[mode=Rule] PMatch.intros(3)[of "s" "r\<^sub>1" "v" "r\<^sub>2"]}\qquad |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
454 |
@{thm[mode=Rule] PMatch.intros(4)[of "s" "r\<^sub>2" "v" "r\<^sub>1"]}\medskip\\ |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
455 |
\multicolumn{1}{p{5cm}}{@{thm[mode=Rule] PMatch.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}\medskip\\ |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
456 |
@{thm[mode=Rule] PMatch.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}\medskip\\ |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
457 |
@{thm[mode=Axiom] PMatch.intros(7)}\medskip\\ |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
458 |
\end{tabular} |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
459 |
\end{center} |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
460 |
|
111
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
461 |
*} |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
462 |
|
107
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
463 |
section {* The Argument by Sulzmmann and Lu *} |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
464 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
465 |
section {* Conclusion *} |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
466 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
467 |
text {* |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
468 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
469 |
Nipkow lexer from 2000 |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
470 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
471 |
*} |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
472 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
473 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
474 |
text {* |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
475 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
476 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
477 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
478 |
|
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
479 |
\noindent |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
480 |
Values |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
481 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
482 |
|
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
483 |
|
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
484 |
|
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
485 |
|
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
486 |
|
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
487 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
488 |
\noindent |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
489 |
The @{const mkeps} function |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
490 |
|
111
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
491 |
|
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
492 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
493 |
\noindent |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
494 |
The @{text inj} function |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
495 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
496 |
\begin{center} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
497 |
\begin{tabular}{lcl} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
498 |
@{thm (lhs) injval.simps(1)} & $\dn$ & @{thm (rhs) injval.simps(1)}\\ |
101
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
499 |
@{thm (lhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]} & $\dn$ & |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
500 |
@{thm (rhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]}\\ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
501 |
@{thm (lhs) injval.simps(3)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]} & $\dn$ & |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
502 |
@{thm (rhs) injval.simps(3)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]}\\ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
503 |
@{thm (lhs) injval.simps(4)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]} & $\dn$ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
504 |
& @{thm (rhs) injval.simps(4)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]}\\ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
505 |
@{thm (lhs) injval.simps(5)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]} & $\dn$ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
506 |
& @{thm (rhs) injval.simps(5)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]}\\ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
507 |
@{thm (lhs) injval.simps(6)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]} & $\dn$ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
508 |
& @{thm (rhs) injval.simps(6)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]}\\ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
509 |
@{thm (lhs) injval.simps(7)[of "r" "c" "v" "vs"]} & $\dn$ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
510 |
& @{thm (rhs) injval.simps(7)[of "r" "c" "v" "vs"]}\\ |
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
511 |
\end{tabular} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
512 |
\end{center} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
513 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
514 |
\noindent |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
515 |
The inhabitation relation: |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
516 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
517 |
\begin{center} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
518 |
\begin{tabular}{c} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
519 |
@{thm[mode=Rule] Prf.intros(1)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]}\medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
520 |
@{thm[mode=Rule] Prf.intros(2)[of "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]} \qquad |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
521 |
@{thm[mode=Rule] Prf.intros(3)[of "v\<^sub>2" "r\<^sub>1" "r\<^sub>2"]}\medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
522 |
@{thm[mode=Axiom] Prf.intros(4)} \qquad |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
523 |
@{thm[mode=Axiom] Prf.intros(5)[of "c"]}\medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
524 |
@{thm[mode=Axiom] Prf.intros(6)[of "r"]} \qquad |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
525 |
@{thm[mode=Rule] Prf.intros(7)[of "v" "r" "vs"]}\medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
526 |
\end{tabular} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
527 |
\end{center} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
528 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
529 |
\noindent |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
530 |
We have also introduced a slightly restricted version of this relation |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
531 |
where the last rule is restricted so that @{term "flat v \<noteq> []"}. |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
532 |
This relation for \emph{non-problematic} is written @{term "\<Turnstile> v : r"}. |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
533 |
\bigskip |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
534 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
535 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
536 |
\noindent |
112
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
537 |
|
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
538 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
539 |
\noindent |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
540 |
Our version of Sulzmann's ordering relation |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
541 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
542 |
\begin{center} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
543 |
\begin{tabular}{c} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
544 |
@{thm[mode=Rule] ValOrd.intros(2)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>1'" "v\<^sub>2" "r\<^sub>2" "v\<^sub>2'"]} \qquad |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
545 |
@{thm[mode=Rule] ValOrd.intros(1)[of "v\<^sub>2" "r\<^sub>2" "v\<^sub>2'" "v\<^sub>1" "r\<^sub>1"]}\medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
546 |
@{thm[mode=Rule] ValOrd.intros(3)[of "v\<^sub>1" "v\<^sub>2" "r\<^sub>1" "r\<^sub>2"]} \qquad |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
547 |
@{thm[mode=Rule] ValOrd.intros(4)[of "v\<^sub>2" "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]}\medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
548 |
@{thm[mode=Rule] ValOrd.intros(5)[of "v\<^sub>2" "r\<^sub>2" "v\<^sub>2'" "r\<^sub>1"]} \qquad |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
549 |
@{thm[mode=Rule] ValOrd.intros(6)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>1'" "r\<^sub>2"]} \medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
550 |
@{thm[mode=Axiom] ValOrd.intros(7)}\qquad |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
551 |
@{thm[mode=Axiom] ValOrd.intros(8)[of "c"]}\medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
552 |
@{thm[mode=Rule] ValOrd.intros(9)[of "v" "vs" "r"]}\qquad |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
553 |
@{thm[mode=Rule] ValOrd.intros(10)[of "v" "vs" "r"]}\medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
554 |
@{thm[mode=Rule] ValOrd.intros(11)[of "v\<^sub>1" "r" "v\<^sub>2" "vs\<^sub>1" "vs\<^sub>2"]}\medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
555 |
@{thm[mode=Rule] ValOrd.intros(12)[of "vs\<^sub>1" "r" "vs\<^sub>2" "v"]}\qquad |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
556 |
@{thm[mode=Axiom] ValOrd.intros(13)[of "r"]}\medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
557 |
\end{tabular} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
558 |
\end{center} |
98
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
559 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
560 |
\noindent |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
561 |
A prefix of a string s |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
562 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
563 |
\begin{center} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
564 |
\begin{tabular}{c} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
565 |
@{thm prefix_def[of "s\<^sub>1" "s\<^sub>2"]} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
566 |
\end{tabular} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
567 |
\end{center} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
568 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
569 |
\noindent |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
570 |
Values and non-problematic values |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
571 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
572 |
\begin{center} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
573 |
\begin{tabular}{c} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
574 |
@{thm Values_def}\medskip\\ |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
575 |
\end{tabular} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
576 |
\end{center} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
577 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
578 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
579 |
\noindent |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
580 |
The point is that for a given @{text s} and @{text r} there are only finitely many |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
581 |
non-problematic values. |
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
582 |
*} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
583 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
584 |
text {* |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
585 |
\noindent |
98
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
586 |
Some lemmas we have proved:\bigskip |
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
587 |
|
98
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
588 |
@{thm L_flat_Prf} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
589 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
590 |
@{thm L_flat_NPrf} |
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
591 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
592 |
@{thm[mode=IfThen] mkeps_nullable} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
593 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
594 |
@{thm[mode=IfThen] mkeps_flat} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
595 |
|
107
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
596 |
@{thm[mode=IfThen] Prf_injval} |
98
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
597 |
|
107
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
598 |
@{thm[mode=IfThen] Prf_injval_flat} |
98
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
599 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
600 |
@{thm[mode=IfThen] PMatch_mkeps} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
601 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
602 |
@{thm[mode=IfThen] PMatch1(2)} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
603 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
604 |
@{thm[mode=IfThen] PMatch1N} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
605 |
|
100
8b919b3d753e
strengthened PMatch to get determ
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
99
diff
changeset
|
606 |
@{thm[mode=IfThen] PMatch_determ(1)[of "s" "r" "v\<^sub>1" "v\<^sub>2"]} |
8b919b3d753e
strengthened PMatch to get determ
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
99
diff
changeset
|
607 |
|
8b919b3d753e
strengthened PMatch to get determ
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
99
diff
changeset
|
608 |
\medskip |
98
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
609 |
\noindent |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
610 |
This is the main theorem that lets us prove that the algorithm is correct according to |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
611 |
@{term "s \<in> r \<rightarrow> v"}: |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
612 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
613 |
@{thm[mode=IfThen] PMatch2} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
614 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
615 |
\mbox{}\bigskip |
102
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
616 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
617 |
\noindent {\bf Proof} The proof is by induction on the definition of |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
618 |
@{const der}. Other inductions would go through as well. The |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
619 |
interesting case is for @{term "SEQ r\<^sub>1 r\<^sub>2"}. First we analyse the |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
620 |
case where @{term "nullable r\<^sub>1"}. We have by induction hypothesis |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
621 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
622 |
\[ |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
623 |
\begin{array}{l} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
624 |
(IH1)\quad @{text "\<forall>s v."} \text{\;if\;} @{term "s \<in> der c r\<^sub>1 \<rightarrow> v"} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
625 |
\text{\;then\;} @{term "(c # s) \<in> r\<^sub>1 \<rightarrow> injval r\<^sub>1 c v"}\\ |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
626 |
(IH2)\quad @{text "\<forall>s v."} \text{\;if\;} @{term "s \<in> der c r\<^sub>2 \<rightarrow> v"} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
627 |
\text{\;then\;} @{term "(c # s) \<in> r\<^sub>2 \<rightarrow> injval r\<^sub>2 c v"} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
628 |
\end{array} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
629 |
\] |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
630 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
631 |
\noindent |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
632 |
and have |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
633 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
634 |
\[ |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
635 |
@{term "s \<in> ALT (SEQ (der c r\<^sub>1) r\<^sub>2) (der c r\<^sub>2) \<rightarrow> v"} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
636 |
\] |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
637 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
638 |
\noindent |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
639 |
There are two cases what @{term v} can be: (1) @{term "Left v'"} and (2) @{term "Right v'"}. |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
640 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
641 |
\begin{itemize} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
642 |
\item[(1)] We know @{term "s \<in> SEQ (der c r\<^sub>1) r\<^sub>2 \<rightarrow> v'"} holds, from which we |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
643 |
can infer that there are @{text "s\<^sub>1"}, @{term "s\<^sub>2"}, @{text "v\<^sub>1"}, @{term "v\<^sub>2"} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
644 |
with |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
645 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
646 |
\[ |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
647 |
@{term "s\<^sub>1 \<in> der c r\<^sub>1 \<rightarrow> v\<^sub>1"} \qquad\text{and}\qquad @{term "s\<^sub>2 \<in> r\<^sub>2 \<rightarrow> v\<^sub>2"} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
648 |
\] |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
649 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
650 |
and also |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
651 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
652 |
\[ |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
653 |
@{term "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s\<^sub>2 \<and> s\<^sub>1 @ s\<^sub>3 \<in> L (der c r\<^sub>1) \<and> s\<^sub>4 \<in> L r\<^sub>2)"} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
654 |
\] |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
655 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
656 |
\noindent |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
657 |
and have to prove |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
658 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
659 |
\[ |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
660 |
@{term "((c # s\<^sub>1) @ s\<^sub>2) \<in> SEQ r\<^sub>1 r\<^sub>2 \<rightarrow> Seq (injval r\<^sub>1 c v\<^sub>1) v\<^sub>2"} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
661 |
\] |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
662 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
663 |
\noindent |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
664 |
The two requirements @{term "(c # s\<^sub>1) \<in> r\<^sub>1 \<rightarrow> injval r\<^sub>1 c v\<^sub>1"} and |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
665 |
@{term "s\<^sub>2 \<in> r\<^sub>2 \<rightarrow> v\<^sub>2"} can be proved by the induction hypothese (IH1) and the |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
666 |
fact above. |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
667 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
668 |
\noindent |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
669 |
This leaves to prove |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
670 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
671 |
\[ |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
672 |
@{term "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s\<^sub>2 \<and> (c # s\<^sub>1) @ s\<^sub>3 \<in> L r\<^sub>1 \<and> s\<^sub>4 \<in> L r\<^sub>2)"} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
673 |
\] |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
674 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
675 |
\noindent |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
676 |
which holds because @{term "(c # s\<^sub>1) @ s\<^sub>3 \<in> L r\<^sub>1 "} implies @{term "s\<^sub>1 @ s\<^sub>3 \<in> L (der c r\<^sub>1) "} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
677 |
|
103
ffe5d850df62
added some slides
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
102
diff
changeset
|
678 |
\item[(2)] This case is similar. |
102
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
679 |
\end{itemize} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
680 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
681 |
\noindent |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
682 |
The final case is that @{term " \<not> nullable r\<^sub>1"} holds. This case again similar |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
683 |
to the cases above. |
98
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
684 |
*} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
685 |
|
95
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
686 |
|
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
687 |
text {* |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
688 |
%\noindent |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
689 |
%{\bf Acknowledgements:} |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
690 |
%We are grateful for the comments we received from anonymous |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
691 |
%referees. |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
692 |
|
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
693 |
\bibliographystyle{plain} |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
694 |
\bibliography{root} |
101
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
695 |
|
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
696 |
\section{Roy's Rules} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
697 |
|
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
698 |
\newcommand{\abs}[1]{\mid\!\! #1\!\! \mid} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
699 |
%%\newcommand{\mts}{\textit{``''} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
700 |
\newcommand{\tl}{\ \triangleleft\ } |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
701 |
$$\inferrule[]{Void \tl \epsilon}{} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
702 |
\quad\quad |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
703 |
\inferrule[]{Char\ c \tl Lit\ c}{} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
704 |
$$ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
705 |
$$\inferrule |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
706 |
{v_1 \tl r_1} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
707 |
{Left\ v_1 \tl r_1 + r_2} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
708 |
\quad\quad |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
709 |
\inferrule[] |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
710 |
{ v_2 \tl r_2 \\ \abs{v_2}\ \not\in\ L(r_1)} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
711 |
{Right\ v_2 \tl r_1 + r_2} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
712 |
$$ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
713 |
$$ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
714 |
\inferrule |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
715 |
{v_1 \tl r_1\\ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
716 |
v_2 \tl r_2\\ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
717 |
s \in\ L(r_1\backslash\! \abs{v_1}) \ \land\ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
718 |
\abs{v_2}\!\backslash s\ \epsilon\ L(r_2) |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
719 |
\ \Rightarrow\ s = [] |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
720 |
} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
721 |
{(v_1, v_2) \tl r_1 \cdot r_2} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
722 |
$$ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
723 |
$$\inferrule |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
724 |
{ v \tl r \\ vs \tl r^* \\ \abs{v}\ \not=\ []} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
725 |
{ (v :: vs) \tl r^* } |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
726 |
\quad\quad |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
727 |
\inferrule{} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
728 |
{ [] \tl r^* } |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
729 |
$$ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
730 |
|
95
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
731 |
*} |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
732 |
|
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
733 |
|
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
734 |
(*<*) |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
735 |
end |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
736 |
(*>*) |