author | Christian Urban <christian dot urban at kcl dot ac dot uk> |
Sun, 06 Mar 2016 16:51:32 +0000 | |
changeset 117 | 2c4ffcc95399 |
parent 116 | 022503caa187 |
child 118 | 79efc0bcfc96 |
permissions | -rw-r--r-- |
95
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
1 |
(*<*) |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
2 |
theory Paper |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
3 |
imports "../ReStar" "~~/src/HOL/Library/LaTeXsugar" |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
4 |
begin |
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
5 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
6 |
declare [[show_question_marks = false]] |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
7 |
|
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
8 |
abbreviation |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
9 |
"der_syn r c \<equiv> der c r" |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
10 |
|
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
11 |
abbreviation |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
12 |
"ders_syn r s \<equiv> ders s r" |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
13 |
|
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
14 |
notation (latex output) |
111
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
15 |
If ("(\<^raw:\textrm{>if\<^raw:}> (_)/ \<^raw:\textrm{>then\<^raw:}> (_)/ \<^raw:\textrm{>else\<^raw:}> (_))" 10) and |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
16 |
Cons ("_\<^raw:\mbox{$\,$}>::\<^raw:\mbox{$\,$}>_" [75,75] 73) and |
111
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
17 |
|
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
18 |
ZERO ("\<^bold>0" 78) and |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
19 |
ONE ("\<^bold>1" 78) and |
109
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
20 |
CHAR ("_" [1000] 80) and |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
21 |
ALT ("_ + _" [77,77] 78) and |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
22 |
SEQ ("_ \<cdot> _" [77,77] 78) and |
107
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
23 |
STAR ("_\<^sup>\<star>" [1000] 78) and |
111
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
24 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
25 |
val.Void ("'(')" 79) and |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
26 |
val.Char ("Char _" [1000] 79) and |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
27 |
val.Left ("Left _" [79] 78) and |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
28 |
val.Right ("Right _" [79] 78) and |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
29 |
val.Seq ("Seq _ _" [79,79] 78) and |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
30 |
val.Stars ("Stars _" [79] 78) and |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
31 |
|
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
32 |
L ("L'(_')" [10] 78) and |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
33 |
der_syn ("_\\_" [79, 1000] 76) and |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
34 |
ders_syn ("_\\_" [79, 1000] 76) and |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
35 |
flat ("|_|" [75] 73) and |
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
36 |
Sequ ("_ @ _" [78,77] 63) and |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
37 |
injval ("inj _ _ _" [79,77,79] 76) and |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
38 |
mkeps ("mkeps _" [79] 76) and |
102
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
39 |
projval ("proj _ _ _" [1000,77,1000] 77) and |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
40 |
length ("len _" [78] 73) and |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
41 |
|
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
42 |
Prf ("\<triangleright> _ : _" [75,75] 75) and |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
43 |
PMatch ("'(_, _') \<rightarrow> _" [63,75,75] 75) |
105
80218dddbb15
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
103
diff
changeset
|
44 |
(* and ValOrd ("_ \<succeq>\<^bsub>_\<^esub> _" [78,77,77] 73) *) |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
45 |
|
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
46 |
definition |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
47 |
"match r s \<equiv> nullable (ders s r)" |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
48 |
|
95
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
49 |
(*>*) |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
50 |
|
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
51 |
section {* Introduction *} |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
52 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
53 |
|
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
54 |
text {* |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
55 |
|
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
56 |
Brzozowski \cite{Brzozowski1964} introduced the notion of the {\em |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
57 |
derivative} @{term "der c r"} of a regular expression @{text r} w.r.t.\ a |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
58 |
character~@{text c}, and showed that it gave a simple solution to the |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
59 |
problem of matching a string @{term s} with a regular expression @{term r}: |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
60 |
if the derivative of @{term r} w.r.t.\ (in succession) all the characters of |
110
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
61 |
the string matches the empty string, then @{term r} matches @{term s} |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
62 |
(and {\em vice versa}). The derivative has the property (which may be |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
63 |
regarded as its specification) that, for every string @{term s} and regular |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
64 |
expression @{term r} and character @{term c}, one has @{term "cs \<in> L(r)"} if |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
65 |
and only if \mbox{@{term "s \<in> L(der c r)"}}. The beauty of Brzozowski's |
109
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
66 |
derivatives is that they are neatly expressible in any functional language, |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
67 |
and easily definable and reasoned about in theorem provers---the definitions |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
68 |
just consist of inductive datatypes and simple recursive functions. A |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
69 |
completely formalised correctness proof of this matcher in, for example, HOL4 |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
70 |
has been mentioned in~\cite{Owens2008}. Another one in Isabelle/HOL is |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
71 |
given in \cite{Krauss2011}. |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
72 |
|
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
73 |
One limitation of Brzozowski's matcher is that it only generates a YES/NO |
109
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
74 |
answer for whether a string is being matched by a regular expression. |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
75 |
Sulzmann and Lu \cite{Sulzmann2014} extended this matcher to allow |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
76 |
generation not just of a YES/NO answer but of an actual matching, called a |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
77 |
[lexical] {\em value}. They give a simple algorithm to calculate a value |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
78 |
that appears to be the value associated with POSIX matching |
109
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
79 |
\cite{Kuklewicz,Vansummeren2006}. The challenge then is to specify that |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
80 |
value, in an algorithm-independent fashion, and to show that Sulzmann and |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
81 |
Lu's derivative-based algorithm does indeed calculate a value that is |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
82 |
correct according to the specification. |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
83 |
|
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
84 |
The answer given by Sulzmann and Lu \cite{Sulzmann2014} is to define a |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
85 |
relation (called an ``Order Relation'') on the set of values of @{term r}, |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
86 |
and to show that (once a string to be matched is chosen) there is a maximum |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
87 |
element and that it is computed by their derivative-based algorithm. This |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
88 |
proof idea is inspired by work of Frisch and Cardelli \cite{Frisch2004} on a |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
89 |
GREEDY regular expression matching algorithm. Beginning with our |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
90 |
observations that, without evidence that it is transitive, it cannot be |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
91 |
called an ``order relation'', and that the relation is called a ``total |
115
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
92 |
order'' despite being evidently not total\footnote{The relation @{text |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
93 |
"\<ge>\<^bsub>r\<^esub>"} defined in \cite{Sulzmann2014} is a relation on the |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
94 |
values for the regular expression @{term r}; but it only holds between |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
95 |
@{term v} and @{term "v'"} in cases where @{term v} and @{term "v'"} have |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
96 |
the same flattening (underlying string). So a counterexample to totality is |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
97 |
given by taking two values @{term v} and @{term "v'"} for @{term r} that |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
98 |
have different flattenings (see Section~\ref{posixsec}). A different |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
99 |
relation @{text "\<ge>\<^bsub>r,s\<^esub>"} on the set of values for @{term r} |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
100 |
with flattening @{term s} is definable by the same approach, and is indeed |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
101 |
total; but that is not what Proposition 1 of \cite{Sulzmann2014} does.}, we |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
102 |
identify problems with this approach (of which some of the proofs are not |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
103 |
published in \cite{Sulzmann2014}); perhaps more importantly, we give a |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
104 |
simple inductive (and algorithm-independent) definition of what we call |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
105 |
being a {\em POSIX value} for a regular expression @{term r} and a string |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
106 |
@{term s}; we show that the algorithm computes such a value and that such a |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
107 |
value is unique. Proofs are both done by hand and checked in Isabelle/HOL. |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
108 |
The experience of doing our proofs has been that this mechanical checking |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
109 |
was absolutely essential: this subject area has hidden snares. This was also |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
110 |
noted by Kuklewicz \cite{Kuklewicz}, who found that nearly all POSIX matching |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
111 |
implementations are ``buggy'' \cite[Page 203]{Sulzmann2014}. |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
112 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
113 |
If a regular expression matches a string, then in general there is more than |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
114 |
one way in which the string is matched. There are two commonly used |
110
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
115 |
disambiguation strategies to generate a unique answer: one is called GREEDY |
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
116 |
matching \cite{Frisch2004} and the other is POSIX |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
117 |
matching~\cite{Kuklewicz,Sulzmann2014,Vansummeren2006}. For example, consider |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
118 |
the string @{term xy} and the regular expression \mbox{@{term "STAR (ALT |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
119 |
(ALT x y) xy)"}}. Either the string can be matched in two `iterations' by |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
120 |
the single-letter regular expressions @{term x} and @{term y}, or directly |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
121 |
in one iteration by @{term xy}. The first case corresponds to GREEDY |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
122 |
matching, which first matches with the left-most symbol and only matches the |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
123 |
next symbol in case of a mismatch (this is greedy in the sense of preferring |
110
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
124 |
instant gratification to delayed repletion). The second case is POSIX |
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
125 |
matching, which prefers the longest match. |
109
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
126 |
|
112
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
127 |
In the context of lexing, where an input string needs to be split up into a |
110
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
128 |
sequence of tokens, POSIX is the more natural disambiguation strategy for |
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
129 |
what programmers consider basic syntactic building blocks in their programs. |
112
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
130 |
These building blocks are often specified by some regular expressions, say |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
131 |
@{text "r\<^bsub>key\<^esub>"} and @{text "r\<^bsub>id\<^esub>"} for recognising keywords and |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
132 |
identifiers, respectively. There are two underlying (informal) rules behind |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
133 |
tokenising a string in a POSIX fashion: |
109
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
134 |
|
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
135 |
\begin{itemize} |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
136 |
\item[$\bullet$] \underline{The Longest Match Rule (or ``maximal munch rule''):} |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
137 |
|
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
138 |
The longest initial substring matched by any regular expression is taken as |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
139 |
the next token.\smallskip |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
140 |
|
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
141 |
\item[$\bullet$] \underline{Rule Priority:} |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
142 |
|
109
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
143 |
For a particular longest initial substring, the first regular expression |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
144 |
that can match determines the token. |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
145 |
\end{itemize} |
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
146 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
147 |
\noindent Consider for example @{text "r\<^bsub>key\<^esub>"} recognising keywords such as |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
148 |
@{text "if"}, @{text "then"} and so on; and @{text "r\<^bsub>id\<^esub>"} recognising |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
149 |
identifiers (say, a single character followed by characters or numbers). Then we |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
150 |
can form the regular expression @{text "(r\<^bsub>key\<^esub> + r\<^bsub>id\<^esub>)\<^sup>\<star>"} and use POSIX |
110
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
151 |
matching to tokenise strings, say @{text "iffoo"} and @{text "if"}. In the |
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
152 |
first case we obtain by the longest match rule a single identifier token, |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
153 |
not a keyword followed by an identifier. In the second case we obtain by rule |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
154 |
priority a keyword token, not an identifier token---even if @{text "r\<^bsub>id\<^esub>"} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
155 |
matches also.\bigskip |
109
2c38f10643ae
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
108
diff
changeset
|
156 |
|
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
157 |
\noindent {\bf Contributions:} (NOT DONE YET) We have implemented in Isabelle/HOL the |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
158 |
derivative-based regular expression matching algorithm as described by |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
159 |
Sulzmann and Lu \cite{Sulzmann2014}. We have proved the correctness of this |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
160 |
algorithm according to our specification of what a POSIX value is. The |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
161 |
informal correctness proof given in \cite{Sulzmann2014} is in final |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
162 |
form\footnote{} and to us contains unfillable gaps. Our specification of a |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
163 |
POSIX value consists of a simple inductive definition that given a string |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
164 |
and a regular expression uniquely determines this value. Derivatives as |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
165 |
calculated by Brzozowski's method are usually more complex regular |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
166 |
expressions than the initial one; various optimisations are possible, such |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
167 |
as the simplifications of @{term "ALT ZERO r"}, @{term "ALT r ZERO"}, @{term |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
168 |
"SEQ ONE r"} and @{term "SEQ r ONE"} to @{term r}. One of the advantages of |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
169 |
having a simple specification and correctness proof is that the latter can |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
170 |
be refined to allow for such optimisations while retaining a simple correctness proof. |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
171 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
172 |
An extended version of \cite{Sulzmann2014} is available at the website of |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
173 |
its first author; this includes some ``proofs'', claimed in |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
174 |
\cite{Sulzmann2014} to be ``rigorous''. Since these are evidently not in |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
175 |
final form, we make no comment thereon, preferring to give general reasons |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
176 |
for our belief that the approach of \cite{Sulzmann2014} is problematic |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
177 |
rather than to discuss details of unpublished work. |
107
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
178 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
179 |
*} |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
180 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
181 |
section {* Preliminaries *} |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
182 |
|
111
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
183 |
text {* \noindent Strings in Isabelle/HOL are lists of characters with the |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
184 |
empty string being represented by the empty list, written @{term "[]"}, and |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
185 |
list-cons being written as @{term "DUMMY # DUMMY"}. Often we use the usual |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
186 |
bracket notation for lists also for strings; for example a string consisting |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
187 |
of just a single character @{term c} is written @{term "[c]"}. By using the |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
188 |
type @{type char} for characters we have a supply of finitely many |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
189 |
characters roughly corresponding to the ASCII character set. Regular |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
190 |
expressions are defined as usual as the elements of the following inductive |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
191 |
datatype: |
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
192 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
193 |
\begin{center} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
194 |
@{text "r :="} |
107
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
195 |
@{const "ZERO"} $\mid$ |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
196 |
@{const "ONE"} $\mid$ |
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
197 |
@{term "CHAR c"} $\mid$ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
198 |
@{term "ALT r\<^sub>1 r\<^sub>2"} $\mid$ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
199 |
@{term "SEQ r\<^sub>1 r\<^sub>2"} $\mid$ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
200 |
@{term "STAR r"} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
201 |
\end{center} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
202 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
203 |
\noindent where @{const ZERO} stands for the regular expression that does |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
204 |
not match any string, @{const ONE} for the regular expression that matches |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
205 |
only the empty string and @{term c} for matching a character literal. The |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
206 |
language of a regular expression is also defined as usual by the |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
207 |
recursive function @{term L} with the clauses: |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
208 |
|
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
209 |
\begin{center} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
210 |
\begin{tabular}{rcl} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
211 |
@{thm (lhs) L.simps(1)} & $\dn$ & @{thm (rhs) L.simps(1)}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
212 |
@{thm (lhs) L.simps(2)} & $\dn$ & @{thm (rhs) L.simps(2)}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
213 |
@{thm (lhs) L.simps(3)} & $\dn$ & @{thm (rhs) L.simps(3)}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
214 |
@{thm (lhs) L.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) L.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
215 |
@{thm (lhs) L.simps(5)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) L.simps(5)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
216 |
@{thm (lhs) L.simps(6)} & $\dn$ & @{thm (rhs) L.simps(6)}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
217 |
\end{tabular} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
218 |
\end{center} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
219 |
|
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
220 |
\noindent In the fourth clause we use the operation @{term "DUMMY ;; |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
221 |
DUMMY"} for the concatenation of two languages (it is also list-append for |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
222 |
strings). We use the star-notation for regular expressions and languages |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
223 |
(in the last clause above). The star on languages is defined inductively |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
224 |
by two clauses: @{text "(i)"} for the empty string being in the star of a |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
225 |
language and @{text "(ii)"} if @{term "s\<^sub>1"} is in a language and @{term |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
226 |
"s\<^sub>2"} in the star of this language, then also @{term "s\<^sub>1 @ s\<^sub>2"} is in |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
227 |
the star of this language. It will also be convenient to use the following |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
228 |
notion of a \emph{semantic derivative} (or \emph{left quotient}) of a |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
229 |
language, say @{text A}, defined as: |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
230 |
|
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
231 |
\begin{center} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
232 |
\begin{tabular}{lcl} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
233 |
@{thm (lhs) Der_def} & $\dn$ & @{thm (rhs) Der_def}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
234 |
\end{tabular} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
235 |
\end{center} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
236 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
237 |
\noindent |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
238 |
For semantic derivatives we have the following equations (for example |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
239 |
mechanically proved in \cite{Krauss2011}): |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
240 |
|
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
241 |
\begin{equation}\label{SemDer} |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
242 |
\begin{array}{lcl} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
243 |
@{thm (lhs) Der_null} & \dn & @{thm (rhs) Der_null}\\ |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
244 |
@{thm (lhs) Der_empty} & \dn & @{thm (rhs) Der_empty}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
245 |
@{thm (lhs) Der_char} & \dn & @{thm (rhs) Der_char}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
246 |
@{thm (lhs) Der_union} & \dn & @{thm (rhs) Der_union}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
247 |
@{thm (lhs) Der_Sequ} & \dn & @{thm (rhs) Der_Sequ}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
248 |
@{thm (lhs) Der_star} & \dn & @{thm (rhs) Der_star} |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
249 |
\end{array} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
250 |
\end{equation} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
251 |
|
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
252 |
|
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
253 |
\noindent \emph{\Brz's derivatives} of regular expressions |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
254 |
\cite{Brzozowski1964} can be easily defined by two recursive functions: |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
255 |
the first is from regular expressions to booleans (implementing a test |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
256 |
when a regular expression can match the empty string), and the second |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
257 |
takes a regular expression and a character to a (derivative) regular |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
258 |
expression: |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
259 |
|
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
260 |
\begin{center} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
261 |
\begin{tabular}{lcl} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
262 |
@{thm (lhs) nullable.simps(1)} & $\dn$ & @{thm (rhs) nullable.simps(1)}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
263 |
@{thm (lhs) nullable.simps(2)} & $\dn$ & @{thm (rhs) nullable.simps(2)}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
264 |
@{thm (lhs) nullable.simps(3)} & $\dn$ & @{thm (rhs) nullable.simps(3)}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
265 |
@{thm (lhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
266 |
@{thm (lhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
267 |
@{thm (lhs) nullable.simps(6)} & $\dn$ & @{thm (rhs) nullable.simps(6)}\medskip\\ |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
268 |
@{thm (lhs) der.simps(1)} & $\dn$ & @{thm (rhs) der.simps(1)}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
269 |
@{thm (lhs) der.simps(2)} & $\dn$ & @{thm (rhs) der.simps(2)}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
270 |
@{thm (lhs) der.simps(3)} & $\dn$ & @{thm (rhs) der.simps(3)}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
271 |
@{thm (lhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]}\\ |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
272 |
@{thm (lhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]}\\ |
110
267afb7fb700
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
109
diff
changeset
|
273 |
@{thm (lhs) der.simps(6)} & $\dn$ & @{thm (rhs) der.simps(6)} |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
274 |
\end{tabular} |
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
275 |
\end{center} |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
276 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
277 |
\noindent |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
278 |
We may extend this definition to give derivatives w.r.t.~strings: |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
279 |
|
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
280 |
\begin{center} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
281 |
\begin{tabular}{lcl} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
282 |
@{thm (lhs) ders.simps(1)} & $\dn$ & @{thm (rhs) ders.simps(1)}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
283 |
@{thm (lhs) ders.simps(2)} & $\dn$ & @{thm (rhs) ders.simps(2)}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
284 |
\end{tabular} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
285 |
\end{center} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
286 |
|
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
287 |
\noindent Given the equations in \eqref{SemDer}, it is a relatively easy |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
288 |
exercise in mechanical reasoning to establish that |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
289 |
|
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
290 |
\begin{proposition}\mbox{}\\ |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
291 |
\begin{tabular}{ll} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
292 |
@{text "(1)"} & @{thm (lhs) nullable_correctness} if and only if |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
293 |
@{thm (rhs) nullable_correctness}, and \\ |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
294 |
@{text "(2)"} & @{thm[mode=IfThen] der_correctness}. |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
295 |
\end{tabular} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
296 |
\end{proposition} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
297 |
|
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
298 |
\noindent With this in place it is also very routine to prove that the |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
299 |
regular expression matcher defined as |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
300 |
|
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
301 |
\begin{center} |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
302 |
@{thm match_def} |
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
303 |
\end{center} |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
304 |
|
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
305 |
\noindent gives a positive answer if and only if @{term "s \<in> L r"}. |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
306 |
Consequently, this regular expression matching algorithm satisfies the |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
307 |
usual specification. While the matcher above calculates a provably correct |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
308 |
YES/NO answer for whether a regular expression matches a string, the |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
309 |
novel idea of Sulzmann and Lu \cite{Sulzmann2014} is to append another |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
310 |
phase to this algorithm in order to calculate a [lexical] value. We will |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
311 |
explain the details next. |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
312 |
|
107
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
313 |
*} |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
314 |
|
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
315 |
section {* POSIX Regular Expression Matching\label{posixsec} *} |
107
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
316 |
|
111
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
317 |
text {* |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
318 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
319 |
The clever idea in \cite{Sulzmann2014} is to introduce values for encoding |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
320 |
\emph{how} a regular expression matches a string and then define a function |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
321 |
on values that mirrors (but inverts) the construction of the derivative on |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
322 |
regular expressions. \emph{Values} are defined as the inductive datatype |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
323 |
|
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
324 |
\begin{center} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
325 |
@{text "v :="} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
326 |
@{const "Void"} $\mid$ |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
327 |
@{term "val.Char c"} $\mid$ |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
328 |
@{term "Left v"} $\mid$ |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
329 |
@{term "Right v"} $\mid$ |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
330 |
@{term "Seq v\<^sub>1 v\<^sub>2"} $\mid$ |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
331 |
@{term "Stars vs"} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
332 |
\end{center} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
333 |
|
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
334 |
\noindent where we use @{term vs} standing for a list of values. (This is |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
335 |
similar to the approach taken by Frisch and Cardelli for GREEDY matching |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
336 |
\cite{Frisch2014}, and Sulzmann and Lu \cite{Sulzmann2014} for POSIX matching). |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
337 |
The string underlying a value can be calculated by the @{const flat} |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
338 |
function, written @{term "flat DUMMY"} and defined as: |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
339 |
|
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
340 |
\begin{center} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
341 |
\begin{tabular}{lcl} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
342 |
@{thm (lhs) flat.simps(1)} & $\dn$ & @{thm (rhs) flat.simps(1)}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
343 |
@{thm (lhs) flat.simps(2)} & $\dn$ & @{thm (rhs) flat.simps(2)}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
344 |
@{thm (lhs) flat.simps(3)} & $\dn$ & @{thm (rhs) flat.simps(3)}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
345 |
@{thm (lhs) flat.simps(4)} & $\dn$ & @{thm (rhs) flat.simps(4)}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
346 |
@{thm (lhs) flat.simps(5)[of "v\<^sub>1" "v\<^sub>2"]} & $\dn$ & @{thm (rhs) flat.simps(5)[of "v\<^sub>1" "v\<^sub>2"]}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
347 |
@{thm (lhs) flat.simps(6)} & $\dn$ & @{thm (rhs) flat.simps(6)}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
348 |
@{thm (lhs) flat.simps(7)} & $\dn$ & @{thm (rhs) flat.simps(7)}\\ |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
349 |
\end{tabular} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
350 |
\end{center} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
351 |
|
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
352 |
\noindent Sulzmann and Lu also define inductively an inhabitation relation |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
353 |
that associates values to regular expressions: |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
354 |
|
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
355 |
\begin{center} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
356 |
\begin{tabular}{c} |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
357 |
@{thm[mode=Axiom] Prf.intros(4)} \qquad |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
358 |
@{thm[mode=Axiom] Prf.intros(5)[of "c"]}\medskip\\ |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
359 |
@{thm[mode=Rule] Prf.intros(2)[of "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]} \qquad |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
360 |
@{thm[mode=Rule] Prf.intros(3)[of "v\<^sub>2" "r\<^sub>1" "r\<^sub>2"]}\medskip\\ |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
361 |
@{thm[mode=Rule] Prf.intros(1)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]}\medskip\\ |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
362 |
@{thm[mode=Axiom] Prf.intros(6)[of "r"]} \qquad |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
363 |
@{thm[mode=Rule] Prf.intros(7)[of "v" "r" "vs"]}\medskip\\ |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
364 |
\end{tabular} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
365 |
\end{center} |
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
366 |
|
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
367 |
\noindent Note that no values are associated with the regular expression |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
368 |
@{term ZERO}, and that the only value associated with the regular |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
369 |
expression @{term ONE} is @{term Void}, pronounced (if one must) as {\em |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
370 |
``Void''}. It is routine to establish how values ``inhabiting'' a regular |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
371 |
expression correspond to the language of a regular expression, namely |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
372 |
|
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
373 |
\begin{proposition} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
374 |
@{thm L_flat_Prf} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
375 |
\end{proposition} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
376 |
|
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
377 |
In general there is more than one value associated with a regular |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
378 |
expression. In case of POSIX matching the problem is to calculate the |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
379 |
unique value that satisfies the (informal) POSIX constraints from the |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
380 |
Introduction. Graphically the regular expression matching algorithm by |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
381 |
Sulzmann and Lu can be illustrated by the picture in Figure~\ref{Sulz} |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
382 |
where the path from the left to the right involving @{const der}/@{const |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
383 |
nullable} is the first phase of the algorithm (calculating successive |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
384 |
\Brz's derivatives) and @{const mkeps}/@{text inj}, the path from right to |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
385 |
left, the second phase. This picture shows the steps required when a |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
386 |
regular expression, say @{text "r\<^sub>1"}, matches the string @{term |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
387 |
"[a,b,c]"}. We first build the three derivatives (according to @{term a}, |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
388 |
@{term b} and @{term c}). We then use @{const nullable} to find out |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
389 |
whether the resulting derivative regular expression @{term "r\<^sub>4"} |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
390 |
can match the empty string. If yes, we call the function @{const mkeps} |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
391 |
that produces a value @{term "v\<^sub>4"} for how @{term "r\<^sub>4"} can |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
392 |
match the empty string (taking into account the POSIX constraints in case |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
393 |
there are several ways). This function is defined by the clauses: |
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
394 |
|
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
395 |
\begin{figure}[t] |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
396 |
\begin{center} |
115
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
397 |
\begin{tikzpicture}[scale=2,node distance=1.3cm, |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
398 |
every node/.style={minimum size=7mm}] |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
399 |
\node (r1) {@{term "r\<^sub>1"}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
400 |
\node (r2) [right=of r1]{@{term "r\<^sub>2"}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
401 |
\draw[->,line width=1mm](r1)--(r2) node[above,midway] {@{term "der a DUMMY"}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
402 |
\node (r3) [right=of r2]{@{term "r\<^sub>3"}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
403 |
\draw[->,line width=1mm](r2)--(r3) node[above,midway] {@{term "der b DUMMY"}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
404 |
\node (r4) [right=of r3]{@{term "r\<^sub>4"}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
405 |
\draw[->,line width=1mm](r3)--(r4) node[above,midway] {@{term "der c DUMMY"}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
406 |
\draw (r4) node[anchor=west] {\;\raisebox{3mm}{@{term nullable}}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
407 |
\node (v4) [below=of r4]{@{term "v\<^sub>4"}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
408 |
\draw[->,line width=1mm](r4) -- (v4); |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
409 |
\node (v3) [left=of v4] {@{term "v\<^sub>3"}}; |
115
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
410 |
\draw[->,line width=1mm](v4)--(v3) node[below,midway] {@{text "inj r\<^sub>3 c"}}; |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
411 |
\node (v2) [left=of v3]{@{term "v\<^sub>2"}}; |
115
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
412 |
\draw[->,line width=1mm](v3)--(v2) node[below,midway] {@{text "inj r\<^sub>2 b"}}; |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
413 |
\node (v1) [left=of v2] {@{term "v\<^sub>1"}}; |
115
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
414 |
\draw[->,line width=1mm](v2)--(v1) node[below,midway] {@{text "inj r\<^sub>1 a"}}; |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
415 |
\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{@{term "mkeps"}}}; |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
416 |
\end{tikzpicture} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
417 |
\end{center} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
418 |
\caption{The two phases of the algorithm by Sulzmann \& Lu \cite{Sulzmann2014} |
115
15ef2af1a6f2
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
114
diff
changeset
|
419 |
matching the string @{term "[a,b,c]"}. The first phase (the arrows from |
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
420 |
left to right) is \Brz's matcher building successive derivatives. If the |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
421 |
last regular expression is @{term nullable}, then functions of the |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
422 |
second phase are called: first @{term mkeps} calculates a value witnessing |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
423 |
how the empty string has been recognised by @{term "r\<^sub>4"}. After |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
424 |
that the function @{term inj} `injects back' the characters of the string into |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
425 |
the values (the arrows from right to left). |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
426 |
\label{Sulz}} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
427 |
\end{figure} |
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
428 |
|
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
429 |
\begin{center} |
111
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
430 |
\begin{tabular}{lcl} |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
431 |
@{thm (lhs) mkeps.simps(1)} & $\dn$ & @{thm (rhs) mkeps.simps(1)}\\ |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
432 |
@{thm (lhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
433 |
@{thm (lhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
434 |
@{thm (lhs) mkeps.simps(4)} & $\dn$ & @{thm (rhs) mkeps.simps(4)}\\ |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
435 |
\end{tabular} |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
436 |
\end{center} |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
437 |
|
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
438 |
\noindent Note that this function needs only to be partially defined, |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
439 |
namely only for regular expressions that are nullable. In case @{const |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
440 |
nullable} fails, the string @{term "[a,b,c]"} cannot be matched by @{term |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
441 |
"r\<^sub>1"} and an error is raised. Note also how this function makes |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
442 |
some subtle choices leading to a POSIX value: for example if the |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
443 |
alternative, say @{term "ALT r\<^sub>1 r\<^sub>2"}, can match the empty |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
444 |
string and furthermore @{term "r\<^sub>1"} can match the empty string, |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
445 |
then we return a @{const Left}-value. The @{const Right}-value will only |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
446 |
be returned if @{term "r\<^sub>1"} is not nullable. |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
447 |
|
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
448 |
The most interesting novelty from Sulzmann and Lu \cite{Sulzmann2014} is |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
449 |
the construction of a value for how @{term "r\<^sub>1"} can match the string |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
450 |
@{term "[a,b,c]"} from the value how the last derivative, @{term |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
451 |
"r\<^sub>4"} in Fig~\ref{Sulz}, can match the empty string. Sulzmann and |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
452 |
Lu achieve this by stepwise ``injecting back'' the characters into the |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
453 |
values thus inverting the operation of building derivatives on the level |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
454 |
of values. The corresponding function, called @{term inj}, takes three |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
455 |
arguments, a regular expression, a character and a value. For example in |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
456 |
the first @{term inj}-step in Fig~\ref{Sulz} these are the regular expression @{term |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
457 |
"r\<^sub>3"}, the character @{term c} from the last derivative step and |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
458 |
@{term "v\<^sub>4"}, which is the value corresponding to the derivative |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
459 |
regular expression @{term "r\<^sub>4"}. The result is the new value @{term |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
460 |
"v\<^sub>3"}. The final result of the algorithm is the value @{term |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
461 |
"v\<^sub>1"} corresponding to the input regular expression. The @{term |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
462 |
inj} function is defined by recursion on the regular expression and by analysing |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
463 |
the shape of values. |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
464 |
|
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
465 |
\begin{center} |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
466 |
\begin{tabular}{lcl} |
117
2c4ffcc95399
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
116
diff
changeset
|
467 |
@{thm (lhs) injval.simps(1)[of "d" "DUMMY"]} & $\dn$ & @{thm (rhs) injval.simps(1)}\\ |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
468 |
@{thm (lhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]} & $\dn$ & |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
469 |
@{thm (rhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]}\\ |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
470 |
@{thm (lhs) injval.simps(3)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]} & $\dn$ & |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
471 |
@{thm (rhs) injval.simps(3)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]}\\ |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
472 |
@{thm (lhs) injval.simps(4)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]} & $\dn$ |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
473 |
& @{thm (rhs) injval.simps(4)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]}\\ |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
474 |
@{thm (lhs) injval.simps(5)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]} & $\dn$ |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
475 |
& @{thm (rhs) injval.simps(5)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]}\\ |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
476 |
@{thm (lhs) injval.simps(6)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]} & $\dn$ |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
477 |
& @{thm (rhs) injval.simps(6)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]}\\ |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
478 |
@{thm (lhs) injval.simps(7)[of "r" "c" "v" "vs"]} & $\dn$ |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
479 |
& @{thm (rhs) injval.simps(7)[of "r" "c" "v" "vs"]}\\ |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
480 |
\end{tabular} |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
481 |
\end{center} |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
482 |
|
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
483 |
\noindent To better understand what is going on in this definition it |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
484 |
might be instructive to look first at the three sequence cases (clauses |
117
2c4ffcc95399
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
116
diff
changeset
|
485 |
4--6). In each case we need to construct an ``injected value'' for @{term |
2c4ffcc95399
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
116
diff
changeset
|
486 |
"SEQ r\<^sub>1 r\<^sub>2"}. Recall the clause of the @{const der}-function |
2c4ffcc95399
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
116
diff
changeset
|
487 |
for sequence regular expressions: |
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
488 |
|
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
489 |
\begin{center} |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
490 |
@{thm (lhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]} $\dn$ @{thm (rhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]} |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
491 |
\end{center} |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
492 |
|
117
2c4ffcc95399
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
116
diff
changeset
|
493 |
\noindent Consider first the else-branch where the derivative is @{term |
2c4ffcc95399
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
116
diff
changeset
|
494 |
"SEQ (der c r\<^sub>1) r\<^sub>2"}. The corresponding value must therefore |
2c4ffcc95399
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
116
diff
changeset
|
495 |
be of the form @{term "Seq v\<^sub>1 v\<^sub>2"}, which matches the fourth |
2c4ffcc95399
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
116
diff
changeset
|
496 |
clause of @{term inj}. In the if-branch the derivative is an alternative, |
2c4ffcc95399
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
116
diff
changeset
|
497 |
namely @{term "ALT (SEQ (der c r\<^sub>1) r\<^sub>2) (der c r\<^sub>2)"}. |
2c4ffcc95399
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
116
diff
changeset
|
498 |
This means we either have to consider a @{text Left}- or @{text |
2c4ffcc95399
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
116
diff
changeset
|
499 |
Right}-value. In case of the @{text Left}-value we know further it must be |
2c4ffcc95399
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
116
diff
changeset
|
500 |
a value for a sequence regular expression. Therefore the pattern we |
2c4ffcc95399
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
116
diff
changeset
|
501 |
match in the fifth clause is @{term "Left (Seq v\<^sub>1 v\<^sub>2)"}, while |
2c4ffcc95399
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
116
diff
changeset
|
502 |
in the sixth it is just @{term "Right v\<^sub>2"}. One more interesting point |
2c4ffcc95399
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
116
diff
changeset
|
503 |
is in the right-hand side of the sixth clause: since in this case the regular |
2c4ffcc95399
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
116
diff
changeset
|
504 |
expression @{text "r\<^sub>1"} does not ``contribute'' in matching the string, |
2c4ffcc95399
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
116
diff
changeset
|
505 |
that is, it only matches the empty string, we need to call @{const mkeps} in order |
2c4ffcc95399
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
116
diff
changeset
|
506 |
to construct a value for how @{term "r\<^sub>1"} can match this empty string. |
2c4ffcc95399
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
116
diff
changeset
|
507 |
|
2c4ffcc95399
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
116
diff
changeset
|
508 |
|
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
509 |
NOT DONE YET |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
510 |
|
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
511 |
Therefore there are, for example, three |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
512 |
cases for sequence regular expressions (for all possible shapes of the |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
513 |
value). |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
514 |
|
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
515 |
Again the virtue of this algorithm is that it can be |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
516 |
implemented with ease in a functional programming language and |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
517 |
also in Isabelle/HOL. |
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
518 |
|
111
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
519 |
The well-known idea of POSIX lexing is informally defined in (for example) |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
520 |
\cite{posix}; as correctly argued in \cite{Sulzmann2014}, this needs formal |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
521 |
specification. The rough idea is that, in contrast to the so-called GREEDY |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
522 |
algorithm, POSIX lexing chooses to match more deeply and using left choices |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
523 |
rather than right choices. For example, note that to match the string |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
524 |
@{term "[a, b]"} with the regular expression $(a + \mts)\circ (b+ab)$ the matching |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
525 |
will return $( Void, Right(ab))$ rather than $(Left\ a, Left\ b)$. [The |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
526 |
regular expression $ab$ is short for $(Lit\ a) \circ (Lit\ b)$.] Similarly, |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
527 |
to match {\em ``a''} with $(a+a)$ the leftmost $a$ will be chosen. |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
528 |
|
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
529 |
We use a simple inductive definition to specify this notion, incorporating |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
530 |
the POSIX-specific choices into the side-conditions for the rules $R tl |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
531 |
+_2$, $R tl\circ$ and $R tl*$ (as they are now called). By contrast, |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
532 |
\cite{Sulzmann2014} defines a relation between values and argues that there is a |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
533 |
maximum value, as given by the derivative-based algorithm yet to be spelt |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
534 |
out. The relation we define is ternary, relating strings, values and regular |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
535 |
expressions. |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
536 |
|
112
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
537 |
Our Posix relation @{term "s \<in> r \<rightarrow> v"} |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
538 |
|
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
539 |
\begin{center} |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
540 |
\begin{tabular}{c} |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
541 |
@{thm[mode=Axiom] PMatch.intros(1)} \qquad |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
542 |
@{thm[mode=Axiom] PMatch.intros(2)}\medskip\\ |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
543 |
@{thm[mode=Rule] PMatch.intros(3)[of "s" "r\<^sub>1" "v" "r\<^sub>2"]}\qquad |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
544 |
@{thm[mode=Rule] PMatch.intros(4)[of "s" "r\<^sub>2" "v" "r\<^sub>1"]}\medskip\\ |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
545 |
\multicolumn{1}{p{5cm}}{@{thm[mode=Rule] PMatch.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}\medskip\\ |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
546 |
@{thm[mode=Rule] PMatch.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}\medskip\\ |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
547 |
@{thm[mode=Axiom] PMatch.intros(7)}\medskip\\ |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
548 |
\end{tabular} |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
549 |
\end{center} |
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
550 |
|
111
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
551 |
*} |
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
552 |
|
107
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
553 |
section {* The Argument by Sulzmann and Lu *} |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
554 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
555 |
section {* Conclusion *} |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
556 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
557 |
text {* |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
558 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
559 |
Nipkow lexer from 2000 |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
560 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
561 |
*} |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
562 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
563 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
564 |
text {* |
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
565 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
566 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
567 |
|
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
568 |
|
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
569 |
\noindent |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
570 |
Values |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
571 |
|
113
90fe1a1d7d0e
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
112
diff
changeset
|
572 |
|
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
573 |
|
108
73f7dc60c285
updated paper
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
107
diff
changeset
|
574 |
|
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
575 |
|
114
8b41d01b5e5d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
113
diff
changeset
|
576 |
|
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
577 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
578 |
\noindent |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
579 |
The @{const mkeps} function |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
580 |
|
111
289728193164
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
110
diff
changeset
|
581 |
|
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
582 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
583 |
\noindent |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
584 |
The @{text inj} function |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
585 |
|
116
022503caa187
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
115
diff
changeset
|
586 |
|
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
587 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
588 |
\noindent |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
589 |
The inhabitation relation: |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
590 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
591 |
\begin{center} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
592 |
\begin{tabular}{c} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
593 |
@{thm[mode=Rule] Prf.intros(1)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]}\medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
594 |
@{thm[mode=Rule] Prf.intros(2)[of "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]} \qquad |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
595 |
@{thm[mode=Rule] Prf.intros(3)[of "v\<^sub>2" "r\<^sub>1" "r\<^sub>2"]}\medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
596 |
@{thm[mode=Axiom] Prf.intros(4)} \qquad |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
597 |
@{thm[mode=Axiom] Prf.intros(5)[of "c"]}\medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
598 |
@{thm[mode=Axiom] Prf.intros(6)[of "r"]} \qquad |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
599 |
@{thm[mode=Rule] Prf.intros(7)[of "v" "r" "vs"]}\medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
600 |
\end{tabular} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
601 |
\end{center} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
602 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
603 |
\noindent |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
604 |
We have also introduced a slightly restricted version of this relation |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
605 |
where the last rule is restricted so that @{term "flat v \<noteq> []"}. |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
606 |
This relation for \emph{non-problematic} values is written @{term "\<Turnstile> v : r"}. |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
607 |
\bigskip |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
608 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
609 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
610 |
\noindent |
112
698967eceaf1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
111
diff
changeset
|
611 |
|
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
612 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
613 |
\noindent |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
614 |
Our version of Sulzmann's ordering relation |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
615 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
616 |
\begin{center} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
617 |
\begin{tabular}{c} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
618 |
@{thm[mode=Rule] ValOrd.intros(2)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>1'" "v\<^sub>2" "r\<^sub>2" "v\<^sub>2'"]} \qquad |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
619 |
@{thm[mode=Rule] ValOrd.intros(1)[of "v\<^sub>2" "r\<^sub>2" "v\<^sub>2'" "v\<^sub>1" "r\<^sub>1"]}\medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
620 |
@{thm[mode=Rule] ValOrd.intros(3)[of "v\<^sub>1" "v\<^sub>2" "r\<^sub>1" "r\<^sub>2"]} \qquad |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
621 |
@{thm[mode=Rule] ValOrd.intros(4)[of "v\<^sub>2" "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]}\medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
622 |
@{thm[mode=Rule] ValOrd.intros(5)[of "v\<^sub>2" "r\<^sub>2" "v\<^sub>2'" "r\<^sub>1"]} \qquad |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
623 |
@{thm[mode=Rule] ValOrd.intros(6)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>1'" "r\<^sub>2"]} \medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
624 |
@{thm[mode=Axiom] ValOrd.intros(7)}\qquad |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
625 |
@{thm[mode=Axiom] ValOrd.intros(8)[of "c"]}\medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
626 |
@{thm[mode=Rule] ValOrd.intros(9)[of "v" "vs" "r"]}\qquad |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
627 |
@{thm[mode=Rule] ValOrd.intros(10)[of "v" "vs" "r"]}\medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
628 |
@{thm[mode=Rule] ValOrd.intros(11)[of "v\<^sub>1" "r" "v\<^sub>2" "vs\<^sub>1" "vs\<^sub>2"]}\medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
629 |
@{thm[mode=Rule] ValOrd.intros(12)[of "vs\<^sub>1" "r" "vs\<^sub>2" "v"]}\qquad |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
630 |
@{thm[mode=Axiom] ValOrd.intros(13)[of "r"]}\medskip\\ |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
631 |
\end{tabular} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
632 |
\end{center} |
98
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
633 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
634 |
\noindent |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
635 |
A prefix of a string s |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
636 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
637 |
\begin{center} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
638 |
\begin{tabular}{c} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
639 |
@{thm prefix_def[of "s\<^sub>1" "s\<^sub>2"]} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
640 |
\end{tabular} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
641 |
\end{center} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
642 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
643 |
\noindent |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
644 |
Values and non-problematic values |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
645 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
646 |
\begin{center} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
647 |
\begin{tabular}{c} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
648 |
@{thm Values_def}\medskip\\ |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
649 |
\end{tabular} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
650 |
\end{center} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
651 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
652 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
653 |
\noindent |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
654 |
The point is that for a given @{text s} and @{text r} there are only finitely many |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
655 |
non-problematic values. |
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
656 |
*} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
657 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
658 |
text {* |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
659 |
\noindent |
98
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
660 |
Some lemmas we have proved:\bigskip |
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
661 |
|
98
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
662 |
@{thm L_flat_Prf} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
663 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
664 |
@{thm L_flat_NPrf} |
97
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
665 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
666 |
@{thm[mode=IfThen] mkeps_nullable} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
667 |
|
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
668 |
@{thm[mode=IfThen] mkeps_flat} |
38696f516c6b
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
95
diff
changeset
|
669 |
|
107
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
670 |
@{thm[mode=IfThen] Prf_injval} |
98
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
671 |
|
107
6adda4a667b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
105
diff
changeset
|
672 |
@{thm[mode=IfThen] Prf_injval_flat} |
98
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
673 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
674 |
@{thm[mode=IfThen] PMatch_mkeps} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
675 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
676 |
@{thm[mode=IfThen] PMatch1(2)} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
677 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
678 |
@{thm[mode=IfThen] PMatch1N} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
679 |
|
100
8b919b3d753e
strengthened PMatch to get determ
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
99
diff
changeset
|
680 |
@{thm[mode=IfThen] PMatch_determ(1)[of "s" "r" "v\<^sub>1" "v\<^sub>2"]} |
8b919b3d753e
strengthened PMatch to get determ
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
99
diff
changeset
|
681 |
|
8b919b3d753e
strengthened PMatch to get determ
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
99
diff
changeset
|
682 |
\medskip |
98
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
683 |
\noindent |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
684 |
This is the main theorem that lets us prove that the algorithm is correct according to |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
685 |
@{term "s \<in> r \<rightarrow> v"}: |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
686 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
687 |
@{thm[mode=IfThen] PMatch2} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
688 |
|
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
689 |
\mbox{}\bigskip |
102
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
690 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
691 |
\noindent {\bf Proof} The proof is by induction on the definition of |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
692 |
@{const der}. Other inductions would go through as well. The |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
693 |
interesting case is for @{term "SEQ r\<^sub>1 r\<^sub>2"}. First we analyse the |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
694 |
case where @{term "nullable r\<^sub>1"}. We have by induction hypothesis |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
695 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
696 |
\[ |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
697 |
\begin{array}{l} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
698 |
(IH1)\quad @{text "\<forall>s v."} \text{\;if\;} @{term "s \<in> der c r\<^sub>1 \<rightarrow> v"} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
699 |
\text{\;then\;} @{term "(c # s) \<in> r\<^sub>1 \<rightarrow> injval r\<^sub>1 c v"}\\ |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
700 |
(IH2)\quad @{text "\<forall>s v."} \text{\;if\;} @{term "s \<in> der c r\<^sub>2 \<rightarrow> v"} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
701 |
\text{\;then\;} @{term "(c # s) \<in> r\<^sub>2 \<rightarrow> injval r\<^sub>2 c v"} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
702 |
\end{array} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
703 |
\] |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
704 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
705 |
\noindent |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
706 |
and have |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
707 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
708 |
\[ |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
709 |
@{term "s \<in> ALT (SEQ (der c r\<^sub>1) r\<^sub>2) (der c r\<^sub>2) \<rightarrow> v"} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
710 |
\] |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
711 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
712 |
\noindent |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
713 |
There are two cases for what @{term v} can be: (1) @{term "Left v'"} and (2) @{term "Right v'"}. |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
714 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
715 |
\begin{itemize} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
716 |
\item[(1)] We know @{term "s \<in> SEQ (der c r\<^sub>1) r\<^sub>2 \<rightarrow> v'"} holds, from which we |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
717 |
can infer that there are @{text "s\<^sub>1"}, @{term "s\<^sub>2"}, @{text "v\<^sub>1"}, @{term "v\<^sub>2"} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
718 |
with |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
719 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
720 |
\[ |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
721 |
@{term "s\<^sub>1 \<in> der c r\<^sub>1 \<rightarrow> v\<^sub>1"} \qquad\text{and}\qquad @{term "s\<^sub>2 \<in> r\<^sub>2 \<rightarrow> v\<^sub>2"} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
722 |
\] |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
723 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
724 |
and also |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
725 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
726 |
\[ |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
727 |
@{term "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s\<^sub>2 \<and> s\<^sub>1 @ s\<^sub>3 \<in> L (der c r\<^sub>1) \<and> s\<^sub>4 \<in> L r\<^sub>2)"} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
728 |
\] |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
729 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
730 |
\noindent |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
731 |
and have to prove |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
732 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
733 |
\[ |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
734 |
@{term "((c # s\<^sub>1) @ s\<^sub>2) \<in> SEQ r\<^sub>1 r\<^sub>2 \<rightarrow> Seq (injval r\<^sub>1 c v\<^sub>1) v\<^sub>2"} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
735 |
\] |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
736 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
737 |
\noindent |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
738 |
The two requirements @{term "(c # s\<^sub>1) \<in> r\<^sub>1 \<rightarrow> injval r\<^sub>1 c v\<^sub>1"} and |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
739 |
@{term "s\<^sub>2 \<in> r\<^sub>2 \<rightarrow> v\<^sub>2"} can be proved by the induction hypothesis (IH1) and the |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
740 |
fact above. |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
741 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
742 |
\noindent |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
743 |
This leaves us to prove |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
744 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
745 |
\[ |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
746 |
@{term "\<not> (\<exists>s\<^sub>3 s\<^sub>4. s\<^sub>3 \<noteq> [] \<and> s\<^sub>3 @ s\<^sub>4 = s\<^sub>2 \<and> (c # s\<^sub>1) @ s\<^sub>3 \<in> L r\<^sub>1 \<and> s\<^sub>4 \<in> L r\<^sub>2)"} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
747 |
\] |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
748 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
749 |
\noindent |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
750 |
which holds because @{term "(c # s\<^sub>1) @ s\<^sub>3 \<in> L r\<^sub>1 "} implies @{term "s\<^sub>1 @ s\<^sub>3 \<in> L (der c r\<^sub>1) "}. |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
751 |
|
103
ffe5d850df62
added some slides
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
102
diff
changeset
|
752 |
\item[(2)] This case is similar. |
102
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
753 |
\end{itemize} |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
754 |
|
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
755 |
\noindent |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
756 |
The final case is that @{term " \<not> nullable r\<^sub>1"} holds. This case is again similar |
7f589bfecffa
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
101
diff
changeset
|
757 |
to the cases above. |
98
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
758 |
*} |
8b4c8cdd0b51
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
97
diff
changeset
|
759 |
|
95
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
760 |
|
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
761 |
text {* |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
762 |
%\noindent |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
763 |
%{\bf Acknowledgements:} |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
764 |
%We are grateful for the comments we received from anonymous |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
765 |
%referees. |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
766 |
|
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
767 |
\bibliographystyle{plain} |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
768 |
\bibliography{root} |
101
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
769 |
|
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
770 |
\section{Roy's Rules} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
771 |
|
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
772 |
\newcommand{\abs}[1]{\mid\!\! #1\!\! \mid} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
773 |
%%\newcommand{\mts}{\textit{``''} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
774 |
\newcommand{\tl}{\ \triangleleft\ } |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
775 |
$$\inferrule[]{Void \tl \epsilon}{} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
776 |
\quad\quad |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
777 |
\inferrule[]{Char\ c \tl Lit\ c}{} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
778 |
$$ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
779 |
$$\inferrule |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
780 |
{v_1 \tl r_1} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
781 |
{Left\ v_1 \tl r_1 + r_2} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
782 |
\quad\quad |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
783 |
\inferrule[] |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
784 |
{ v_2 \tl r_2 \\ \abs{v_2}\ \not\in\ L(r_1)} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
785 |
{Right\ v_2 \tl r_1 + r_2} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
786 |
$$ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
787 |
$$ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
788 |
\inferrule |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
789 |
{v_1 \tl r_1\\ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
790 |
v_2 \tl r_2\\ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
791 |
s \in\ L(r_1\backslash\! \abs{v_1}) \ \land\ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
792 |
\abs{v_2}\!\backslash s\ \in\ L(r_2) |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
793 |
\ \Rightarrow\ s = [] |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
794 |
} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
795 |
{(v_1, v_2) \tl r_1 \cdot r_2} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
796 |
$$ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
797 |
$$\inferrule |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
798 |
{ v \tl r \\ vs \tl r^* \\ \abs{v}\ \not=\ []} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
799 |
{ (v :: vs) \tl r^* } |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
800 |
\quad\quad |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
801 |
\inferrule{} |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
802 |
{ [] \tl r^* } |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
803 |
$$ |
7f4f8c34da95
fixed inj function
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
100
diff
changeset
|
804 |
|
95
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
805 |
*} |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
806 |
|
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
807 |
|
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
808 |
(*<*) |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
809 |
end |
a33d3040bf7e
started a paper and moved cruft to Attic
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
810 |
(*>*) |