author | Chengsong |
Wed, 23 Aug 2023 03:02:31 +0100 | |
changeset 668 | 3831621d7b14 |
parent 644 | 9f984ff20020 |
permissions | -rw-r--r-- |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1 |
(*<*) |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
2 |
theory Paper |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
3 |
imports |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
4 |
"Posix.LexerSimp" |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
5 |
"Posix.FBound" |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
6 |
"HOL-Library.LaTeXsugar" |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
7 |
begin |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
8 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
9 |
declare [[show_question_marks = false]] |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
10 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
11 |
notation (latex output) |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
12 |
If ("(\<^latex>\<open>\\textrm{\<close>if\<^latex>\<open>}\<close> (_)/ \<^latex>\<open>\\textrm{\<close>then\<^latex>\<open>}\<close> (_)/ \<^latex>\<open>\\textrm{\<close>else\<^latex>\<open>}\<close> (_))" 10) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
13 |
Cons ("_\<^latex>\<open>\\mbox{$\\,$}\<close>::\<^latex>\<open>\\mbox{$\\,$}\<close>_" [75,73] 73) |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
14 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
15 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
16 |
abbreviation |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
17 |
"der_syn r c \<equiv> der c r" |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
18 |
abbreviation |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
19 |
"ders_syn r s \<equiv> ders s r" |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
20 |
abbreviation |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
21 |
"bder_syn r c \<equiv> bder c r" |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
22 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
23 |
notation (latex output) |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
24 |
der_syn ("_\\_" [79, 1000] 76) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
25 |
ders_syn ("_\\_" [79, 1000] 76) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
26 |
bder_syn ("_\\_" [79, 1000] 76) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
27 |
bders ("_\\_" [79, 1000] 76) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
28 |
bders_simp ("_\\\<^sub>b\<^sub>s\<^sub>i\<^sub>m\<^sub>p _" [79, 1000] 76) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
29 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
30 |
ZERO ("\<^bold>0" 81) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
31 |
ONE ("\<^bold>1" 81) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
32 |
CH ("_" [1000] 80) and |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
33 |
ALT ("_ + _" [76,76] 77) and |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
34 |
SEQ ("_ \<cdot> _" [78,78] 78) and |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
35 |
STAR ("_\<^sup>*" [79] 78) and |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
36 |
NTIMES ("_\<^sup>{\<^bsup>_\<^esup>\<^sup>}" [79] 78) and |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
37 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
38 |
val.Void ("Empty" 78) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
39 |
val.Char ("Char _" [1000] 78) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
40 |
val.Left ("Left _" [79] 78) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
41 |
val.Right ("Right _" [1000] 78) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
42 |
val.Seq ("Seq _ _" [79,79] 78) and |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
43 |
val.Stars ("Stars _" [1000] 78) and |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
44 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
45 |
Prf ("\<turnstile> _ : _" [75,75] 75) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
46 |
Posix ("'(_, _') \<rightarrow> _" [63,75,75] 75) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
47 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
48 |
flat ("|_|" [75] 74) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
49 |
flats ("|_|" [72] 74) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
50 |
injval ("inj _ _ _" [79,77,79] 76) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
51 |
mkeps ("mkeps _" [79] 76) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
52 |
length ("len _" [73] 73) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
53 |
set ("_" [73] 73) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
54 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
55 |
AZERO ("ZERO" 81) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
56 |
AONE ("ONE _" [79] 78) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
57 |
ACHAR ("CHAR _ _" [79, 79] 80) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
58 |
AALTs ("ALTs _ _" [77,77] 78) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
59 |
ASEQ ("SEQ _ _ _" [79, 79,79] 78) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
60 |
ASTAR ("STAR _ _" [79, 79] 78) and |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
61 |
ANTIMES ("NT _ _ _" [79, 79, 79] 78) and |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
62 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
63 |
code ("code _" [79] 74) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
64 |
intern ("_\<^latex>\<open>\\mbox{$^\\uparrow$}\<close>" [900] 80) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
65 |
erase ("_\<^latex>\<open>\\mbox{$^\\downarrow$}\<close>" [1000] 74) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
66 |
bnullable ("bnullable _" [1000] 80) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
67 |
bsimp_AALTs ("bsimpALT _ _" [10,1000] 80) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
68 |
bsimp_ASEQ ("bsimpSEQ _ _ _" [10,1000,1000] 80) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
69 |
bmkeps ("bmkeps _" [1000] 80) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
70 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
71 |
eq1 ("_ \<approx> _" [77,77] 76) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
72 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
73 |
srewrite ("_\<^latex>\<open>\\mbox{$\\,\\stackrel{s}{\\leadsto}$}\<close> _" [71, 71] 80) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
74 |
rrewrites ("_ \<^latex>\<open>\\mbox{$\\,\\leadsto^*$}\<close> _" [71, 71] 80) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
75 |
srewrites ("_ \<^latex>\<open>\\mbox{$\\,\\stackrel{s}{\\leadsto}^*$}\<close> _" [71, 71] 80) and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
76 |
blexer_simp ("blexer\<^sup>+" 1000) |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
77 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
78 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
79 |
lemma better_retrieve: |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
80 |
shows "rs \<noteq> Nil ==> retrieve (AALTs bs (r#rs)) (Left v) = bs @ retrieve r v" |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
81 |
and "rs \<noteq> Nil ==> retrieve (AALTs bs (r#rs)) (Right v) = bs @ retrieve (AALTs [] rs) v" |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
82 |
apply (metis list.exhaust retrieve.simps(4)) |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
83 |
by (metis list.exhaust retrieve.simps(5)) |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
84 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
85 |
|
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
86 |
lemma better_retrieve2: |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
87 |
shows "retrieve (ANTIMES bs r (n + 1)) (Stars (v#vs)) = |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
88 |
bs @ [Z] @ retrieve r v @ retrieve (ANTIMES [] r n) (Stars vs)" |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
89 |
by auto |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
90 |
(*>*) |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
91 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
92 |
section {* Introduction *} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
93 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
94 |
text {* |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
95 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
96 |
In the last fifteen or so years, Brzozowski's derivatives of regular |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
97 |
expressions have sparked quite a bit of interest in the functional |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
98 |
programming and theorem prover communities. |
563
c92a41d9c4da
updated paper and theories to include n-times
Christian Urban <christian.urban@kcl.ac.uk>
parents:
502
diff
changeset
|
99 |
Derivatives of a |
642 | 100 |
regular expressions, written @{term "der c r"}, give a simple solution |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
101 |
to the problem of matching a string @{term s} with a regular |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
102 |
expression @{term r}: if the derivative of @{term r} w.r.t.\ (in |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
103 |
succession) all the characters of the string matches the empty string, |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
104 |
then @{term r} matches @{term s} (and {\em vice versa}). |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
105 |
The beauty of |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
106 |
Brzozowski's derivatives \cite{Brzozowski1964} is that they are neatly |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
107 |
expressible in any functional language, and easily definable and |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
108 |
reasoned about in theorem provers---the definitions just consist of |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
109 |
inductive datatypes and simple recursive functions. Another attractive |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
110 |
feature of derivatives is that they can be easily extended to \emph{bounded} |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
111 |
regular expressions, such as @{term "r"}$^{\{@{text n}\}}$ or @{term r}$^{\{..@{text n}\}}$, where numbers or |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
112 |
intervals of numbers specify how many times a regular expression should be used |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
113 |
during matching. |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
114 |
|
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
115 |
|
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
116 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
117 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
118 |
However, there are two difficulties with derivative-based matchers: |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
119 |
First, Brzozowski's original matcher only generates a yes/no answer |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
120 |
for whether a regular expression matches a string or not. This is too |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
121 |
little information in the context of lexing where separate tokens must |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
122 |
be identified and also classified (for example as keywords |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
123 |
or identifiers). Sulzmann and Lu~\cite{Sulzmann2014} overcome this |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
124 |
difficulty by cleverly extending Brzozowski's matching |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
125 |
algorithm. Their extended version generates additional information on |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
126 |
\emph{how} a regular expression matches a string following the POSIX |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
127 |
rules for regular expression matching. They achieve this by adding a |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
128 |
second ``phase'' to Brzozowski's algorithm involving an injection |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
129 |
function. In our own earlier work we provided the formal |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
130 |
specification of what POSIX matching means and proved in Isabelle/HOL |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
131 |
the correctness |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
132 |
of Sulzmann and Lu's extended algorithm accordingly |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
133 |
\cite{AusafDyckhoffUrban2016}. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
134 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
135 |
The second difficulty is that Brzozowski's derivatives can |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
136 |
grow to arbitrarily big sizes. For example if we start with the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
137 |
regular expression \mbox{@{text "(a + aa)\<^sup>*"}} and take |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
138 |
successive derivatives according to the character $a$, we end up with |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
139 |
a sequence of ever-growing derivatives like |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
140 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
141 |
\def\ll{\stackrel{\_\backslash{} a}{\longrightarrow}} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
142 |
\begin{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
143 |
\begin{tabular}{rll} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
144 |
$(a + aa)^*$ & $\ll$ & $(\ONE + \ONE{}a) \cdot (a + aa)^*$\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
145 |
& $\ll$ & $(\ZERO + \ZERO{}a + \ONE) \cdot (a + aa)^* \;+\; (\ONE + \ONE{}a) \cdot (a + aa)^*$\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
146 |
& $\ll$ & $(\ZERO + \ZERO{}a + \ZERO) \cdot (a + aa)^* + (\ONE + \ONE{}a) \cdot (a + aa)^* \;+\; $\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
147 |
& & $\qquad(\ZERO + \ZERO{}a + \ONE) \cdot (a + aa)^* + (\ONE + \ONE{}a) \cdot (a + aa)^*$\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
148 |
& $\ll$ & \ldots \hspace{15mm}(regular expressions of sizes 98, 169, 283, 468, 767, \ldots) |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
149 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
150 |
\end{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
151 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
152 |
\noindent where after around 35 steps we run out of memory on a |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
153 |
typical computer (we shall define shortly the precise details of our |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
154 |
regular expressions and the derivative operation). Clearly, the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
155 |
notation involving $\ZERO$s and $\ONE$s already suggests |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
156 |
simplification rules that can be applied to regular regular |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
157 |
expressions, for example $\ZERO{}\,r \Rightarrow \ZERO$, $\ONE{}\,r |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
158 |
\Rightarrow r$, $\ZERO{} + r \Rightarrow r$ and $r + r \Rightarrow |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
159 |
r$. While such simple-minded simplifications have been proved in our |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
160 |
earlier work to preserve the correctness of Sulzmann and Lu's |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
161 |
algorithm \cite{AusafDyckhoffUrban2016}, they unfortunately do |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
162 |
\emph{not} help with limiting the growth of the derivatives shown |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
163 |
above: the growth is slowed, but some derivatives can still grow rather |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
164 |
quickly beyond any finite bound. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
165 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
166 |
|
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
167 |
Sulzmann and Lu address this ``growth problem'' in a second algorithm |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
168 |
\cite{Sulzmann2014} where they introduce bitcoded |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
169 |
regular expressions. In this version, POSIX values are |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
170 |
represented as bitsequences and such sequences are incrementally generated |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
171 |
when derivatives are calculated. The compact representation |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
172 |
of bitsequences and regular expressions allows them to define a more |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
173 |
``aggressive'' simplification method that keeps the size of the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
174 |
derivatives finitely bounded no matter what the length of the string is. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
175 |
They make some informal claims about the correctness and linear behaviour |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
176 |
of this version, but do not provide any supporting proof arguments, not |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
177 |
even ``pencil-and-paper'' arguments. They write about their bitcoded |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
178 |
\emph{incremental parsing method} (that is the algorithm to be fixed and formalised |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
179 |
in this paper): |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
180 |
% |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
181 |
\begin{quote}\it |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
182 |
``Correctness Claim: We further claim that the incremental parsing |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
183 |
method [..] in combination with the simplification steps [..] |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
184 |
yields POSIX parse trees. We have tested this claim |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
185 |
extensively [..] but yet |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
186 |
have to work out all proof details.'' \cite[Page 14]{Sulzmann2014} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
187 |
\end{quote} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
188 |
|
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
189 |
\noindent{}\textbf{Contributions:} We fill this gap by implementing in Isabelle/HOL |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
190 |
our version of the derivative-based lexing algorithm of Sulzmann and Lu |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
191 |
\cite{Sulzmann2014} where regular expressions are annotated with |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
192 |
bitsequences. We define the crucial simplification function as a |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
193 |
recursive function, without the need of a fixpoint operation. One objective of |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
194 |
the simplification function is to remove duplicates of regular |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
195 |
expressions. For this Sulzmann and Lu use in their paper the standard |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
196 |
@{text nub} function from Haskell's list library, but this function |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
197 |
does not achieve the intended objective with bitcoded regular expressions. The |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
198 |
reason is that in the bitcoded setting, each copy generally has a |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
199 |
different bitcode annotation---so @{text nub} would never ``fire''. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
200 |
Inspired by Scala's library for lists, we shall instead use a @{text |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
201 |
distinctWith} function that finds duplicates under an ``erasing'' function |
616 | 202 |
that deletes bitcodes before comparing regular expressions. |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
203 |
We shall also introduce our \emph{own} arguments and definitions for |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
204 |
establishing the correctness of the bitcoded algorithm when |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
205 |
simplifications are included. Finally we |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
206 |
establish that the size of derivatives can be finitely bounded.\medskip%\footnote{ In this paper, we shall first briefly introduce the basic notions |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
207 |
%of regular expressions and describe the definition |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
208 |
%of POSIX lexing from our earlier work \cite{AusafDyckhoffUrban2016}. This serves |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
209 |
%as a reference point for what correctness means in our Isabelle/HOL proofs. We shall then prove |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
210 |
%the correctness for the bitcoded algorithm without simplification, and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
211 |
%after that extend the proof to include simplification.}\smallskip |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
212 |
%\mbox{}\\[-10mm] |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
213 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
214 |
\noindent In this paper, we shall first briefly introduce the basic notions |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
215 |
of regular expressions and describe the definition |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
216 |
of POSIX lexing from our earlier work \cite{AusafDyckhoffUrban2016}. This serves |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
217 |
as a reference point for what correctness means in our Isabelle/HOL proofs. We shall then prove |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
218 |
the correctness for the bitcoded algorithm without simplification, and |
643 | 219 |
after that extend the proof to include simplification. |
220 |
Our Isabelle code including the results from Sec.~5 is available |
|
221 |
from \textcolor{darkblue}{\url{https://github.com/urbanchr/posix}}. |
|
222 |
\mbox{}\\[-6mm] |
|
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
223 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
224 |
*} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
225 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
226 |
section {* Background *} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
227 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
228 |
text {* |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
229 |
In our Isabelle/HOL formalisation strings are lists of characters with |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
230 |
the empty string being represented by the empty list, written $[]$, |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
231 |
and list-cons being written as $\_\!::\!\_\,$; string |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
232 |
concatenation is $\_ \,@\, \_\,$. We often use the usual |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
233 |
bracket notation for lists also for strings; for example a string |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
234 |
consisting of just a single character $c$ is written $[c]$. |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
235 |
Our regular expressions are defined as the following inductive |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
236 |
datatype:\\[-4mm] |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
237 |
% |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
238 |
\begin{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
239 |
@{text "r ::="} \; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
240 |
@{const "ZERO"} $\mid$ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
241 |
@{const "ONE"} $\mid$ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
242 |
@{term "CH c"} $\mid$ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
243 |
@{term "ALT r\<^sub>1 r\<^sub>2"} $\mid$ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
244 |
@{term "SEQ r\<^sub>1 r\<^sub>2"} $\mid$ |
563
c92a41d9c4da
updated paper and theories to include n-times
Christian Urban <christian.urban@kcl.ac.uk>
parents:
502
diff
changeset
|
245 |
@{term "STAR r"} $\mid$ |
c92a41d9c4da
updated paper and theories to include n-times
Christian Urban <christian.urban@kcl.ac.uk>
parents:
502
diff
changeset
|
246 |
@{term "NTIMES r n"} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
247 |
\end{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
248 |
|
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
249 |
\noindent where @{const ZERO} stands for the regular expression that |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
250 |
does not match any string, @{const ONE} for the regular expression |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
251 |
that matches only the empty string and @{term c} for matching a |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
252 |
character literal. The constructors $+$ and $\cdot$ represent |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
253 |
alternatives and sequences, respectively. We sometimes omit the |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
254 |
$\cdot$ in a sequence regular expression for brevity. The |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
255 |
\emph{language} of a regular expression, written $L(r)$, is defined |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
256 |
as usual and we omit giving the definition here (see for example |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
257 |
\cite{AusafDyckhoffUrban2016}). |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
258 |
|
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
259 |
In our work here we also add to the usual ``basic'' regular |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
260 |
expressions the \emph{bounded} regular expression @{term "NTIMES r |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
261 |
n"} where the @{term n} specifies that @{term r} should match |
643 | 262 |
exactly @{term n}-times (it is not included in Sulzmann and Lu's original work). For brevity we omit the other bounded |
644 | 263 |
regular expressions @{text "r"}$^{\{..\textit{n}\}}$, |
264 |
@{text "r"}$^{\{\textit{n}..\}}$ |
|
265 |
and @{text "r"}$^{\{\textit{n}..\textit{m}\}}$ which specify intervals for how many |
|
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
266 |
times @{text r} should match. The results presented in this paper |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
267 |
extend straightforwardly to them too. The importance of the bounded |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
268 |
regular expressions is that they are often used in practical |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
269 |
applications, such as Snort (a system for detecting network |
599 | 270 |
intrusions) and also in XML Schema definitions. According to Bj\"{o}rklund et |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
271 |
al~\cite{BjorklundMartensTimm2015}, bounded regular expressions |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
272 |
occur frequently in the latter and can have counters of up to |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
273 |
ten million. The problem is that tools based on the classic notion |
644 | 274 |
of automata need to expand @{text "r"}$^{\{\textit{n}\}}$ into @{text n} |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
275 |
connected copies of the automaton for @{text r}. This leads to very |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
276 |
inefficient matching algorithms or algorithms that consume large |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
277 |
amounts of memory. A classic example is the regular expression |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
278 |
\mbox{@{term "SEQ (SEQ (STAR (ALT a b)) a) (NTIMES (ALT a b) n)"}} |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
279 |
where the minimal DFA requires at least $2^{n + 1}$ states (see |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
280 |
\cite{CountingSet2020}). Therefore regular expression matching |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
281 |
libraries that rely on the classic notion of DFAs often impose |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
282 |
adhoc limits for bounded regular expressions: For example in the |
571
a76458dd9e4c
added paper about counting automata
Christian Urban <christian.urban@kcl.ac.uk>
parents:
569
diff
changeset
|
283 |
regular expression matching library in the Go language and also in Google's RE2 library the regular expression |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
284 |
@{term "NTIMES a 1001"} is not permitted, because no counter can be |
643 | 285 |
above 1000; and in the regular expression library in Rust |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
286 |
expressions such as @{text "a\<^bsup>{1000}{100}{5}\<^esup>"} give an error |
642 | 287 |
message for being too big. Up until recently,\footnote{up until version 1.5.4 of the regex |
288 |
library in Rust; see also CVE-2022-24713.} Rust |
|
643 | 289 |
however happily generated automata for regular expressions such as |
642 | 290 |
@{text "a\<^bsup>{0}{4294967295}\<^esup>"}. This was due to a bug |
291 |
in the algorithm that decides when a regular expression is acceptable |
|
643 | 292 |
or too big according to Rust's classification (it did not account for the fact that @{text "a\<^bsup>{0}\<^esup>"} and similar examples can match the empty string). We shall come back to |
642 | 293 |
this example later in the paper. |
294 |
These problems can of course be solved in matching |
|
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
295 |
algorithms where automata go beyond the classic notion and for |
643 | 296 |
instance include explicit counters (e.g.~\cite{CountingSet2020}). |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
297 |
The point here is that Brzozowski derivatives and the algorithms by |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
298 |
Sulzmann and Lu can be straightforwardly extended to deal with |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
299 |
bounded regular expressions and moreover the resulting code |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
300 |
still consists of only simple recursive functions and inductive |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
301 |
datatypes. Finally, bounded regular expressions |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
302 |
do not destroy our finite boundedness property, which we shall |
642 | 303 |
prove later on. |
304 |
||
305 |
%, because during the lexing process counters will only be |
|
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
306 |
%decremented. |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
307 |
|
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
308 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
309 |
Central to Brzozowski's regular expression matcher are two functions |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
310 |
called @{text nullable} and \emph{derivative}. The latter is written |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
311 |
$r\backslash c$ for the derivative of the regular expression $r$ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
312 |
w.r.t.~the character $c$. Both functions are defined by recursion over |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
313 |
regular expressions. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
314 |
% |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
315 |
\begin{center} |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
316 |
\begin{tabular}{c @ {\hspace{-9mm}}c} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
317 |
\begin{tabular}{r@ {\hspace{2mm}}c@ {\hspace{2mm}}l} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
318 |
@{thm (lhs) der.simps(1)} & $\dn$ & @{thm (rhs) der.simps(1)}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
319 |
@{thm (lhs) der.simps(2)} & $\dn$ & @{thm (rhs) der.simps(2)}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
320 |
@{thm (lhs) der.simps(3)} & $\dn$ & @{thm (rhs) der.simps(3)}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
321 |
@{thm (lhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
322 |
@{thm (lhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{text "if"} @{term "nullable(r\<^sub>1)"}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
323 |
& & @{text "then"} @{term "ALT (SEQ (der c r\<^sub>1) r\<^sub>2) (der c r\<^sub>2)"}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
324 |
& & @{text "else"} @{term "SEQ (der c r\<^sub>1) r\<^sub>2"}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
325 |
% & & @{thm (rhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]}\\ |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
326 |
@{thm (lhs) der.simps(6)} & $\dn$ & @{thm (rhs) der.simps(6)}\\ |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
327 |
@{thm (lhs) der.simps(7)} & $\dn$ & @{thm (rhs) der.simps(7)} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
328 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
329 |
& |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
330 |
\begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
331 |
@{thm (lhs) nullable.simps(1)} & $\dn$ & @{thm (rhs) nullable.simps(1)}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
332 |
@{thm (lhs) nullable.simps(2)} & $\dn$ & @{thm (rhs) nullable.simps(2)}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
333 |
@{thm (lhs) nullable.simps(3)} & $\dn$ & @{thm (rhs) nullable.simps(3)}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
334 |
@{thm (lhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
335 |
@{thm (lhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
336 |
@{thm (lhs) nullable.simps(6)} & $\dn$ & @{thm (rhs) nullable.simps(6)}\\ |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
337 |
@{thm (lhs) nullable.simps(7)} & $\dn$ & @{text "if"} @{text "n = 0"}\\ |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
338 |
& & @{text "then"} @{const "True"}\\ |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
339 |
& & @{text "else"} @{term "nullable r"} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
340 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
341 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
342 |
\end{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
343 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
344 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
345 |
We can extend this definition to give derivatives w.r.t.~strings, |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
346 |
namely as @{thm (lhs) ders.simps(1)}$\dn$@{thm (rhs) ders.simps(1)} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
347 |
and @{thm (lhs) ders.simps(2)}$\dn$@{thm (rhs) ders.simps(2)}. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
348 |
Using @{text nullable} and the derivative operation, we can |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
349 |
define a simple regular expression matcher, namely |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
350 |
$@{text "match s r"} \dn @{term nullable}(r\backslash s)$. |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
351 |
This is essentially Brzozowski's algorithm from 1964. Its |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
352 |
main virtue is that the algorithm can be easily implemented as a |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
353 |
functional program (either in a functional programming language or in |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
354 |
a theorem prover). The correctness of @{text match} amounts to |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
355 |
establishing the property:%\footnote{It is a fun exercise to formally prove this property in a theorem prover.} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
356 |
% |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
357 |
\begin{proposition}\label{matchcorr} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
358 |
@{text "match s r"} \;\;\text{if and only if}\;\; $s \in L(r)$ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
359 |
\end{proposition} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
360 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
361 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
362 |
It is a fun exercise to formally prove this property in a theorem prover. |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
363 |
We are aware |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
364 |
of a mechanised correctness proof of Brzozowski's derivative-based matcher in HOL4 by |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
365 |
Owens and Slind~\cite{Owens2008}. Another one in Isabelle/HOL is part |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
366 |
of the work by Krauss and Nipkow~\cite{Krauss2011}. And another one |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
367 |
in Coq is given by Coquand and Siles \cite{Coquand2012}. |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
368 |
Also Ribeiro and Du Bois give one in Agda~\cite{RibeiroAgda2017}. |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
369 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
370 |
The novel idea of Sulzmann and Lu is to extend this algorithm for |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
371 |
lexing, where it is important to find out which part of the string |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
372 |
is matched by which part of the regular expression. |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
373 |
For this Sulzmann and Lu presented two lexing algorithms in their |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
374 |
paper~\cite{Sulzmann2014}. The first algorithm consists of two phases: first a |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
375 |
matching phase (which is Brzozowski's algorithm) and then a value |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
376 |
construction phase. The values encode \emph{how} a regular expression |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
377 |
matches a string. \emph{Values} are defined as the inductive datatype |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
378 |
% |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
379 |
\begin{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
380 |
@{text "v ::="} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
381 |
@{const "Void"} $\mid$ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
382 |
@{term "val.Char c"} $\mid$ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
383 |
@{term "Left v"} $\mid$ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
384 |
@{term "Right v"} $\mid$ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
385 |
@{term "Seq v\<^sub>1 v\<^sub>2"} $\mid$ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
386 |
@{term "Stars vs"} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
387 |
\end{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
388 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
389 |
\noindent where we use @{term vs} to stand for a list of values. The |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
390 |
string underlying a value can be calculated by a @{const flat} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
391 |
function, written @{term "flat DUMMY"}. It traverses a value and |
616 | 392 |
collects the characters contained in it (see \cite{AusafDyckhoffUrban2016}). |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
393 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
394 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
395 |
Sulzmann and Lu also define inductively an |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
396 |
inhabitation relation that associates values to regular expressions. Our |
616 | 397 |
version of this relation is defined by the following six rules: |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
398 |
% |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
399 |
\begin{center} |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
400 |
\begin{tabular}{@ {}l@ {}} |
644 | 401 |
@{thm[mode=Axiom] Prf.intros(4)}\qquad |
402 |
@{thm[mode=Rule] Prf.intros(2)[of "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]}\qquad |
|
403 |
@{thm[mode=Rule] Prf.intros(3)[of "v\<^sub>2" "r\<^sub>2" "r\<^sub>1"]}\qquad |
|
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
404 |
@{thm[mode=Rule] Prf.intros(1)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]}\medskip\\ |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
405 |
@{thm[mode=Axiom] Prf.intros(5)[of "c"]}\qquad |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
406 |
@{thm[mode=Rule] Prf.intros(6)[of "vs" "r"]}\qquad |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
407 |
$\mprset{flushleft}\inferrule{ |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
408 |
@{thm (prem 1) Prf.intros(7)[of "vs\<^sub>1" "r" "vs\<^sub>2" "n"]}\\\\ |
642 | 409 |
@{thm (prem 2) Prf.intros(7)[of "vs\<^sub>1" "r" "vs\<^sub>2" "n"]}\quad |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
410 |
@{thm (prem 3) Prf.intros(7)[of "vs\<^sub>1" "r" "vs\<^sub>2" "n"]} |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
411 |
} |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
412 |
{@{thm (concl) Prf.intros(7)[of "vs\<^sub>1" "r" "vs\<^sub>2" "n"]}} |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
413 |
$ |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
414 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
415 |
\end{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
416 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
417 |
\noindent Note that no values are associated with the regular expression |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
418 |
@{term ZERO}, since it cannot match any string. Interesting is our version |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
419 |
of the rule for @{term "STAR r"} where we require that each value |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
420 |
in @{term vs} flattens to a non-empty string. This means if @{term "STAR r"} ``fires'' |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
421 |
one or more times, then each copy needs to match a non-empty string. |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
422 |
Similarly, in the rule |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
423 |
for @{term "NTIMES r n"} we require that the length of the list |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
424 |
@{term "vs\<^sub>1 @ vs\<^sub>2"} equals @{term n} (meaning the regular expression |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
425 |
@{text r} matches @{text n}-times) and that the first segment of this list |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
426 |
contains values that flatten to non-empty strings followed by a segment that |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
427 |
only contains values that flatten to the empty string. |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
428 |
It is routine to establish how values ``inhabiting'' a regular |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
429 |
expression correspond to the language of a regular expression, namely |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
430 |
@{thm L_flat_Prf}. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
431 |
|
616 | 432 |
In general there is more than one value inhabiting a regular |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
433 |
expression (meaning regular expressions can typically match more |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
434 |
than one string). But even when fixing a string from the language of the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
435 |
regular expression, there are generally more than one way of how the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
436 |
regular expression can match this string. POSIX lexing is about |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
437 |
identifying the unique value for a given regular expression and a |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
438 |
string that satisfies the informal POSIX rules (see |
499
6a100d32314c
updated the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
498
diff
changeset
|
439 |
\cite{POSIX,Kuklewicz,OkuiSuzuki2010,Sulzmann2014,Vansummeren2006}). |
6a100d32314c
updated the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
498
diff
changeset
|
440 |
%\footnote{POSIX |
6a100d32314c
updated the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
498
diff
changeset
|
441 |
% lexing acquired its name from the fact that the corresponding |
6a100d32314c
updated the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
498
diff
changeset
|
442 |
% rules were described as part of the POSIX specification for |
6a100d32314c
updated the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
498
diff
changeset
|
443 |
% Unix-like operating systems \cite{POSIX}.} |
6a100d32314c
updated the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
498
diff
changeset
|
444 |
Sometimes these |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
445 |
informal rules are called \emph{maximal munch rule} and \emph{rule priority}. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
446 |
One contribution of our earlier paper is to give a convenient |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
447 |
specification for what POSIX values are (the inductive rules are shown in |
616 | 448 |
Fig~\ref{POSIXrules}). |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
449 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
450 |
\begin{figure}[t] |
499
6a100d32314c
updated the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
498
diff
changeset
|
451 |
\begin{center}\small% |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
452 |
\begin{tabular}{@ {\hspace{-2mm}}c@ {}} |
642 | 453 |
\\[-8.5mm] |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
454 |
@{thm[mode=Axiom] Posix.intros(1)}\<open>P\<close>@{term "ONE"} \quad |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
455 |
@{thm[mode=Axiom] Posix.intros(2)}\<open>P\<close>@{term "c"}\quad |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
456 |
@{thm[mode=Rule] Posix.intros(3)[of "s" "r\<^sub>1" "v" "r\<^sub>2"]}\<open>P+L\<close>\quad |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
457 |
@{thm[mode=Rule] Posix.intros(4)[of "s" "r\<^sub>2" "v" "r\<^sub>1"]}\<open>P+R\<close>\medskip\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
458 |
$\mprset{flushleft} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
459 |
\inferrule |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
460 |
{@{thm (prem 1) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \qquad |
642 | 461 |
@{thm (prem 2) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \qquad |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
462 |
@{thm (prem 3) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}} |
642 | 463 |
{@{thm (concl) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}$\<open>PS\<close>\medskip\\ |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
464 |
@{thm[mode=Axiom] Posix.intros(7)}\<open>P[]\<close>\qquad |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
465 |
$\mprset{flushleft} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
466 |
\inferrule |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
467 |
{@{thm (prem 1) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
468 |
@{thm (prem 2) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
469 |
@{thm (prem 3) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \\\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
470 |
@{thm (prem 4) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}} |
642 | 471 |
{@{thm (concl) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}$\<open>P\<star>\<close>\medskip\\ |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
472 |
\mprset{sep=4mm} |
642 | 473 |
@{thm[mode=Rule] Posix.intros(9)}\<open>Pn[]\<close>\quad\; |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
474 |
$\mprset{flushleft} |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
475 |
\inferrule |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
476 |
{@{thm (prem 1) Posix.intros(8)[of "s\<^sub>1" "r" "v" "s\<^sub>2" n "vs"]} \qquad |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
477 |
@{thm (prem 2) Posix.intros(8)[of "s\<^sub>1" "r" "v" "s\<^sub>2" n "vs"]} \qquad |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
478 |
@{thm (prem 3) Posix.intros(8)[of "s\<^sub>1" "r" "v" "s\<^sub>2" n "vs"]} \\\\ |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
479 |
@{thm (prem 4) Posix.intros(8)[of "s\<^sub>1" "r" "v" "s\<^sub>2" n "vs"]}} |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
480 |
{@{thm (concl) Posix.intros(8)[of "s\<^sub>1" "r" "v" "s\<^sub>2" n "vs"]}}$\<open>Pn+\<close> |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
481 |
\\[-4mm] |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
482 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
483 |
\end{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
484 |
\caption{The inductive definition of POSIX values taken from our earlier paper \cite{AusafDyckhoffUrban2016}. The ternary relation, written $(s, r) \rightarrow v$, formalises the notion |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
485 |
of given a string $s$ and a regular |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
486 |
expression $r$ what is the unique value $v$ that satisfies the informal POSIX constraints for |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
487 |
regular expression matching.\smallskip}\label{POSIXrules} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
488 |
\end{figure} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
489 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
490 |
The clever idea by Sulzmann and Lu \cite{Sulzmann2014} in their first algorithm is to define |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
491 |
an injection function on values that mirrors (but inverts) the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
492 |
construction of the derivative on regular expressions. Essentially it |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
493 |
injects back a character into a value. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
494 |
For this they define two functions called @{text mkeps} and @{text inj}: |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
495 |
% |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
496 |
\begin{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
497 |
\begin{tabular}{@ {}l@ {}} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
498 |
\begin{tabular}{@ {}lcl@ {}} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
499 |
@{thm (lhs) mkeps.simps(1)} & $\dn$ & @{thm (rhs) mkeps.simps(1)}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
500 |
@{thm (lhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
501 |
@{thm (lhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
502 |
@{thm (lhs) mkeps.simps(4)} & $\dn$ & @{thm (rhs) mkeps.simps(4)}\\ |
642 | 503 |
@{thm (lhs) mkeps.simps(5)} & $\dn$ & @{thm (rhs) mkeps.simps(5)} |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
504 |
\end{tabular} |
642 | 505 |
%\end{tabular} |
506 |
%\end{center} |
|
507 |
\smallskip\\ |
|
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
508 |
|
642 | 509 |
%\begin{center} |
510 |
%\begin{tabular}{@ {}cc@ {}} |
|
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
511 |
\begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {}@ {}} |
642 | 512 |
@{thm (lhs) injval.simps(1)} & $\dn$ & @{thm (rhs) injval.simps(1)[of "c"]}\\ |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
513 |
@{thm (lhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]} & $\dn$ & |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
514 |
@{thm (rhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
515 |
@{thm (lhs) injval.simps(3)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]} & $\dn$ & |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
516 |
@{thm (rhs) injval.simps(3)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]}\\ |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
517 |
%! |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
518 |
@{thm (lhs) injval.simps(4)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]} & $\dn$ |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
519 |
& @{thm (rhs) injval.simps(4)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]}\\ |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
520 |
@{thm (lhs) injval.simps(5)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]} & $\dn$ |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
521 |
& @{thm (rhs) injval.simps(5)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]}\\ |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
522 |
@{thm (lhs) injval.simps(6)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]} & $\dn$ & |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
523 |
@{thm (rhs) injval.simps(6)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]}\\ |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
524 |
%! |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
525 |
@{thm (lhs) injval.simps(7)[of "r" "c" "v" "vs"]} & $\dn$ |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
526 |
& @{thm (rhs) injval.simps(7)[of "r" "c" "v" "vs"]}\\ |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
527 |
@{thm (lhs) injval.simps(8)[of "r" "n" "c" "v" "vs"]} & $\dn$ |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
528 |
& @{thm (rhs) injval.simps(8)[of "r" "n" "c" "v" "vs"]} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
529 |
\end{tabular} |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
530 |
%!& |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
531 |
%!\begin{tabular}{@ {\hspace{-3mm}}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {}@ {}} |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
532 |
|
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
533 |
%!\end{tabular} |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
534 |
%!\end{tabular} |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
535 |
\end{tabular}%!\smallskip |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
536 |
\end{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
537 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
538 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
539 |
The function @{text mkeps} is run when the last derivative is nullable, that is |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
540 |
the string to be matched is in the language of the regular expression. It generates |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
541 |
a value for how the last derivative can match the empty string. In case |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
542 |
of @{term "NTIMES r n"} we use the function @{term replicate} in order to generate |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
543 |
a list of exactly @{term n} copies, which is the length of the list we expect in this |
642 | 544 |
case. The injection function\footnote{While the character argument @{text c} is not |
545 |
strictly necessary in the @{text inj}-function for the fragment of regular expressions we |
|
644 | 546 |
use in this paper, it is necessary for extended regular expressions. For example for the range regular expression of the form @{text "[a-z]"}. |
642 | 547 |
We therefore keep this argument from the original formulation of @{text inj} by Sulzmann and Lu.} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
548 |
then calculates the corresponding value for each intermediate derivative until |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
549 |
a value for the original regular expression is generated. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
550 |
Graphically the algorithm by |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
551 |
Sulzmann and Lu can be illustrated by the following picture %in Figure~\ref{Sulz} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
552 |
where the path from the left to the right involving @{term derivatives}/@{const |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
553 |
nullable} is the first phase of the algorithm (calculating successive |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
554 |
\Brz's derivatives) and @{const mkeps}/@{text inj}, the path from right to |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
555 |
left, the second phase. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
556 |
% |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
557 |
\begin{center} |
642 | 558 |
\begin{tikzpicture}[scale=0.85,node distance=8mm, |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
559 |
every node/.style={minimum size=6mm}] |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
560 |
\node (r1) {@{term "r\<^sub>1"}}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
561 |
\node (r2) [right=of r1]{@{term "r\<^sub>2"}}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
562 |
\draw[->,line width=1mm](r1)--(r2) node[above,midway] {@{term "der a DUMMY"}}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
563 |
\node (r3) [right=of r2]{@{term "r\<^sub>3"}}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
564 |
\draw[->,line width=1mm](r2)--(r3) node[above,midway] {@{term "der b DUMMY"}}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
565 |
\node (r4) [right=of r3]{@{term "r\<^sub>4"}}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
566 |
\draw[->,line width=1mm](r3)--(r4) node[above,midway] {@{term "der c DUMMY"}}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
567 |
\draw (r4) node[anchor=west] {\;\raisebox{3mm}{@{term nullable}}}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
568 |
\node (v4) [below=of r4]{@{term "v\<^sub>4"}}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
569 |
\draw[->,line width=1mm](r4) -- (v4); |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
570 |
\node (v3) [left=of v4] {@{term "v\<^sub>3"}}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
571 |
\draw[->,line width=1mm](v4)--(v3) node[below,midway] {\<open>inj r\<^sub>3 c\<close>}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
572 |
\node (v2) [left=of v3]{@{term "v\<^sub>2"}}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
573 |
\draw[->,line width=1mm](v3)--(v2) node[below,midway] {\<open>inj r\<^sub>2 b\<close>}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
574 |
\node (v1) [left=of v2] {@{term "v\<^sub>1"}}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
575 |
\draw[->,line width=1mm](v2)--(v1) node[below,midway] {\<open>inj r\<^sub>1 a\<close>}; |
642 | 576 |
\draw (r4) node[anchor=north west] {\;\raisebox{-5mm}{@{term "mkeps"}}}; |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
577 |
\end{tikzpicture} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
578 |
\end{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
579 |
% |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
580 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
581 |
The picture shows the steps required when a |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
582 |
regular expression, say @{text "r\<^sub>1"}, matches the string @{term |
499
6a100d32314c
updated the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
498
diff
changeset
|
583 |
"[a,b,c]"}. The first lexing algorithm by Sulzmann and Lu can be defined as:%\\[-8mm] |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
584 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
585 |
% \begin{figure}[t] |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
586 |
%\begin{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
587 |
%\begin{tikzpicture}[scale=1,node distance=1cm, |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
588 |
% every node/.style={minimum size=6mm}] |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
589 |
%\node (r1) {@{term "r\<^sub>1"}}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
590 |
%\node (r2) [right=of r1]{@{term "r\<^sub>2"}}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
591 |
%\draw[->,line width=1mm](r1)--(r2) node[above,midway] {@{term "der a DUMMY"}}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
592 |
%\node (r3) [right=of r2]{@{term "r\<^sub>3"}}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
593 |
%\draw[->,line width=1mm](r2)--(r3) node[above,midway] {@{term "der b DUMMY"}}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
594 |
%\node (r4) [right=of r3]{@{term "r\<^sub>4"}}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
595 |
%\draw[->,line width=1mm](r3)--(r4) node[above,midway] {@{term "der c DUMMY"}}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
596 |
%\draw (r4) node[anchor=west] {\;\raisebox{3mm}{@{term nullable}}}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
597 |
%\node (v4) [below=of r4]{@{term "v\<^sub>4"}}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
598 |
%\draw[->,line width=1mm](r4) -- (v4); |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
599 |
%\node (v3) [left=of v4] {@{term "v\<^sub>3"}}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
600 |
%\draw[->,line width=1mm](v4)--(v3) node[below,midway] {\<open>inj r\<^sub>3 c\<close>}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
601 |
%\node (v2) [left=of v3]{@{term "v\<^sub>2"}}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
602 |
%\draw[->,line width=1mm](v3)--(v2) node[below,midway] {\<open>inj r\<^sub>2 b\<close>}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
603 |
%\node (v1) [left=of v2] {@{term "v\<^sub>1"}}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
604 |
%\draw[->,line width=1mm](v2)--(v1) node[below,midway] {\<open>inj r\<^sub>1 a\<close>}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
605 |
%\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{@{term "mkeps"}}}; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
606 |
%\end{tikzpicture} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
607 |
%\end{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
608 |
%\mbox{}\\[-13mm] |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
609 |
% |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
610 |
%\caption{The two phases of the first algorithm by Sulzmann \& Lu \cite{Sulzmann2014}, |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
611 |
%matching the string @{term "[a,b,c]"}. The first phase (the arrows from |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
612 |
%left to right) is \Brz's matcher building successive derivatives. If the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
613 |
%last regular expression is @{term nullable}, then the functions of the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
614 |
%second phase are called (the top-down and right-to-left arrows): first |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
615 |
%@{term mkeps} calculates a value @{term "v\<^sub>4"} witnessing |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
616 |
%how the empty string has been recognised by @{term "r\<^sub>4"}. After |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
617 |
%that the function @{term inj} ``injects back'' the characters of the string into |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
618 |
%the values. The value @{term "v\<^sub>1"} is the result of the algorithm representing |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
619 |
%the POSIX value for this string and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
620 |
%regular expression. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
621 |
%\label{Sulz}} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
622 |
%\end{figure} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
623 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
624 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
625 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
626 |
\begin{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
627 |
\begin{tabular}{lcl} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
628 |
@{thm (lhs) lexer.simps(1)} & $\dn$ & @{thm (rhs) lexer.simps(1)}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
629 |
@{thm (lhs) lexer.simps(2)} & $\dn$ & @{text "case"} @{term "lexer (der c r) s"} @{text of} |
642 | 630 |
@{term "None"} @{text "\<Rightarrow>"} @{term None} |
631 |
%%& & \hspace{27mm} |
|
632 |
$|\;$ @{term "Some v"} @{text "\<Rightarrow>"} @{term "Some (injval r c v)"} |
|
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
633 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
634 |
\end{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
635 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
636 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
637 |
We have shown in our earlier paper \cite{AusafDyckhoffUrban2016} that |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
638 |
this algorithm is correct, that is it generates POSIX values. The |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
639 |
central property we established relates the derivative operation to the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
640 |
injection function. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
641 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
642 |
\begin{proposition}\label{Posix2} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
643 |
\textit{If} $(s,\; r\backslash c) \rightarrow v$ \textit{then} $(c :: s,\; r) \rightarrow$ \textit{inj} $r\; c\; v$. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
644 |
\end{proposition} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
645 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
646 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
647 |
With this in place we were able to prove: |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
648 |
|
499
6a100d32314c
updated the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
498
diff
changeset
|
649 |
\begin{proposition}\mbox{}\label{lexercorrect} |
642 | 650 |
\textrm{(1)}\; @{thm (lhs) lexer_correct_None} if and only if @{thm (rhs) lexer_correct_None}.\\ |
651 |
\mbox{\hspace{29.5mm}}\textrm{(2)}\; @{thm (lhs) lexer_correct_Some} if and only if @{thm (rhs) lexer_correct_Some}. |
|
499
6a100d32314c
updated the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
498
diff
changeset
|
652 |
% |
6a100d32314c
updated the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
498
diff
changeset
|
653 |
% \smallskip\\ |
6a100d32314c
updated the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
498
diff
changeset
|
654 |
%\begin{tabular}{ll} |
642 | 655 |
%(1) & @{thm (lhs) lexer_correct_None} \;if and only if\; @{thm (rhs) lexer_correct_None}\\ |
656 |
%(2) & @{thm (lhs) lexer_correct_Some} \;if and only if\; @{thm (rhs) lexer_correct_Some}\\ |
|
499
6a100d32314c
updated the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
498
diff
changeset
|
657 |
%\end{tabular} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
658 |
\end{proposition} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
659 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
660 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
661 |
In fact we have shown that, in the success case, the generated POSIX value $v$ is |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
662 |
unique and in the failure case that there is no POSIX value $v$ that satisfies |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
663 |
$(s, r) \rightarrow v$. While the algorithm is correct, it is excruciatingly |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
664 |
slow in cases where the derivatives grow arbitrarily (recall the example from the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
665 |
Introduction). However it can be used as a convenient reference point for the correctness |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
666 |
proof of the second algorithm by Sulzmann and Lu, which we shall describe next. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
667 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
668 |
*} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
669 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
670 |
section {* Bitcoded Regular Expressions and Derivatives *} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
671 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
672 |
text {* |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
673 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
674 |
In the second part of their paper \cite{Sulzmann2014}, |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
675 |
Sulzmann and Lu describe another algorithm that also generates POSIX |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
676 |
values but dispenses with the second phase where characters are |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
677 |
injected ``back'' into values. For this they annotate bitcodes to |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
678 |
regular expressions, which we define in Isabelle/HOL as the datatype\medskip |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
679 |
|
642 | 680 |
\begin{center} |
681 |
%\noindent |
|
682 |
%\begin{minipage}{0.9\textwidth} |
|
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
683 |
\,@{term breg} $\,::=\,$ @{term "AZERO"} |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
684 |
$\,\mid$ @{term "AONE bs"} |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
685 |
$\,\mid$ @{term "ACHAR bs c"} |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
686 |
$\,\mid$ @{term "AALTs bs rs"} |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
687 |
$\,\mid$ @{term "ASEQ bs r\<^sub>1 r\<^sub>2"} |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
688 |
$\,\mid$ @{term "ASTAR bs r"} |
642 | 689 |
$\,\mid$ \mbox{@{term "ANTIMES bs r n"}}%\hfill\mbox{} |
690 |
%\end{minipage}\medskip |
|
691 |
\end{center} |
|
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
692 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
693 |
\noindent where @{text bs} stands for bitsequences; @{text r}, |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
694 |
@{text "r\<^sub>1"} and @{text "r\<^sub>2"} for bitcoded regular |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
695 |
expressions; and @{text rs} for lists of bitcoded regular |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
696 |
expressions. The binary alternative @{text "ALT bs r\<^sub>1 r\<^sub>2"} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
697 |
is just an abbreviation for \mbox{@{text "ALTs bs [r\<^sub>1, r\<^sub>2]"}}. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
698 |
For bitsequences we use lists made up of the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
699 |
constants @{text Z} and @{text S}. The idea with bitcoded regular |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
700 |
expressions is to incrementally generate the value information (for |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
701 |
example @{text Left} and @{text Right}) as bitsequences. For this |
578
e71a6e2aca2d
updated paper and literature
Christian Urban <christian.urban@kcl.ac.uk>
parents:
571
diff
changeset
|
702 |
Sulzmann and Lu follow Nielsen and Henglein \cite{NielsenHenglein2011} |
e71a6e2aca2d
updated paper and literature
Christian Urban <christian.urban@kcl.ac.uk>
parents:
571
diff
changeset
|
703 |
and define a coding |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
704 |
function for how values can be coded into bitsequences. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
705 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
706 |
\begin{center} |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
707 |
\begin{tabular}{c@ {\hspace{5mm}}c} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
708 |
\begin{tabular}{lcl} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
709 |
@{thm (lhs) code.simps(1)} & $\dn$ & @{thm (rhs) code.simps(1)}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
710 |
@{thm (lhs) code.simps(2)} & $\dn$ & @{thm (rhs) code.simps(2)}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
711 |
@{thm (lhs) code.simps(3)} & $\dn$ & @{thm (rhs) code.simps(3)}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
712 |
@{thm (lhs) code.simps(4)} & $\dn$ & @{thm (rhs) code.simps(4)} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
713 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
714 |
& |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
715 |
\begin{tabular}{lcl} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
716 |
@{thm (lhs) code.simps(5)[of "v\<^sub>1" "v\<^sub>2"]} & $\dn$ & @{thm (rhs) code.simps(5)[of "v\<^sub>1" "v\<^sub>2"]}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
717 |
@{thm (lhs) code.simps(6)} & $\dn$ & @{thm (rhs) code.simps(6)}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
718 |
@{thm (lhs) code.simps(7)} & $\dn$ & @{thm (rhs) code.simps(7)}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
719 |
\mbox{\phantom{XX}}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
720 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
721 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
722 |
\end{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
723 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
724 |
\noindent |
642 | 725 |
As can be seen, this coding is ``lossy'' in the sense that it does not |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
726 |
record explicitly character values and also not sequence values (for |
642 | 727 |
them it just appends two bitsequences). However, the |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
728 |
different alternatives for @{text Left}, respectively @{text Right}, are recorded as @{text Z} and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
729 |
@{text S} followed by some bitsequence. Similarly, we use @{text Z} to indicate |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
730 |
if there is still a value coming in the list of @{text Stars}, whereas @{text S} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
731 |
indicates the end of the list. The lossiness makes the process of |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
732 |
decoding a bit more involved, but the point is that if we have a |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
733 |
regular expression \emph{and} a bitsequence of a corresponding value, |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
734 |
then we can always decode the value accurately~(see Fig.~\ref{decode}). |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
735 |
The function \textit{decode} checks whether all of the bitsequence is |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
736 |
consumed and returns the corresponding value as @{term "Some v"}; otherwise |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
737 |
it fails with @{text "None"}. We can establish that for a value $v$ |
616 | 738 |
inhabiting a regular expression $r$, the decoding of its |
578
e71a6e2aca2d
updated paper and literature
Christian Urban <christian.urban@kcl.ac.uk>
parents:
571
diff
changeset
|
739 |
bitsequence never fails (see also \cite{NielsenHenglein2011}). |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
740 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
741 |
%The decoding can be |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
742 |
%defined by using two functions called $\textit{decode}'$ and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
743 |
%\textit{decode}: |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
744 |
|
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
745 |
\begin{figure}[t] |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
746 |
\begin{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
747 |
\begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {}} |
599 | 748 |
$\textit{decode}'\,bs\,(\ONE)$ & $\;\dn\;$ & $(\Empty, bs)$\\ |
749 |
$\textit{decode}'\,bs\,(c)$ & $\;\dn\;$ & $(\Char\,c, bs)$\\ |
|
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
750 |
$\textit{decode}'\,(\Z\!::\!bs)\;(r_1 + r_2)$ & $\dn$ & |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
751 |
$\textit{let}\,(v, bs_1) = \textit{decode}'\,bs\,r_1\;\textit{in}\; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
752 |
(\Left\,v, bs_1)$\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
753 |
$\textit{decode}'\,(\S\!::\!bs)\;(r_1 + r_2)$ & $\dn$ & |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
754 |
$\textit{let}\,(v, bs_1) = \textit{decode}'\,bs\,r_2\;\textit{in}\; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
755 |
(\Right\,v, bs_1)$\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
756 |
$\textit{decode}'\,bs\;(r_1\cdot r_2)$ & $\dn$ & |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
757 |
$\textit{let}\,(v_1, bs_1) = \textit{decode}'\,bs\,r_1\;\textit{in}$\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
758 |
& & $\textit{let}\,(v_2, bs_2) = \textit{decode}'\,bs_1\,r_2$ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
759 |
\hspace{2mm}$\textit{in}\;(\Seq\,v_1\,v_2, bs_2)$\\ |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
760 |
$\textit{decode}'\,(\S\!::\!bs)\,(r^*)$ & $\dn$ & $(\Stars\,[], bs)$\\ |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
761 |
$\textit{decode}'\,(\Z\!::\!bs)\,(r^*)$ & $\dn$ & |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
762 |
$\textit{let}\,(v, bs_1) = \textit{decode}'\,bs\,r\;\textit{in}$\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
763 |
& & $\textit{let}\,(\Stars\,vs, bs_2) = \textit{decode}'\,bs_1\,r^*$ |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
764 |
\hspace{2mm}$\textit{in}\;(\Stars\,v\!::\!vs, bs_2)$\\ |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
765 |
$\textit{decode}'\,bs\,(r^{\{n\}})$ & $\dn$ & $\textit{decode}'\,bs\,r^*$\smallskip\smallskip\\ |
599 | 766 |
|
767 |
$\textit{decode}\,bs\,r$ & $\dn$ & |
|
768 |
$\textit{let}\,(v, bs') = \textit{decode}'\,bs\,r\;\textit{in}$\\ |
|
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
769 |
& & $\;\;\;\,\textit{if}\;bs' = []\;\textit{then}\;\textit{Some}\,v\;\textit{else}\;\textit{None}$\\[-5mm] |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
770 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
771 |
\end{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
772 |
\caption{Two functions, called $\textit{decode}'$ and \textit{decode}, for decoding a value from a bitsequence with the help of a regular expression.\\[-5mm]}\label{decode} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
773 |
\end{figure} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
774 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
775 |
%\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
776 |
%The function \textit{decode} checks whether all of the bitsequence is |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
777 |
%consumed and returns the corresponding value as @{term "Some v"}; otherwise |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
778 |
%it fails with @{text "None"}. We can establish that for a value $v$ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
779 |
%inhabited by a regular expression $r$, the decoding of its |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
780 |
%bitsequence never fails. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
781 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
782 |
\begin{lemma}\label{codedecode}\it |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
783 |
If $\;\vdash v : r$ then |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
784 |
$\;\textit{decode}\,(\textit{code}\, v)\,r = \textit{Some}\, v$. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
785 |
\end{lemma} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
786 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
787 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
788 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
789 |
Sulzmann and Lu define the function \emph{internalise} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
790 |
in order to transform (standard) regular expressions into annotated |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
791 |
regular expressions. We write this operation as $r^\uparrow$. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
792 |
This internalisation uses the following |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
793 |
\emph{fuse} function.\medskip |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
794 |
|
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
795 |
% |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
796 |
%!\begin{center} |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
797 |
\noindent\begin{minipage}{\textwidth} |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
798 |
\begin{tabular}{@ {}c@ {\hspace{3mm}}c@ {}} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
799 |
\begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
800 |
$\textit{fuse}\,bs\,(\textit{ZERO})$ & $\dn$ & $\textit{ZERO}$\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
801 |
$\textit{fuse}\,bs\,(\textit{ONE}\,bs')$ & $\dn$ & |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
802 |
$\textit{ONE}\,(bs\,@\,bs')$\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
803 |
$\textit{fuse}\,bs\,(\textit{CHAR}\,bs'\,c)$ & $\dn$ & |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
804 |
$\textit{CHAR}\,(bs\,@\,bs')\,c$ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
805 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
806 |
& |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
807 |
\begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
808 |
$\textit{fuse}\,bs\,(\textit{ALTs}\,bs'\,rs)$ & $\dn$ & |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
809 |
$\textit{ALTs}\,(bs\,@\,bs')\,rs$\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
810 |
$\textit{fuse}\,bs\,(\textit{SEQ}\,bs'\,r_1\,r_2)$ & $\dn$ & |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
811 |
$\textit{SEQ}\,(bs\,@\,bs')\,r_1\,r_2$\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
812 |
$\textit{fuse}\,bs\,(\textit{STAR}\,bs'\,r)$ & $\dn$ & |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
813 |
$\textit{STAR}\,(bs\,@\,bs')\,r$\\ |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
814 |
$\textit{fuse}\,bs\,(\textit{NT}\,bs'\,r\,n)$ & $\dn$ & |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
815 |
$\textit{NT}\,(bs\,@\,bs')\,r\,n$ |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
816 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
817 |
\end{tabular} |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
818 |
\end{minipage}\medskip |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
819 |
%!\end{center} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
820 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
821 |
\noindent |
616 | 822 |
This function ``fuses'' a bitsequence to the topmost constructor of an bitcoded regular expressions. |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
823 |
A regular expression can then be \emph{internalised} into a bitcoded |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
824 |
regular expression as follows: |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
825 |
% |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
826 |
%!\begin{center} |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
827 |
%!\begin{tabular}{@ {}c@ {\hspace{2mm}}c@ {\hspace{2mm}}c@ {}} |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
828 |
%!\begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l} |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
829 |
%!$(\ZERO)^\uparrow$ & $\dn$ & $\textit{ZERO}$\\ |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
830 |
%!$(\ONE)^\uparrow$ & $\dn$ & $\textit{ONE}\,[]$ |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
831 |
%!\end{tabular} |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
832 |
%!& |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
833 |
%!\begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l} |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
834 |
%!$(c)^\uparrow$ & $\dn$ & $\textit{CHAR}\,[]\,c$\\ |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
835 |
%!$(r^*)^\uparrow$ & $\dn$ & $\textit{STAR}\;[]\,r^\uparrow$\\ |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
836 |
%!$(r^{\{n\}})^\uparrow$ & $\dn$ & |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
837 |
%! $\textit{NT}\;[]\,r^\uparrow\,n$ |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
838 |
%!\end{tabular} |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
839 |
%!& |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
840 |
%!\begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l} |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
841 |
%!$(r_1 + r_2)^\uparrow$ & $\dn$ & |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
842 |
%! $\textit{ALT}\;[]\,(\textit{fuse}\,[\Z]\,r_1^\uparrow)\, |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
843 |
%! (\textit{fuse}\,[\S]\,r_2^\uparrow)$\\ |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
844 |
%!$(r_1\cdot r_2)^\uparrow$ & $\dn$ & |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
845 |
%! $\textit{SEQ}\;[]\,r_1^\uparrow\,r_2^\uparrow$ |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
846 |
%!\end{tabular} |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
847 |
%!\end{tabular} |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
848 |
%!\end{center} |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
849 |
%! |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
850 |
\begin{center} |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
851 |
\begin{tabular}{@ {}c@ {\hspace{7mm}}c@ {}} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
852 |
\begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
853 |
$(\ZERO)^\uparrow$ & $\dn$ & $\textit{ZERO}$\\ |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
854 |
$(\ONE)^\uparrow$ & $\dn$ & $\textit{ONE}\,[]$\\ |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
855 |
$(c)^\uparrow$ & $\dn$ & $\textit{CHAR}\,[]\,c$\\ |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
856 |
$(r^*)^\uparrow$ & $\dn$ & $\textit{STAR}\;[]\,r^\uparrow$\\ |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
857 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
858 |
& |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
859 |
\begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
860 |
$(r_1 + r_2)^\uparrow$ & $\dn$ & |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
861 |
$\textit{ALT}\;[]\,(\textit{fuse}\,[\Z]\,r_1^\uparrow)\, |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
862 |
(\textit{fuse}\,[\S]\,r_2^\uparrow)$\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
863 |
$(r_1\cdot r_2)^\uparrow$ & $\dn$ & |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
864 |
$\textit{SEQ}\;[]\,r_1^\uparrow\,r_2^\uparrow$\\ |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
865 |
$(r^{\{n\}})^\uparrow$ & $\dn$ & |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
866 |
$\textit{NT}\;[]\,r^\uparrow\,n$ |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
867 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
868 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
869 |
\end{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
870 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
871 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
872 |
There is also an \emph{erase}-function, written $r^\downarrow$, which |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
873 |
transforms a bitcoded regular expression into a (standard) regular |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
874 |
expression by just erasing the annotated bitsequences. We omit the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
875 |
straightforward definition. For defining the algorithm, we also need |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
876 |
the functions \textit{bnullable} and \textit{bmkeps}(\textit{s}), which are the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
877 |
``lifted'' versions of \textit{nullable} and \textit{mkeps} acting on |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
878 |
bitcoded regular expressions. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
879 |
% |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
880 |
\begin{center} |
643 | 881 |
\begin{tabular}{@ {\hspace{-1mm}}c@ {\hspace{10mm}}c@ {}} |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
882 |
\begin{tabular}{@ {}l@ {\hspace{0.5mm}}c@ {\hspace{1mm}}l} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
883 |
$\textit{bnullable}\,(\textit{ZERO})$ & $\dn$ & $\textit{False}$\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
884 |
$\textit{bnullable}\,(\textit{ONE}\,bs)$ & $\dn$ & $\textit{True}$\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
885 |
$\textit{bnullable}\,(\textit{CHAR}\,bs\,c)$ & $\dn$ & $\textit{False}$\\ |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
886 |
$\textit{bnullable}\,(\textit{ALTs}\,bs\,\rs)$ & $\dn$ &\\ |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
887 |
\multicolumn{3}{l}{$\quad\exists\, r \in \rs. \,\textit{bnullable}\,r$}\\ |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
888 |
$\textit{bnullable}\,(\textit{SEQ}\,bs\,r_1\,r_2)$ & $\dn$ &\\ |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
889 |
\multicolumn{3}{l}{$\quad\textit{bnullable}\,r_1\wedge \textit{bnullable}\,r_2$}\\ |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
890 |
$\textit{bnullable}\,(\textit{STAR}\,bs\,r)$ & $\dn$ & |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
891 |
$\textit{True}$\\ |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
892 |
$\textit{bnullable}\,(\textit{NT}\,bs\,r\,n)$ & $\dn$ &\\ |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
893 |
\multicolumn{3}{l}{$\quad\textit{if}\;n = 0\;\textit{then}\;\textit{True}\; |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
894 |
\textit{else}\;\textit{bnullable}\,r$}\\ |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
895 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
896 |
& |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
897 |
\begin{tabular}{@ {}l@ {\hspace{0.5mm}}c@ {\hspace{1mm}}l@ {}} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
898 |
$\textit{bmkeps}\,(\textit{ONE}\,bs)$ & $\dn$ & $bs$\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
899 |
$\textit{bmkeps}\,(\textit{ALTs}\,bs\,\rs)$ & $\dn$ & |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
900 |
$bs\,@\,\textit{bmkepss}\,\rs$\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
901 |
$\textit{bmkeps}\,(\textit{SEQ}\,bs\,r_1\,r_2)$ & $\dn$ &\\ |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
902 |
\multicolumn{3}{l}{$\quad{}bs \,@\,\textit{bmkeps}\,r_1\,@\, \textit{bmkeps}\,r_2$}\\ |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
903 |
$\textit{bmkeps}\,(\textit{STAR}\,bs\,r)$ & $\dn$ & |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
904 |
$bs \,@\, [\S]$\\ |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
905 |
$\textit{bmkeps}\,(\textit{NT}\,bs\,r\,n)$ & $\dn$ &\\ |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
906 |
\multicolumn{3}{l@ {}}{$\quad\textit{if}\;n=0\;\textit{then}\;bs \,@\, [\S]$}\\ |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
907 |
\multicolumn{3}{l@ {}}{$\quad\textit{else}\,bs \,@\, [\Z] \,@\, \textit{bmkeps}\,r\,@\, |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
908 |
\textit{bmkeps}\,(@{term "ANTIMES [] r (n - 1)"})$}\\ |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
909 |
$\textit{bmkepss}\,(r\!::\!\rs)$ & $\dn$ &\\ |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
910 |
\multicolumn{3}{l}{$\quad\textit{if}\;\textit{bnullable}\,r\,\textit{then}\;\textit{bmkeps}\,r\; |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
911 |
\textit{else}\;\textit{bmkepss}\,\rs$} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
912 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
913 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
914 |
\end{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
915 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
916 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
917 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
918 |
The key function in the bitcoded algorithm is the derivative of a |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
919 |
bitcoded regular expression. This derivative function calculates the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
920 |
derivative but at the same time also the incremental part of the bitsequences |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
921 |
that contribute to constructing a POSIX value. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
922 |
% |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
923 |
\begin{center} |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
924 |
\begin{tabular}{@ {}l@ {\hspace{-1mm}}cl@ {}} |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
925 |
$(\textit{ZERO})\backslash c$ & $\;\dn\;$ & $\textit{ZERO}$\\ |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
926 |
$(\textit{ONE}\;bs)\backslash c$ & $\;\dn\;$ & $\textit{ZERO}$\\ |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
927 |
$(\textit{CHAR}\;bs\,d)\backslash c$ & $\dn$ & |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
928 |
$\textit{if}\;c=d\; \;\textit{then}\; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
929 |
\textit{ONE}\;bs\;\textit{else}\;\textit{ZERO}$\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
930 |
$(\textit{ALTs}\;bs\,\rs)\backslash c$ & $\dn$ & |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
931 |
$\textit{ALTs}\,bs\,(\mathit{map}\,(\_\backslash c)\,\rs)$\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
932 |
$(\textit{SEQ}\;bs\,r_1\,r_2)\backslash c$ & $\dn$ & |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
933 |
$\textit{if}\;\textit{bnullable}\,r_1$\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
934 |
& &$\textit{then}\;\textit{ALT}\,bs\;(\textit{SEQ}\,[]\,(r_1\backslash c)\,r_2)$ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
935 |
$\;(\textit{fuse}\,(\textit{bmkeps}\,r_1)\,(r_2\backslash c))$\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
936 |
& &$\textit{else}\;\textit{SEQ}\,bs\,(r_1\backslash c)\,r_2$\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
937 |
$(\textit{STAR}\,bs\,r)\backslash c$ & $\dn$ & |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
938 |
$\textit{SEQ}\;(bs\,@\,[\Z])\,(r\backslash c)\, |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
939 |
(\textit{STAR}\,[]\,r)$\\ |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
940 |
$(\textit{NT}\,bs\,r\,n)\backslash c$ & $\dn$ & |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
941 |
$\textit{if}\;n = 0\; \textit{then}\;\textit{ZERO}\;\textit{else}\; |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
942 |
\textit{SEQ}\;(bs\,@\,[\Z])\,(r\backslash c)\, |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
943 |
(\textit{NT}\,[]\,r\,(n - 1))$ |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
944 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
945 |
\end{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
946 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
947 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
948 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
949 |
This function can also be extended to strings, written $r\backslash s$, |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
950 |
just like the standard derivative. We omit the details. Finally we |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
951 |
can define Sulzmann and Lu's bitcoded lexer, which we call \textit{blexer}: |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
952 |
% |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
953 |
\begin{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
954 |
\begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {}} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
955 |
$\textit{blexer}\;r\,s$ & $\dn$ & |
642 | 956 |
$\textit{let}\;r_{der} = (r^\uparrow)\backslash s\;\textit{in}$\\ |
957 |
& & $\;\;\;\textit{if}\; \textit{bnullable}(r_{der})$ |
|
958 |
$\;\textit{then}\;\textit{decode}\,(\textit{bmkeps}\,r_{der})\,r$ |
|
959 |
$\;\textit{else}\;\textit{None}$ |
|
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
960 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
961 |
\end{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
962 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
963 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
964 |
This bitcoded lexer first internalises the regular expression $r$ and then |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
965 |
builds the bitcoded derivative according to $s$. If the derivative is |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
966 |
(b)nullable the string is in the language of $r$ and it extracts the bitsequence using the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
967 |
$\textit{bmkeps}$ function. Finally it decodes the bitsequence into a value. If |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
968 |
the derivative is \emph{not} nullable, then $\textit{None}$ is |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
969 |
returned. We can show that this way of calculating a value |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
970 |
generates the same result as \textit{lexer}. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
971 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
972 |
Before we can proceed we need to define a helper function, called |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
973 |
\textit{retrieve}, which Sulzmann and Lu introduced for the correctness proof. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
974 |
% |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
975 |
\begin{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
976 |
\begin{tabular}{lcl} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
977 |
@{thm (lhs) retrieve.simps(1)} & $\dn$ & @{thm (rhs) retrieve.simps(1)}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
978 |
@{thm (lhs) retrieve.simps(2)} & $\dn$ & @{thm (rhs) retrieve.simps(2)}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
979 |
@{thm (lhs) retrieve.simps(3)} & $\dn$ & @{thm (rhs) retrieve.simps(3)}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
980 |
@{thm (lhs) better_retrieve(1)} & $\dn$ & @{thm (rhs) better_retrieve(1)}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
981 |
@{thm (lhs) better_retrieve(2)} & $\dn$ & @{thm (rhs) better_retrieve(2)}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
982 |
@{thm (lhs) retrieve.simps(6)[of _ "r\<^sub>1" "r\<^sub>2" "v\<^sub>1" "v\<^sub>2"]} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
983 |
& $\dn$ & @{thm (rhs) retrieve.simps(6)[of _ "r\<^sub>1" "r\<^sub>2" "v\<^sub>1" "v\<^sub>2"]}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
984 |
@{thm (lhs) retrieve.simps(7)} & $\dn$ & @{thm (rhs) retrieve.simps(7)}\\ |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
985 |
@{thm (lhs) retrieve.simps(8)} & $\dn$ & @{thm (rhs) retrieve.simps(8)}\\ |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
986 |
@{thm (lhs) retrieve.simps(9)} & $\dn$ & @{thm (rhs) retrieve.simps(9)}\\ |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
987 |
@{thm (lhs) better_retrieve2} & $\dn$ & @{thm (rhs) better_retrieve2} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
988 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
989 |
\end{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
990 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
991 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
992 |
The idea behind this function is to retrieve a possibly partial |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
993 |
bitsequence from a bitcoded regular expression, where the retrieval is |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
994 |
guided by a value. For example if the value is $\Left$ then we |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
995 |
descend into the left-hand side of an alternative in order to |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
996 |
assemble the bitcode. Similarly for |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
997 |
$\Right$. The property we can show is that for a given $v$ and $r$ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
998 |
with $\vdash v : r$, the retrieved bitsequence from the internalised |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
999 |
regular expression is equal to the bitcoded version of $v$. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1000 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1001 |
\begin{lemma}\label{retrievecode} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1002 |
If $\vdash v : r$ then $\textit{code}\, v = \textit{retrieve}\,(r^\uparrow)\,v$. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1003 |
\end{lemma} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1004 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1005 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1006 |
We also need some auxiliary facts about how the bitcoded operations |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1007 |
relate to the ``standard'' operations on regular expressions. For |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1008 |
example if we build a bitcoded derivative and erase the result, this |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1009 |
is the same as if we first erase the bitcoded regular expression and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1010 |
then perform the ``standard'' derivative operation. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1011 |
|
499
6a100d32314c
updated the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
498
diff
changeset
|
1012 |
\begin{lemma}\label{bnullable}%\mbox{}\smallskip\\ |
6a100d32314c
updated the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
498
diff
changeset
|
1013 |
\textit{(1)} $(r\backslash s)^\downarrow = (r^\downarrow)\backslash s$\\ |
642 | 1014 |
\mbox{\hspace{21.5mm}}\textit{(2)} $\textit{bnullable}(r)$ iff $\textit{nullable}(r^\downarrow)$\\ |
1015 |
\mbox{\hspace{21.5mm}}\textit{(3)} $\textit{bmkeps}(r) = \textit{retrieve}\,r\,(\textit{mkeps}\,(r^\downarrow))$ provided $\textit{nullable}(r^\downarrow)$ |
|
499
6a100d32314c
updated the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
498
diff
changeset
|
1016 |
%\begin{tabular}{ll} |
6a100d32314c
updated the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
498
diff
changeset
|
1017 |
%\textit{(1)} & $(r\backslash s)^\downarrow = (r^\downarrow)\backslash s$\\ |
6a100d32314c
updated the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
498
diff
changeset
|
1018 |
%\textit{(2)} & $\textit{bnullable}(r)$ iff $\textit{nullable}(r^\downarrow)$\\ |
642 | 1019 |
%\textit{(3)} & $\textit{bmkeps}(r) = \textit{retrieve}\,r\,(\textit{mkeps}\,(r^\downarrow))$ \;provided\; $\textit{nullable}(r^\downarrow)$. |
499
6a100d32314c
updated the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
498
diff
changeset
|
1020 |
%\end{tabular} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1021 |
\end{lemma} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1022 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1023 |
%\begin{proof} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1024 |
% All properties are by induction on annotated regular expressions. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1025 |
% %There are no interesting cases. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1026 |
%\end{proof} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1027 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1028 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1029 |
The only difficulty left for the correctness proof is that the bitcoded algorithm |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1030 |
has only a ``forward phase'' where POSIX values are generated incrementally. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1031 |
We can achieve the same effect with @{text lexer} (which has two phases) by stacking up injection |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1032 |
functions during the forward phase. An auxiliary function, called $\textit{flex}$, |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1033 |
allows us to recast the rules of $\lexer$ in terms of a single |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1034 |
phase and stacked up injection functions. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1035 |
% |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1036 |
\begin{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1037 |
\begin{tabular}{@ {}l@ {}c@ {}l@ {\hspace{10mm}}l@ {}c@ {}l@ {}} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1038 |
$\textit{flex}\;r\,f\,[]$ & $\dn$ & $f$ & |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1039 |
$\textit{flex}\;r\,f\,(c\!::\!s)$ & $\dn$ & |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1040 |
$\textit{flex}\,(r\backslash c)\,(\lambda v.\,f\,(\inj\,r\,c\,v))\,s$ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1041 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1042 |
\end{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1043 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1044 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1045 |
The point of this function is that when |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1046 |
reaching the end of the string, we just need to apply the stacked up |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1047 |
injection functions to the value generated by @{text mkeps}. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1048 |
Using this function we can recast the success case in @{text lexer} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1049 |
as follows: |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1050 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1051 |
\begin{lemma}\label{flex} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1052 |
If @{text "lexer r s = Some v"} \;then\; @{text "v = "}$\,\textit{flex}\,r\,id\,s\, |
642 | 1053 |
(\textit{mkeps}\,(r\backslash s))$. |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1054 |
\end{lemma} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1055 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1056 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1057 |
Note we did not redefine \textit{lexer}, we just established that the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1058 |
value generated by \textit{lexer} can also be obtained by a different |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1059 |
method. While this different method is not efficient (we essentially |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1060 |
need to traverse the string $s$ twice, once for building the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1061 |
derivative $r\backslash s$ and another time for stacking up injection |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1062 |
functions), it helps us with proving |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1063 |
that incrementally building up values in @{text blexer} generates the same result. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1064 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1065 |
This brings us to our main lemma in this section: if we calculate a |
616 | 1066 |
derivative, say $r\backslash s$, and have a value, say $v$, inhabiting |
1067 |
this derivative, then we can produce the result @{text lexer} generates |
|
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1068 |
by applying this value to the stacked-up injection functions |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1069 |
that $\textit{flex}$ assembles. The lemma establishes that this is the same |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1070 |
value as if we build the annotated derivative $r^\uparrow\backslash s$ |
616 | 1071 |
and then retrieve the bitcoded version of @{text v}, followed by a |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1072 |
decoding step. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1073 |
|
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1074 |
\begin{lemma}[Main Lemma]\label{mainlemma}\mbox{}\\\it |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1075 |
If $\vdash v : r\backslash s$ then |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1076 |
$\textit{Some}\,(\textit{flex}\,r\,\textit{id}\,s\,v) = |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1077 |
\textit{decode}(\textit{retrieve}\,(r^\uparrow \backslash s)\,v)\,r$ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1078 |
\end{lemma} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1079 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1080 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1081 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1082 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1083 |
%With this lemma in place, |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1084 |
We can then prove the correctness of \textit{blexer}---it indeed |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1085 |
produces the same result as \textit{lexer}. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1086 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1087 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1088 |
\begin{theorem}\label{thmone} |
599 | 1089 |
$\textit{blexer}\,r\,s = \textit{lexer}\,r\,s$ |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1090 |
\end{theorem} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1091 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1092 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1093 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1094 |
\noindent This establishes that the bitcoded algorithm \emph{without} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1095 |
simplification produces correct results. This was |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1096 |
only conjectured by Sulzmann and Lu in their paper |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1097 |
\cite{Sulzmann2014}. The next step is to add simplifications. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1098 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1099 |
*} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1100 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1101 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1102 |
section {* Simplification *} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1103 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1104 |
text {* |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1105 |
|
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1106 |
Derivatives as calculated by Brzozowski's method are usually more |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1107 |
complex regular expressions than the initial one; the result is |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1108 |
that derivative-based matching and lexing algorithms are |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1109 |
often abysmally slow if the ``growth problem'' is not addressed. As Sulzmann and Lu wrote, various |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1110 |
optimisations are possible, such as the simplifications |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1111 |
$\ZERO{}\,r \Rightarrow \ZERO$, $\ONE{}\,r \Rightarrow r$, |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1112 |
$\ZERO{} + r \Rightarrow r$ and $r + r \Rightarrow r$. While these |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1113 |
simplifications can considerably speed up the two algorithms in many |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1114 |
cases, they do not solve fundamentally the growth problem with |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1115 |
derivatives. To see this let us return to the example from the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1116 |
Introduction that shows the derivatives for \mbox{@{text "(a + aa)\<^sup>*"}}. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1117 |
If we delete in the 3rd step all $\ZERO{}s$ and $\ONE$s according to |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1118 |
the simplification rules shown above we obtain |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1119 |
% |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1120 |
%% |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1121 |
% |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1122 |
\begin{equation}\def\xll{\xrightarrow{\_\backslash{} [a, a, a]}}\label{derivex} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1123 |
(a + aa)^* \quad\xll\quad |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1124 |
\underbrace{\mbox{$(\ONE + a) \cdot (a + aa)^*$}}_{r} \;+\; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1125 |
((a + aa)^* + \underbrace{\mbox{$(\ONE + a) \cdot (a + aa)^*$}}_{r}) |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1126 |
\end{equation} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1127 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1128 |
\noindent This is a simpler derivative, but unfortunately we |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1129 |
cannot make any further simplifications. This is a problem because |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1130 |
the outermost alternatives contains two copies of the same |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1131 |
regular expression (underlined with $r$). These copies will |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1132 |
spawn new copies in later derivative steps and they in turn even more copies. This |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1133 |
destroys any hope of taming the size of the derivatives. But the |
642 | 1134 |
second copy of $r$ in~\eqref{derivex} will never contribute to a |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1135 |
value, because POSIX lexing will always prefer matching a string |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1136 |
with the first copy. So it could be safely removed without affecting the correctness of the algorithm. |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1137 |
The issue with the simple-minded |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1138 |
simplification rules above is that the rule $r + r \Rightarrow r$ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1139 |
will never be applicable because as can be seen in this example the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1140 |
regular expressions are not next to each other but separated by another regular expression. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1141 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1142 |
But here is where Sulzmann and Lu's representation of generalised |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1143 |
alternatives in the bitcoded algorithm shines: in @{term |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1144 |
"ALTs bs rs"} we can define a more aggressive simplification by |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1145 |
recursively simplifying all regular expressions in @{text rs} and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1146 |
then analyse the resulting list and remove any duplicates. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1147 |
Another advantage with the bitsequences in bitcoded regular |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1148 |
expressions is that they can be easily modified such that simplification does not |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1149 |
interfere with the value constructions. For example we can ``flatten'', or |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1150 |
de-nest, or spill out, @{text ALTs} as follows |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1151 |
% |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1152 |
\[ |
642 | 1153 |
@{text "ALTs bs\<^sub>1 ((ALTs bs\<^sub>2 rs\<^sub>2) :: rs\<^sub>1)"} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1154 |
\quad\xrightarrow{bsimp}\quad |
616 | 1155 |
@{text "ALTs bs\<^sub>1 ((map (fuse bs\<^sub>2) rs\<^sub>2) @ rs\<^sub>1)"} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1156 |
\] |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1157 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1158 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1159 |
where we just need to fuse the bitsequence that has accumulated in @{text "bs\<^sub>2"} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1160 |
to the alternatives in @{text "rs\<^sub>2"}. As we shall show below this will |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1161 |
ensure that the correct value corresponding to the original (unsimplified) |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1162 |
regular expression can still be extracted. %In this way the value construction |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1163 |
%is not affected by simplification. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1164 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1165 |
However there is one problem with the definition for the more |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1166 |
aggressive simplification rules described by Sulzmann and Lu. Recasting |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1167 |
their definition with our syntax they define the step of removing |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1168 |
duplicates as |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1169 |
% |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1170 |
\[ @{text "bsimp (ALTs bs rs)"} \dn @{text "ALTs |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1171 |
bs (nub (map bsimp rs))"} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1172 |
\] |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1173 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1174 |
\noindent where they first recursively simplify the regular |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1175 |
expressions in @{text rs} (using @{text map}) and then use |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1176 |
Haskell's @{text nub}-function to remove potential |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1177 |
duplicates. While this makes sense when considering the example |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1178 |
shown in \eqref{derivex}, @{text nub} is the inappropriate |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1179 |
function in the case of bitcoded regular expressions. The reason |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1180 |
is that in general the elements in @{text rs} will have a |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1181 |
different annotated bitsequence and in this way @{text nub} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1182 |
will never find a duplicate to be removed. One correct way to |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1183 |
handle this situation is to first \emph{erase} the regular |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1184 |
expressions when comparing potential duplicates. This is inspired |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1185 |
by Scala's list functions of the form \mbox{@{text "distinctWith rs eq |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1186 |
acc"}} where @{text eq} is an user-defined equivalence relation that |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1187 |
compares two elements in @{text rs}. We define this function in |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1188 |
Isabelle/HOL as |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1189 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1190 |
\begin{center} |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1191 |
\begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {}} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1192 |
@{thm (lhs) distinctWith.simps(1)} & $\dn$ & @{thm (rhs) distinctWith.simps(1)}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1193 |
@{thm (lhs) distinctWith.simps(2)} & $\dn$ & |
642 | 1194 |
@{text "if (\<exists> y \<in> acc. eq x y)"} |
1195 |
@{text "then distinctWith xs eq acc"}\\ |
|
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1196 |
& & @{text "else x :: distinctWith xs eq ({x} \<union> acc)"} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1197 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1198 |
\end{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1199 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1200 |
\noindent where we scan the list from left to right (because we |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1201 |
have to remove later copies). In @{text distinctWith}, @{text eq} is intended to be an |
616 | 1202 |
equivalence relation for bitcoded regular expressions and @{text acc} is an accumulator for bitcoded regular |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1203 |
expressions---essentially a set of regular expressions that we have already seen |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1204 |
while scanning the list. Therefore we delete an element, say @{text x}, |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1205 |
from the list provided a @{text "y"} with @{text "y"} being equivalent to @{text x} is already in the accumulator; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1206 |
otherwise we keep @{text x} and scan the rest of the list but |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1207 |
add @{text "x"} as another ``seen'' element to @{text acc}. We will use |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1208 |
@{term distinctWith} where @{text eq} is an equivalence that deletes bitsequences from bitcoded regular expressions |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1209 |
before comparing the components. One way to define this in Isabelle/HOL is by the following recursive function from |
616 | 1210 |
bitcoded regular expressions to @{text bool}: |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1211 |
% |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1212 |
\begin{center} |
642 | 1213 |
\begin{tabular}{@ {}cc@ {}} |
1214 |
\begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {\hspace{1mm}}l@ {\hspace{1mm}}} |
|
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1215 |
@{thm (lhs) eq1.simps(1)} & $\dn$ & @{thm (rhs) eq1.simps(1)}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1216 |
@{thm (lhs) eq1.simps(2)[of DUMMY DUMMY]} & $\dn$ & @{thm (rhs) eq1.simps(2)[of DUMMY DUMMY]}\\ |
642 | 1217 |
@{thm (lhs) eq1.simps(7)[of DUMMY "r\<^sub>1" DUMMY "r\<^sub>2"]} & $\dn$ & @{thm (rhs) eq1.simps(7)[of DUMMY "r\<^sub>1" DUMMY "r\<^sub>2"]}\\ |
1218 |
@{thm (lhs) eq1.simps(5)[of DUMMY DUMMY]} & $\dn$ & @{thm (rhs) eq1.simps(5)[of DUMMY DUMMY]}\\ |
|
1219 |
\mbox{} |
|
1220 |
\end{tabular} |
|
1221 |
& |
|
1222 |
\begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {\hspace{1mm}}l@ {}} |
|
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1223 |
@{thm (lhs) eq1.simps(3)[of DUMMY "c" DUMMY "d"]} & $\dn$ & @{thm (rhs) eq1.simps(3)[of DUMMY "c" DUMMY "d"]}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1224 |
@{thm (lhs) eq1.simps(4)[of DUMMY "r\<^sub>1\<^sub>1" "r\<^sub>1\<^sub>2" DUMMY "r\<^sub>2\<^sub>1" "r\<^sub>2\<^sub>2"]} & $\dn$ & |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1225 |
@{thm (rhs) eq1.simps(4)[of DUMMY "r\<^sub>1\<^sub>1" "r\<^sub>1\<^sub>2" DUMMY "r\<^sub>2\<^sub>1" "r\<^sub>2\<^sub>2"]}\\ |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1226 |
@{thm (lhs) eq1.simps(8)[of DUMMY "r\<^sub>1" "n\<^sub>1" DUMMY "r\<^sub>2" "n\<^sub>2"]} & $\dn$ & @{thm (rhs) eq1.simps(8)[of DUMMY "r\<^sub>1" "n\<^sub>1" DUMMY "r\<^sub>2" "n\<^sub>2"]}\\ |
642 | 1227 |
@{thm (lhs) eq1.simps(6)[of DUMMY "r\<^sub>1" "rs\<^sub>1" DUMMY "r\<^sub>2" "rs\<^sub>2"]} & $\dn$ & \\ |
1228 |
\multicolumn{3}{r}{@{thm (rhs) eq1.simps(6)[of DUMMY "r\<^sub>1" "rs\<^sub>1" DUMMY "r\<^sub>2" "rs\<^sub>2"]}} |
|
1229 |
\end{tabular} |
|
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1230 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1231 |
\end{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1232 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1233 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1234 |
where all other cases are set to @{text False}. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1235 |
This equivalence is clearly a computationally more expensive operation than @{text nub}, |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1236 |
but is needed in order to make the removal of unnecessary copies |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1237 |
to work properly. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1238 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1239 |
Our simplification function depends on three more helper functions, one is called |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1240 |
@{text flts} and analyses lists of regular expressions coming from alternatives. |
642 | 1241 |
It is defined by four clauses as follows: |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1242 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1243 |
\begin{center} |
642 | 1244 |
\begin{tabular}{c} |
1245 |
@{thm (lhs) flts.simps(1)} $\dn$ @{thm (rhs) flts.simps(1)} \qquad |
|
1246 |
@{thm (lhs) flts.simps(2)} $\dn$ @{thm (rhs) flts.simps(2)} \qquad |
|
1247 |
@{text "flts ((ALTs bs' rs') :: rs"} |
|
1248 |
%@{ thm (lhs) flts.simps(3)[of "bs'" "rs'"]} |
|
1249 |
$\dn$ @{thm (rhs) flts.simps(3)[of "bs'" "rs'"]}\smallskip\\ |
|
1250 |
@{text "flts (r :: rs)"} $\dn$ @{text "r :: flts rs"}\hspace{5mm}(otherwise) |
|
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1251 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1252 |
\end{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1253 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1254 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1255 |
The second clause of @{text flts} removes all instances of @{text ZERO} in alternatives and |
642 | 1256 |
the third ``de-nests'' alternatives (but retains the |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1257 |
bitsequence @{text "bs'"} accumulated in the inner alternative). There are |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1258 |
some corner cases to be considered when the resulting list inside an alternative is |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1259 |
empty or a singleton list. We take care of those cases in the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1260 |
@{text "bsimpALTs"} function; similarly we define a helper function that simplifies |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1261 |
sequences according to the usual rules about @{text ZERO}s and @{text ONE}s: |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1262 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1263 |
\begin{center} |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1264 |
\begin{tabular}{@ {}c@ {\hspace{4mm}}c@ {}} |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1265 |
\begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {}} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1266 |
@{text "bsimpALTs bs []"} & $\dn$ & @{text "ZERO"}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1267 |
@{text "bsimpALTs bs [r]"} & $\dn$ & @{text "fuse bs r"}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1268 |
@{text "bsimpALTs bs rs"} & $\dn$ & @{text "ALTs bs rs"}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1269 |
\mbox{}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1270 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1271 |
& |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1272 |
\begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {}} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1273 |
@{text "bsimpSEQ bs _ ZERO"} & $\dn$ & @{text "ZERO"}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1274 |
@{text "bsimpSEQ bs ZERO _"} & $\dn$ & @{text "ZERO"}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1275 |
@{text "bsimpSEQ bs\<^sub>1 (ONE bs\<^sub>2) r\<^sub>2"} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1276 |
& $\dn$ & @{text "fuse (bs\<^sub>1 @ bs\<^sub>2) r\<^sub>2"}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1277 |
@{text "bsimpSEQ bs r\<^sub>1 r\<^sub>2"} & $\dn$ & @{text "SEQ bs r\<^sub>1 r\<^sub>2"} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1278 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1279 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1280 |
\end{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1281 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1282 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1283 |
With this in place we can define our simplification function as |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1284 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1285 |
\begin{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1286 |
\begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1287 |
@{thm (lhs) bsimp.simps(1)[of "bs" "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1288 |
@{thm (rhs) bsimp.simps(1)[of "bs" "r\<^sub>1" "r\<^sub>2"]}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1289 |
@{thm (lhs) bsimp.simps(2)[of "bs" _]} & $\dn$ & @{text "bsimpALTs bs (distinctWith (flts (map bsimp rs)) \<approx> \<emptyset>)"}\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1290 |
@{text "bsimp r"} & $\dn$ & @{text r} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1291 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1292 |
\end{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1293 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1294 |
\noindent |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1295 |
We believe our recursive function @{term bsimp} simplifies bitcoded regular |
642 | 1296 |
expressions as intended by Sulzmann and Lu with the small addition of removing ``useless'' @{text ONE}s |
1297 |
in sequence regular |
|
1298 |
expressions. There is no point in applying the |
|
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1299 |
@{text bsimp} function repeatedly (like the simplification in their paper which needs to be |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1300 |
applied until a fixpoint is reached) because we can show that @{term bsimp} is idempotent, |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1301 |
that is |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1302 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1303 |
\begin{proposition} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1304 |
@{term "bsimp (bsimp r) = bsimp r"} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1305 |
\end{proposition} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1306 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1307 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1308 |
This can be proved by induction on @{text r} but requires a detailed analysis |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1309 |
that the de-nesting of alternatives always results in a flat list of regular |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1310 |
expressions. We omit the details since it does not concern the correctness proof. |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1311 |
%It might be interesting to not that we do not simplify inside @{term STAR} and |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1312 |
%@{text NT}: the reason is that we want to keep the |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1313 |
|
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1314 |
Next we can include simplification after each derivative step leading to the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1315 |
following notion of bitcoded derivatives: |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1316 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1317 |
\begin{center} |
616 | 1318 |
\begin{tabular}{c@ {\hspace{10mm}}c} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1319 |
\begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1320 |
@{thm (lhs) bders_simp.simps(1)} & $\dn$ & @{thm (rhs) bders_simp.simps(1)} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1321 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1322 |
& |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1323 |
\begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1324 |
@{thm (lhs) bders_simp.simps(2)} & $\dn$ & @{thm (rhs) bders_simp.simps(2)} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1325 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1326 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1327 |
\end{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1328 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1329 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1330 |
and use it in the improved lexing algorithm defined as |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1331 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1332 |
\begin{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1333 |
\begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {}} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1334 |
$\textit{blexer}^+\;r\,s$ & $\dn$ & |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1335 |
$\textit{let}\;r_{der} = (r^\uparrow)\backslash_{bsimp}\, s\;\textit{in}$\\ |
642 | 1336 |
& & $\;\;\;\textit{if}\; \textit{bnullable}(r_{der})$ |
1337 |
$\;\textit{then}\;\textit{decode}\,(\textit{bmkeps}\,r_{der})\,r$ $\;\textit{else}\;\textit{None}$ |
|
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1338 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1339 |
\end{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1340 |
|
616 | 1341 |
\noindent |
1342 |
Note that in $\textit{blexer}^+$ the derivative $r_{der}$ is calculated |
|
1343 |
using the simplifying derivative $\_\,\backslash_{bsimp}\,\_$. |
|
1344 |
The remaining task is to show that @{term blexer} and |
|
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1345 |
@{term "blexer_simp"} generate the same answers. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1346 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1347 |
When we first |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1348 |
attempted this proof we encountered a problem with the idea |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1349 |
in Sulzmann and Lu's paper where the argument seems to be to appeal |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1350 |
again to the @{text retrieve}-function defined for the unsimplified version |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1351 |
of the algorithm. But |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1352 |
this does not work, because desirable properties such as |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1353 |
% |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1354 |
\[ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1355 |
@{text "retrieve r v = retrieve (bsimp r) v"} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1356 |
\] |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1357 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1358 |
\noindent do not hold under simplification---this property |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1359 |
essentially purports that we can retrieve the same value from a |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1360 |
simplified version of the regular expression. To start with @{text retrieve} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1361 |
depends on the fact that the value @{text v} corresponds to the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1362 |
structure of the regular expression @{text r}---but the whole point of simplification |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1363 |
is to ``destroy'' this structure by making the regular expression simpler. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1364 |
To see this consider the regular expression @{term "r = ALT r' ZERO"} and a corresponding |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1365 |
value @{text "v = Left v'"}. If we annotate bitcodes to @{text "r"}, then |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1366 |
we can use @{text retrieve} with @{text r} and @{text v} in order to extract a corresponding |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1367 |
bitsequence. The reason that this works is that @{text r} is an alternative |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1368 |
regular expression and @{text v} a corresponding @{text "Left"}-value. However, if we simplify |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1369 |
@{text r}, then @{text v} does not correspond to the shape of the regular |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1370 |
expression anymore. So unless one can somehow |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1371 |
synchronise the change in the simplified regular expressions with |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1372 |
the original POSIX value, there is no hope of appealing to @{text retrieve} in the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1373 |
correctness argument for @{term blexer_simp}. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1374 |
|
642 | 1375 |
For our proof we found it more helpful to introduce the rewriting systems shown in |
616 | 1376 |
Fig~\ref{SimpRewrites}. The idea is to generate |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1377 |
simplified regular expressions in small steps (unlike the @{text bsimp}-function which |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1378 |
does the same in a big step), and show that each of |
616 | 1379 |
the small steps preserves the bitcodes that lead to the POSIX value. |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1380 |
The rewrite system is organised such that $\leadsto$ is for bitcoded regular |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1381 |
expressions and $\stackrel{s}{\leadsto}$ for lists of bitcoded regular |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1382 |
expressions. The former essentially implements the simplifications of |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1383 |
@{text "bsimpSEQ"} and @{text flts}; while the latter implements the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1384 |
simplifications in @{text "bsimpALTs"}. We can show that any bitcoded |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1385 |
regular expression reduces in zero or more steps to the simplified |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1386 |
regular expression generated by @{text bsimp}: |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1387 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1388 |
\begin{lemma}\label{lemone} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1389 |
@{thm[mode=IfThen] rewrites_to_bsimp} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1390 |
\end{lemma} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1391 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1392 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1393 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1394 |
\noindent |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1395 |
We can also show that this rewrite system preserves @{term bnullable}, that |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1396 |
is simplification does not affect nullability: |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1397 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1398 |
\begin{lemma}\label{lembnull} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1399 |
@{thm[mode=IfThen] bnullable0(1)[of "r\<^sub>1" "r\<^sub>2"]} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1400 |
\end{lemma} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1401 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1402 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1403 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1404 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1405 |
From this, we can show that @{text bmkeps} will produce the same bitsequence |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1406 |
as long as one of the bitcoded regular expressions in $\leadsto$ is nullable (this lemma |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1407 |
establishes the missing fact we were not able to establish using @{text retrieve}, as suggested |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1408 |
in the paper by Sulzmannn and Lu). |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1409 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1410 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1411 |
\begin{lemma}\label{lemthree} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1412 |
@{thm[mode=IfThen] rewrite_bmkeps_aux(1)[of "r\<^sub>1" "r\<^sub>2"]} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1413 |
\end{lemma} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1414 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1415 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1416 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1417 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1418 |
Crucial is also the fact that derivative steps and simplification steps can be interleaved, |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1419 |
which is shown by the fact that $\leadsto$ is preserved under derivatives. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1420 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1421 |
\begin{lemma}\label{bderlem} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1422 |
@{thm[mode=IfThen] rewrite_preserves_bder(1)[of "r\<^sub>1" "r\<^sub>2"]} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1423 |
\end{lemma} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1424 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1425 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1426 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1427 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1428 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1429 |
Using this fact together with Lemma~\ref{lemone} allows us to prove the central lemma |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1430 |
that the unsimplified |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1431 |
derivative (with a string @{term s}) reduces to the simplified derivative (with the same string). |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1432 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1433 |
\begin{lemma}\label{lemtwo} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1434 |
@{thm[mode=IfThen] central} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1435 |
\end{lemma} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1436 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1437 |
%\begin{proof} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1438 |
%By reverse induction on @{term s} generalising over @{text r}. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1439 |
%\end{proof} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1440 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1441 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1442 |
With these lemmas in place we can finally establish that @{term "blexer_simp"} and @{term "blexer"} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1443 |
generate the same value, and using Theorem~\ref{thmone} from the previous section that this value |
599 | 1444 |
is indeed the POSIX value as generated by \textit{lexer}. |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1445 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1446 |
\begin{theorem} |
599 | 1447 |
@{thm[mode=IfThen] main_blexer_simp[symmetric]} \; (@{text "= lexer r s"}\; by Thm.~\ref{thmone}) |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1448 |
\end{theorem} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1449 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1450 |
%\begin{proof} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1451 |
%By unfolding the definitions and using Lemmas~\ref{lemtwo} and \ref{lemthree}. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1452 |
%\end{proof} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1453 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1454 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1455 |
This means that if the algorithm is called with a regular expression @{term r} and a string |
498
ab626b60ee64
fixed tiny typo in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
496
diff
changeset
|
1456 |
@{term s} with $@{term s} \in L(@{term r})$, it will return @{term "Some v"} for the unique |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1457 |
@{term v} we defined by the POSIX relation $(@{term s}, @{term r}) \rightarrow @{term v}$; otherwise |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1458 |
the algorithm returns @{term "None"} when $s \not\in L(r)$ and no such @{text v} exists. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1459 |
This completes the correctness proof for the second POSIX lexing algorithm by Sulzmann and Lu. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1460 |
The interesting point of this algorithm is that the sizes of derivatives do not grow arbitrarily big but |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1461 |
can be finitely bounded, which |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1462 |
we shall show next. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1463 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1464 |
\begin{figure}[t] |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1465 |
\begin{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1466 |
\begin{tabular}{@ {\hspace{-8mm}}c@ {}} |
643 | 1467 |
\\[-8mm] |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1468 |
@{thm[mode=Axiom] bs1[of _ "r\<^sub>2"]}$S\ZERO{}_l$\quad |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1469 |
@{thm[mode=Axiom] bs2[of _ "r\<^sub>1"]}$S\ZERO{}_r$\quad |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1470 |
@{thm[mode=Axiom] bs3[of "bs\<^sub>1" "bs\<^sub>2"]}$S\ONE$\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1471 |
@{thm[mode=Rule] bs4[of "r\<^sub>1" "r\<^sub>2" _ "r\<^sub>3"]}SL\qquad |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1472 |
@{thm[mode=Rule] bs5[of "r\<^sub>3" "r\<^sub>4" _ "r\<^sub>1"]}SR\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1473 |
@{thm[mode=Axiom] bs6}$A0$\quad |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1474 |
@{thm[mode=Axiom] bs7}$A1$\quad |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1475 |
@{thm[mode=Rule] bs10[of "rs\<^sub>1" "rs\<^sub>2"]}$AL$\\ |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1476 |
@{thm[mode=Rule] rrewrite_srewrite.ss2[of "rs\<^sub>1" "rs\<^sub>2"]}$LT$\quad |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1477 |
@{thm[mode=Rule] ss3[of "r\<^sub>1" "r\<^sub>2"]}$LH$\quad |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1478 |
@{thm[mode=Axiom] ss4}$L\ZERO$\quad |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1479 |
@{thm[mode=Axiom] ss5[of "bs" "rs\<^sub>1" "rs\<^sub>2"]}$LS$\medskip\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1480 |
@{thm[mode=Rule] ss6[of "r\<^sub>2" "r\<^sub>1" "rs\<^sub>1" "rs\<^sub>2" "rs\<^sub>3"]}$LD$\\[-5mm] |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1481 |
\end{tabular} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1482 |
\end{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1483 |
\caption{The rewrite rules that generate simplified regular expressions |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1484 |
in small steps: @{term "rrewrite r\<^sub>1 r\<^sub>2"} is for bitcoded regular |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1485 |
expressions and @{term "srewrite rs\<^sub>1 rs\<^sub>2"} for \emph{lists} of bitcoded |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1486 |
expressions. Interesting is the $LD$ rule that allows copies of regular |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1487 |
expressions to be removed provided a regular expression earlier in the list can |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1488 |
match the same strings.}\label{SimpRewrites} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1489 |
\end{figure} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1490 |
*} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1491 |
|
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1492 |
section {* Finite Bound for the Size of Derivatives *} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1493 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1494 |
text {* |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1495 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1496 |
In this section let us sketch our argument for why the size of the simplified |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1497 |
derivatives with the aggressive simplification function can be finitely bounded. Suppose |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1498 |
we have a size function for bitcoded regular expressions, written |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1499 |
$\llbracket r\rrbracket$, which counts the number of nodes if we regard $r$ as a tree |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1500 |
(we omit the precise definition; ditto for lists $\llbracket r\!s\rrbracket$). For this we show that for every $r$ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1501 |
there exists a bound $N$ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1502 |
such that |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1503 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1504 |
\begin{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1505 |
$\forall s. \; \llbracket@{term "bders_simp r s"}\rrbracket \leq N$ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1506 |
\end{center} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1507 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1508 |
\noindent |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1509 |
Note that the bound $N$ is a bound for \emph{all} strings, no matter how long they are. |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1510 |
We establish this bound by induction on $r$. The base cases for @{term AZERO}, |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1511 |
@{term "AONE bs"} and @{term "ACHAR bs c"} are straightforward. The interesting case is |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1512 |
for sequences of the form @{term "ASEQ bs r\<^sub>1 r\<^sub>2"}. In this case our induction |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1513 |
hypotheses state $\exists N_1. \forall s. \; \llbracket{}@{term "bders_simp r\<^sub>1 s"}\rrbracket \leq N_1$ and |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1514 |
$\exists N_2. \forall s. \; \llbracket@{term "bders_simp r\<^sub>2 s"}\rrbracket \leq N_2$. We can reason as follows\medskip |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1515 |
% |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1516 |
%!\begin{center} |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1517 |
|
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1518 |
\noindent\begin{minipage}{\textwidth} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1519 |
\begin{tabular}{lcll} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1520 |
& & $ \llbracket@{term "bders_simp (ASEQ bs r\<^sub>1 r\<^sub>2) s"}\rrbracket$\\ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1521 |
& $ = $ & $\llbracket bsimp\,(\textit{ALTs}\;bs\;((@{term "ASEQ [] (bders_simp r\<^sub>1 s) r\<^sub>2"}) :: |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1522 |
[@{term "bders_simp r\<^sub>2 s'"} \;|\; s' \in \textit{Suf}(@{text r}_1, s)]))\rrbracket $ & (1) \\ |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1523 |
& $\leq$ & |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1524 |
$\llbracket\textit{distinctWith}\,((@{term "ASEQ [] (bders_simp r\<^sub>1 s) r\<^sub>2"}) :: |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1525 |
[@{term "bders_simp r\<^sub>2 s'"} \;|\; s' \in \textit{Suf}(@{text r}_1, s)])\,\approx\;@{term "{}"}\rrbracket + 1 $ & (2) \\ |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1526 |
& $\leq$ & $\llbracket@{term "ASEQ [] (bders_simp r\<^sub>1 s) r\<^sub>2"}\rrbracket + |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1527 |
\llbracket\textit{distinctWith}\,[@{term "bders_simp r\<^sub>2 s'"} \;|\; s' \in \textit{Suf}(@{text r}_1, s)]\,\approx\;@{term "{}"}\rrbracket + 1 $ & (3) \\ |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1528 |
& $\leq$ & $N_1 + \llbracket r_2\rrbracket + 2 + |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1529 |
\llbracket\textit{distinctWith}\,[@{term "bders_simp r\<^sub>2 s'"} \;|\; s' \in \textit{Suf}(@{text r}_1, s)]\,\approx\;@{term "{}"}\rrbracket$ & (4)\\ |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1530 |
& $\leq$ & $N_1 + \llbracket r_2\rrbracket + 2 + l_{N_{2}} * N_{2}$ & (5) |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1531 |
\end{tabular} |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1532 |
\end{minipage}\medskip |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1533 |
%!\end{center} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1534 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1535 |
% tell Chengsong about Indian paper of closed forms of derivatives |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1536 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1537 |
\noindent |
616 | 1538 |
where in (1) the set $\textit{Suf}(@{text "r"}_1, s)$ are all the suffixes of $s$ where @{term "bders_simp r\<^sub>1 s'"} is nullable ($s'$ being a suffix of $s$). |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1539 |
In (3) we know that $\llbracket@{term "ASEQ [] (bders_simp r\<^sub>1 s) r\<^sub>2"}\rrbracket$ is |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1540 |
bounded by $N_1 + \llbracket{}r_2\rrbracket + 1$. In (5) we know the list comprehension contains only regular expressions of size smaller |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1541 |
than $N_2$. The list length after @{text distinctWith} is bounded by a number, which we call $l_{N_2}$. It stands |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1542 |
for the number of distinct regular expressions smaller than $N_2$ (there can only be finitely many of them). |
642 | 1543 |
We reason similarly for @{text STAR} and @{text NT}.\smallskip |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1544 |
|
499
6a100d32314c
updated the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
498
diff
changeset
|
1545 |
|
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1546 |
Clearly we give in this finiteness argument (Step (5)) a very loose bound that is |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1547 |
far from the actual bound we can expect. We can do better than this, but this does not improve |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1548 |
the finiteness property we are proving. If we are interested in a polynomial bound, |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1549 |
one would hope to obtain a similar tight bound as for partial |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1550 |
derivatives introduced by Antimirov \cite{Antimirov95}. After all the idea with |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1551 |
@{text distinctWith} is to maintain a ``set'' of alternatives (like the sets in |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1552 |
partial derivatives). Unfortunately to obtain the exact same bound would mean |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1553 |
we need to introduce simplifications, such as |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1554 |
$(r_1 + r_2) \cdot r_3 \longrightarrow (r_1 \cdot r_3) + (r_2 \cdot r_3)$, |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1555 |
which exist for partial derivatives. However, if we introduce them in our |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1556 |
setting we would lose the POSIX property of our calculated values. For example |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1557 |
given the regular expressions @{term "SEQ (ALT a ab) (ALT b ONE)"} and the string $[a, b]$, then our |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1558 |
algorithm generates the following correct POSIX value |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1559 |
% |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1560 |
\[ |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1561 |
@{term "Seq (Right (Seq (Char a) (Char b))) (Right Empty)"} |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1562 |
\] |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1563 |
|
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1564 |
\noindent |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1565 |
Essentially it matches the string with the longer @{text "Right"}-alternative in the |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1566 |
first sequence (and then the `rest' with the empty regular expression @{const ONE} from the second sequence). |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1567 |
If we add the simplification above, then we obtain the following value |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1568 |
@{term "Seq (Left (Char a)) (Left (Char b))"} |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1569 |
where the @{text "Left"}-alternatives get priority. However, this violates |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1570 |
the POSIX rules and we have not been able to |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1571 |
reconcile this problem. Therefore we leave better bounds for future work.%!\\[-6.5mm] |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1572 |
|
644 | 1573 |
Note also that while Antimirov was able to give a bound on the \emph{size} |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1574 |
of his partial derivatives~\cite{Antimirov95}, Brzozowski gave a bound |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1575 |
on the \emph{number} of derivatives, provided they are quotient via |
643 | 1576 |
ACI rules \cite{Brzozowski1964}. Brzozowski's result is crucial when one |
642 | 1577 |
uses his derivatives for obtaining a DFA (it essentially bounds |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1578 |
the number of states). However, this result does \emph{not} |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1579 |
transfer to our setting where we are interested in the \emph{size} of the |
642 | 1580 |
derivatives. For example it is \emph{not} true for our derivatives that the |
1581 |
set of derivatives $r \backslash s$ for a given $r$ and all strings |
|
617 | 1582 |
$s$ is finite (even when simplified). This is because for our annotated regular expressions |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1583 |
the bitcode annotation is dependent on the number of iterations that |
642 | 1584 |
are needed for @{text STAR}-regular expressions. This is not a problem for us: Since we intend to do lexing |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1585 |
by calculating (as fast as possible) derivatives, the bound on the size |
642 | 1586 |
of the derivatives is important, not their number. % of derivatives. |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1587 |
|
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1588 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1589 |
*} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1590 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1591 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1592 |
section {* Conclusion *} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1593 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1594 |
text {* |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1595 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1596 |
We set out in this work to prove in Isabelle/HOL the correctness of |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1597 |
the second POSIX lexing algorithm by Sulzmann and Lu |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1598 |
\cite{Sulzmann2014}. This follows earlier work where we established |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1599 |
the correctness of the first algorithm |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1600 |
\cite{AusafDyckhoffUrban2016}. In the earlier work we needed to |
643 | 1601 |
introduce our own specification for POSIX values, |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1602 |
because the informal definition given by Sulzmann and Lu did not |
616 | 1603 |
stand up to formal proof. Also for the second algorithm we needed |
642 | 1604 |
to introduce our own definitions and proof ideas in order to |
1605 |
establish the correctness. Our interest in the second algorithm |
|
1606 |
lies in the fact that by using bitcoded regular expressions and an |
|
1607 |
aggressive simplification method there is a chance that the |
|
1608 |
derivatives can be kept universally small (we established in this |
|
1609 |
paper that for a given $r$ they can be kept finitely bounded for |
|
643 | 1610 |
\emph{all} strings). Our formalisation is approximately 7500 lines of |
642 | 1611 |
Isabelle code. A little more than half of this code concerns the finiteness bound |
1612 |
obtained in Section 5. This slight ``bloat'' in the latter part is because |
|
1613 |
we had to introduce an intermediate datatype for annotated regular expressions and repeat many |
|
643 | 1614 |
definitions for this intermediate datatype. But overall we think this made our |
1615 |
formalisation work smoother. The code of our formalisation can be found at |
|
1616 |
\textcolor{darkblue}{\url{https://github.com/urbanchr/posix}}. |
|
499
6a100d32314c
updated the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
498
diff
changeset
|
1617 |
%This is important if one is after |
6a100d32314c
updated the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
498
diff
changeset
|
1618 |
%an efficient POSIX lexing algorithm based on derivatives. |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1619 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1620 |
Having proved the correctness of the POSIX lexing algorithm, which |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1621 |
lessons have we learned? Well, we feel this is a very good example |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1622 |
where formal proofs give further insight into the matter at |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1623 |
hand. For example it is very hard to see a problem with @{text nub} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1624 |
vs @{text distinctWith} with only experimental data---one would still |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1625 |
see the correct result but find that simplification does not simplify in well-chosen, but not |
499
6a100d32314c
updated the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
498
diff
changeset
|
1626 |
obscure, examples. |
6a100d32314c
updated the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
498
diff
changeset
|
1627 |
%We found that from an implementation |
6a100d32314c
updated the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
498
diff
changeset
|
1628 |
%point-of-view it is really important to have the formal proofs of |
6a100d32314c
updated the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
498
diff
changeset
|
1629 |
%the corresponding properties at hand. |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1630 |
|
642 | 1631 |
With the results reported here, we can of course only make a claim |
1632 |
about the correctness of the algorithm and the sizes of the |
|
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1633 |
derivatives, not about the efficiency or runtime of our version of |
642 | 1634 |
Sulzmann and Lu's algorithm. But we found the size is an important |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1635 |
first indicator about efficiency: clearly if the derivatives can |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1636 |
grow to arbitrarily big sizes and the algorithm needs to traverse |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1637 |
the derivatives possibly several times, then the algorithm will be |
642 | 1638 |
slow---excruciatingly slow that is. Other works seem to make |
1639 |
stronger claims, but during our formalisation work we have |
|
1640 |
developed a healthy suspicion when for example experimental data is |
|
1641 |
used to back up efficiency claims. For instance Sulzmann and Lu |
|
1642 |
write about their equivalent of @{term blexer_simp} \textit{``...we |
|
1643 |
can incrementally compute bitcoded parse trees in linear time in |
|
1644 |
the size of the input''} \cite[Page 14]{Sulzmann2014}. Given the |
|
1645 |
growth of the derivatives in some cases even after aggressive |
|
1646 |
simplification, this is a hard to believe claim. A similar claim |
|
1647 |
about a theoretical runtime of @{text "O(n\<^sup>2)"} for one |
|
1648 |
specific list of regular expressions is made for the Verbatim |
|
1649 |
lexer, which calculates tokens according to POSIX |
|
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1650 |
rules~\cite{verbatim}. For this, Verbatim uses Brzozowski's |
642 | 1651 |
derivatives like in our work. About their empirical data, the authors write: |
1652 |
\textit{``The results of our empirical tests [..] confirm that |
|
1653 |
Verbatim has @{text "O(n\<^sup>2)"} time complexity.''} |
|
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1654 |
\cite[Section~VII]{verbatim}. While their correctness proof for |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1655 |
Verbatim is formalised in Coq, the claim about the runtime |
642 | 1656 |
complexity is only supported by some empirical evidence obtained by |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1657 |
using the code extraction facilities of Coq. Given our observation |
642 | 1658 |
with the ``growth problem'' of derivatives, this runtime bound |
1659 |
is unlikely to hold universally: indeed we tried out their extracted OCaml |
|
1660 |
code with the example \mbox{@{text "(a + aa)\<^sup>*"}} as a single |
|
1661 |
lexing rule, and it took for us around 5 minutes to tokenise a |
|
1662 |
string of 40 $a$'s and that increased to approximately 19 minutes |
|
1663 |
when the string is 50 $a$'s long. Taking into account that |
|
1664 |
derivatives are not simplified in the Verbatim lexer, such numbers |
|
1665 |
are not surprising. Clearly our result of having finite |
|
1666 |
derivatives might sound rather weak in this context but we think |
|
1667 |
such efficiency claims really require further scrutiny. The |
|
1668 |
contribution of this paper is to make sure derivatives do not grow |
|
1669 |
arbitrarily big (universally). In the example \mbox{@{text "(a + |
|
1670 |
aa)\<^sup>*"}}, \emph{all} derivatives have a size of 17 or |
|
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1671 |
less. The result is that lexing a string of, say, 50\,000 a's with |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1672 |
the regular expression \mbox{@{text "(a + aa)\<^sup>*"}} takes |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1673 |
approximately 10 seconds with our Scala implementation of the |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1674 |
presented algorithm. |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1675 |
|
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1676 |
Finally, let us come back to the point about bounded regular |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1677 |
expressions. We have in this paper only shown that @{term "NTIMES r |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1678 |
n"} can be included, but all our results extend straightforwardly |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1679 |
also to the other bounded regular expressions. We find bounded |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1680 |
regular expressions fit naturally into the setting of Brzozowski |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1681 |
derivatives and the bitcoded regular expressions by Sulzmann and Lu. |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1682 |
In contrast bounded regular expressions are often the Achilles' |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1683 |
heel in regular expression matchers that use the traditional |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1684 |
automata-based approach to lexing, primarily because they need to expand the |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1685 |
counters of bounded regular expressions into $n$-connected copies |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1686 |
of an automaton. This is not needed in Sulzmann and Lu's algorithm. |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1687 |
To see the difference consider for example the regular expression @{term "SEQ (NTIMES a |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1688 |
1001) (STAR a)"}, which is not permitted in the Go language because |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1689 |
the counter is too big. In contrast we have no problem with |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1690 |
matching this regular expression with, say 50\,000 a's, because the |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1691 |
counters can be kept compact. In fact, the overall size of the |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1692 |
derivatives is never greater than 5 in this example. Even in the |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1693 |
example from Section 2, where Rust raises an error message, namely |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1694 |
\mbox{@{text "a\<^bsup>{1000}{100}{5}\<^esup>"}}, the maximum size for |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1695 |
our derivatives is a moderate 14. |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1696 |
|
642 | 1697 |
Let us also return to the example @{text |
1698 |
"a\<^bsup>{0}{4294967295}\<^esup>"} which until recently Rust |
|
1699 |
deemed acceptable. But this was due to a bug. It turns out that it took Rust |
|
1700 |
more than 11 minutes to generate an automaton for this regular |
|
1701 |
expression and then to determine that a string of just one(!) |
|
1702 |
@{text a} does \emph{not} match this regular expression. Therefore |
|
1703 |
it is probably a wise choice that in newer versions of Rust's |
|
1704 |
regular expression library such regular expressions are declared as |
|
643 | 1705 |
``too big'' and raise an error message. While this is clearly |
642 | 1706 |
a contrived example, the safety guaranties Rust wants to provide necessitate |
1707 |
this conservative approach. |
|
1708 |
However, with the derivatives and simplifications we have shown |
|
1709 |
in this paper, the example can be solved with ease: |
|
1710 |
it essentially only involves operations on |
|
1711 |
integers and our Scala implementation takes only a few seconds to |
|
1712 |
find out that this string, or even much larger strings, do not match. |
|
1713 |
||
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1714 |
Let us also compare our work to the verified Verbatim++ lexer where |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1715 |
the authors of the Verbatim lexer introduced a number of |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1716 |
improvements and optimisations, for example memoisation |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1717 |
\cite{verbatimpp}. However, unlike Verbatim, which works with |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1718 |
derivatives like in our work, Verbatim++ compiles first a regular |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1719 |
expression into a DFA. While this makes lexing fast in many cases, |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1720 |
with examples of bounded regular expressions like |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1721 |
\mbox{@{text "a\<^bsup>{100}{5}\<^esup>"}} |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1722 |
one needs to represent them as |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1723 |
sequences of $a \cdot a \cdot \ldots \cdot a$ (500 a's in sequence). We have run |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1724 |
their extracted code with such a regular expression as a |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1725 |
single lexing rule and a string of 50\,000 a's---lexing in this case |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1726 |
takes approximately 5~minutes. We are not aware of any better |
616 | 1727 |
translation using the traditional notion of DFAs so that we can improve on this. Therefore we |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1728 |
prefer to stick with calculating derivatives, but attempt to make |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1729 |
this calculation (in the future) as fast as possible. What we can guaranty |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1730 |
with the presented work is that the maximum size of the derivatives |
616 | 1731 |
for \mbox{@{text "a\<^bsup>{100}{5}\<^esup>"}$\,\cdot\,$@{term "STAR a"}} is never bigger than 9. This means our Scala |
642 | 1732 |
implementation again only needs a few seconds for this example and matching 50\,000 a's, say. |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1733 |
% |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1734 |
% |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1735 |
%Possible ideas are |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1736 |
%zippers which have been used in the context of parsing of |
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1737 |
%context-free grammars \cite{zipperparser}. |
643 | 1738 |
%\\[-5mm] %\smallskip |
642 | 1739 |
|
643 | 1740 |
%\noindent |
1741 |
%Our Isabelle code including the results from Sec.~5 is available from \url{https://github.com/urbanchr/posix}. |
|
1742 |
%%\\[-10mm] |
|
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1743 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1744 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1745 |
%%\bibliographystyle{plain} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1746 |
\bibliography{root} |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1747 |
*} |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1748 |
|
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1749 |
(*<*) |
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1750 |
text {* |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1751 |
\newpage |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1752 |
\appendix |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1753 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1754 |
\section{Some Proofs} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1755 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1756 |
While we have formalised \emph{all} results in Isabelle/HOL, below we shall give some |
569
5af61c89f51e
updated paper and corresponding theories
Christian Urban <christian.urban@kcl.ac.uk>
parents:
563
diff
changeset
|
1757 |
rough details of our reasoning in ``English'' if this is helpful for reviewing. |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1758 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1759 |
\begin{proof}[Proof of Lemma~\ref{codedecode}] |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1760 |
This follows from the property that |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1761 |
$\textit{decode}'\,((\textit{code}\,v) \,@\, bs)\,r = (v, bs)$ holds |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1762 |
for any bit-sequence $bs$ and $\vdash v : r$. This property can be |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1763 |
easily proved by induction on $\vdash v : r$. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1764 |
\end{proof} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1765 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1766 |
\begin{proof}[Proof of Lemma~\ref{mainlemma}] |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1767 |
This can be proved by induction on $s$ and generalising over |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1768 |
$v$. The interesting point is that we need to prove this in the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1769 |
reverse direction for $s$. This means instead of cases $[]$ and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1770 |
$c\!::\!s$, we have cases $[]$ and $s\,@\,[c]$ where we unravel the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1771 |
string from the back.\footnote{Isabelle/HOL provides an induction principle |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1772 |
for this way of performing the induction.} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1773 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1774 |
The case for $[]$ is routine using Lemmas~\ref{codedecode} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1775 |
and~\ref{retrievecode}. In the case $s\,@\,[c]$, we can infer from |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1776 |
the assumption that $\vdash v : (r\backslash s)\backslash c$ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1777 |
holds. Hence by Prop.~\ref{Posix2} we know that |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1778 |
(*) $\vdash \inj\,(r\backslash s)\,c\,v : r\backslash s$ holds too. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1779 |
By definition of $\textit{flex}$ we can unfold the left-hand side |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1780 |
to be |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1781 |
\[ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1782 |
\textit{Some}\,(\textit{flex}\;r\,\textit{id}\,(s\,@\,[c])\,v) = |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1783 |
\textit{Some}\,(\textit{flex}\;r\,\textit{id}\,s\,(\inj\,(r\backslash s)\,c\,v)) |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1784 |
\] |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1785 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1786 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1787 |
By IH and (*) we can rewrite the right-hand side to |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1788 |
$\textit{decode}\,(\textit{retrieve}\,(r^\uparrow\backslash s)\; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1789 |
(\inj\,(r\backslash s)\,c\,\,v))\,r$ which is equal to |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1790 |
$\textit{decode}\,(\textit{retrieve}\, (r^\uparrow\backslash |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1791 |
(s\,@\,[c]))\,v)\,r$ as required. The last rewrite step is possible |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1792 |
because we generalised over $v$ in our induction. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1793 |
\end{proof} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1794 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1795 |
\begin{proof}[Proof of Theorem~\ref{thmone}] |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1796 |
We can first expand both sides using Lem.~\ref{flex} and the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1797 |
definition of \textit{blexer}. This gives us two |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1798 |
\textit{if}-statements, which we need to show to be equal. By |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1799 |
Lemma~\ref{bnullable}\textit{(2)} we know the \textit{if}-tests coincide: |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1800 |
$\textit{bnullable}(r^\uparrow\backslash s) \;\textit{iff}\; |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1801 |
\nullable(r\backslash s)$. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1802 |
For the \textit{if}-branch suppose $r_d \dn r^\uparrow\backslash s$ and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1803 |
$d \dn r\backslash s$. We have (*) @{text "nullable d"}. We can then show |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1804 |
by Lemma~\ref{bnullable}\textit{(3)} that |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1805 |
% |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1806 |
\[ |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1807 |
\textit{decode}(\textit{bmkeps}\:r_d)\,r = |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1808 |
\textit{decode}(\textit{retrieve}\,r_d\,(\textit{mkeps}\,d))\,r |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1809 |
\] |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1810 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1811 |
\noindent |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1812 |
where the right-hand side is equal to |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1813 |
$\textit{Some}\,(\textit{flex}\,r\,\textit{id}\,s\,(\textit{mkeps}\, |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1814 |
d))$ by Lemma~\ref{mainlemma} (we know |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1815 |
$\vdash \textit{mkeps}\,d : d$ by (*)). This shows the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1816 |
\textit{if}-branches return the same value. In the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1817 |
\textit{else}-branches both \textit{lexer} and \textit{blexer} return |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1818 |
\textit{None}. Therefore we can conclude the proof. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1819 |
\end{proof} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1820 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1821 |
\begin{proof}[Proof of Lemma~\ref{lemone}] |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1822 |
By induction on @{text r}. For this we can use the properties |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1823 |
@{thm fltsfrewrites} and @{text "rs"}$\;\stackrel{s}{\leadsto}^*$@{text "distinctWith rs \<approx>"} @{term "{}"}. The latter uses |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1824 |
repeated applications of the $LD$ rule which allows the removal |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1825 |
of duplicates that can recognise the same strings. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1826 |
\end{proof} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1827 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1828 |
\begin{proof}[Proof of Lemma~\ref{lembnull}] |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1829 |
Straightforward mutual induction on the definition of $\leadsto$ and $\stackrel{s}{\leadsto}$. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1830 |
The only interesting case is the rule $LD$ where the property holds since by the side-conditions of that rule the empty string will |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1831 |
be in both @{text "L (rs\<^sub>a @ [r\<^sub>1] @ rs\<^sub>b @ [r\<^sub>2] @ rs\<^sub>c)"} and |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1832 |
@{text "L (rs\<^sub>a @ [r\<^sub>1] @ rs\<^sub>b @ rs\<^sub>c)"}. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1833 |
\end{proof} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1834 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1835 |
\begin{proof}[Proof of Lemma \ref{lemthree}] |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1836 |
By straightforward mutual induction on the definition of $\leadsto$ and $\stackrel{s}{\leadsto}$. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1837 |
Again the only interesting case is the rule $LD$ where we need to ensure that |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1838 |
@{text "bmkeps (rs\<^sub>a @ [r\<^sub>1] @ rs\<^sub>b @ [r\<^sub>2] @ rs\<^sub>c) = |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1839 |
bmkeps (rs\<^sub>a @ [r\<^sub>1] @ rs\<^sub>b @ rs\<^sub>c)"} holds. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1840 |
This is indeed the case because according to the POSIX rules the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1841 |
generated bitsequence is determined by the first alternative that can match the |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1842 |
string (in this case being nullable). |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1843 |
\end{proof} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1844 |
|
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1845 |
\begin{proof}[Proof of Lemma~\ref{bderlem}] |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1846 |
By straightforward mutual induction on the definition of $\leadsto$ and $\stackrel{s}{\leadsto}$. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1847 |
The case for $LD$ holds because @{term "L (erase (bder c r\<^sub>2)) \<subseteq> L (erase (bder c r\<^sub>1))"} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1848 |
if and only if @{term "L (erase (r\<^sub>2)) \<subseteq> L (erase (r\<^sub>1))"}. |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1849 |
\end{proof} |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1850 |
*} |
615
8881a09a06fd
updated paper for FoSSaCS
Christian Urban <christian.urban@kcl.ac.uk>
parents:
599
diff
changeset
|
1851 |
(*>*) |
496
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1852 |
(*<*) |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1853 |
end |
f493a20feeb3
updated to include the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
1854 |
(*>*) |