thys2/Paper/Paper.thy
author Christian Urban <christian.urban@kcl.ac.uk>
Mon, 07 Feb 2022 14:22:08 +0000
changeset 420 b66a4305749c
parent 418 41a2a3b63853
child 423 b7199d6c672d
permissions -rw-r--r--
updated
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
396
cc8e231529fb added ITP paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     1
(*<*)
cc8e231529fb added ITP paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     2
theory Paper
cc8e231529fb added ITP paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     3
imports 
cc8e231529fb added ITP paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     4
   "../Lexer"
cc8e231529fb added ITP paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     5
   "../Simplifying" 
cc8e231529fb added ITP paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     6
   "../Positions"
cc8e231529fb added ITP paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     7
   "../SizeBound4" 
cc8e231529fb added ITP paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     8
   "HOL-Library.LaTeXsugar"
cc8e231529fb added ITP paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     9
begin
397
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
    10
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
    11
declare [[show_question_marks = false]]
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
    12
398
dac6d27c99c6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 397
diff changeset
    13
notation (latex output)
dac6d27c99c6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 397
diff changeset
    14
  If  ("(\<^latex>\<open>\\textrm{\<close>if\<^latex>\<open>}\<close> (_)/ \<^latex>\<open>\\textrm{\<close>then\<^latex>\<open>}\<close> (_)/ \<^latex>\<open>\\textrm{\<close>else\<^latex>\<open>}\<close> (_))" 10) and
dac6d27c99c6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 397
diff changeset
    15
  Cons ("_\<^latex>\<open>\\mbox{$\\,$}\<close>::\<^latex>\<open>\\mbox{$\\,$}\<close>_" [75,73] 73) 
dac6d27c99c6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 397
diff changeset
    16
dac6d27c99c6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 397
diff changeset
    17
397
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
    18
abbreviation 
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
    19
  "der_syn r c \<equiv> der c r"
418
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
    20
abbreviation 
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
    21
  "bder_syn r c \<equiv> bder c r"  
397
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
    22
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
    23
notation (latex output)
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
    24
  der_syn ("_\\_" [79, 1000] 76) and
418
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
    25
  bder_syn ("_\\_" [79, 1000] 76) and
420
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
    26
  bders ("_\\_" [79, 1000] 76) and
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
    27
  bders_simp ("_\\\<^sub>s\<^sub>i\<^sub>m\<^sub>p _" [79, 1000] 76) and
397
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
    28
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
    29
  ZERO ("\<^bold>0" 81) and 
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
    30
  ONE ("\<^bold>1" 81) and 
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
    31
  CH ("_" [1000] 80) and
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
    32
  ALT ("_ + _" [77,77] 78) and
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
    33
  SEQ ("_ \<cdot> _" [77,77] 78) and
402
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    34
  STAR ("_\<^sup>\<star>" [79] 78) and
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    35
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    36
  val.Void ("Empty" 78) and
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    37
  val.Char ("Char _" [1000] 78) and
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    38
  val.Left ("Left _" [79] 78) and
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    39
  val.Right ("Right _" [1000] 78) and
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    40
  val.Seq ("Seq _ _" [79,79] 78) and
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    41
  val.Stars ("Stars _" [79] 78) and
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    42
420
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
    43
  Prf ("\<turnstile> _ : _" [75,75] 75) and  
402
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    44
  Posix ("'(_, _') \<rightarrow> _" [63,75,75] 75) and
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    45
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    46
  flat ("|_|" [75] 74) and
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    47
  flats ("|_|" [72] 74) and
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    48
  injval ("inj _ _ _" [79,77,79] 76) and 
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    49
  mkeps ("mkeps _" [79] 76) and 
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    50
  length ("len _" [73] 73) and
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    51
  set ("_" [73] 73) and
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    52
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    53
  AZERO ("ZERO" 81) and 
405
3cfea5bb5e23 updated some of the text and cardinality proof
Christian Urban <christian.urban@kcl.ac.uk>
parents: 402
diff changeset
    54
  AONE ("ONE _" [79] 78) and 
402
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    55
  ACHAR ("CHAR _ _" [79, 79] 80) and
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    56
  AALTs ("ALTs _ _" [77,77] 78) and
405
3cfea5bb5e23 updated some of the text and cardinality proof
Christian Urban <christian.urban@kcl.ac.uk>
parents: 402
diff changeset
    57
  ASEQ ("SEQ _ _ _" [79, 79,79] 78) and
402
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    58
  ASTAR ("STAR _ _" [79, 79] 78) and
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    59
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    60
  code ("code _" [79] 74) and
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    61
  intern ("_\<^latex>\<open>\\mbox{$^\\uparrow$}\<close>" [900] 80) and
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    62
  erase ("_\<^latex>\<open>\\mbox{$^\\downarrow$}\<close>" [1000] 74) and
418
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
    63
  bnullable ("bnullable _" [1000] 80) and
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
    64
  bmkeps ("bmkeps _" [1000] 80) and
405
3cfea5bb5e23 updated some of the text and cardinality proof
Christian Urban <christian.urban@kcl.ac.uk>
parents: 402
diff changeset
    65
418
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
    66
  srewrite ("_\<^latex>\<open>\\mbox{$\\,\\stackrel{s}{\\leadsto}$}\<close> _" [71, 71] 80) and
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
    67
  rrewrites ("_ \<^latex>\<open>\\mbox{$\\,\\leadsto^*$}\<close> _" [71, 71] 80) and
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
    68
  blexer_simp ("blexer\<^sup>+" 1000) 
402
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    69
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    70
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    71
lemma better_retrieve:
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    72
   shows "rs \<noteq> Nil ==> retrieve (AALTs bs (r#rs)) (Left v) = bs @ retrieve r v"
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    73
   and   "rs \<noteq> Nil ==> retrieve (AALTs bs (r#rs)) (Right v) = bs @ retrieve (AALTs [] rs) v"
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    74
  apply (metis list.exhaust retrieve.simps(4))
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
    75
  by (metis list.exhaust retrieve.simps(5))
397
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
    76
396
cc8e231529fb added ITP paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    77
(*>*)
397
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
    78
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
    79
section {* Introduction *}
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
    80
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
    81
text {*
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
    82
400
46e5566ad4ba updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 398
diff changeset
    83
In the last fifteen or so years, Brzozowski's derivatives of regular
46e5566ad4ba updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 398
diff changeset
    84
expressions have sparked quite a bit of interest in the functional
46e5566ad4ba updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 398
diff changeset
    85
programming and theorem prover communities.  The beauty of
46e5566ad4ba updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 398
diff changeset
    86
Brzozowski's derivatives \cite{Brzozowski1964} is that they are neatly
46e5566ad4ba updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 398
diff changeset
    87
expressible in any functional language, and easily definable and
46e5566ad4ba updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 398
diff changeset
    88
reasoned about in theorem provers---the definitions just consist of
420
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
    89
inductive datatypes and simple recursive functions.  Derivatives of a
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
    90
regular expression, written @{term "der c r"}, give a simple solution
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
    91
to the problem of matching a string @{term s} with a regular
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
    92
expression @{term r}: if the derivative of @{term r} w.r.t.\ (in
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
    93
succession) all the characters of the string matches the empty string,
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
    94
then @{term r} matches @{term s} (and {\em vice versa}).  We are aware
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
    95
of a mechanised correctness proof of Brzozowski's matcher in HOL4 by
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
    96
Owens and Slind~\cite{Owens2008}. Another one in Isabelle/HOL is part
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
    97
of the work by Krauss and Nipkow \cite{Krauss2011}.  And another one
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
    98
in Coq is given by Coquand and Siles \cite{Coquand2012}.
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
    99
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   100
There are two difficulties with derivative-based matchers and also
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   101
lexers: First, Brzozowski's original matcher only generates a yes/no
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   102
answer for whether a regular expression matches a string or not.
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   103
Sulzmann and Lu~\cite{Sulzmann2014} overcome this difficulty by
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   104
cleverly extending Brzozowski's matching algorithm to POSIX
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   105
lexing. This extended version generates additional information on
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   106
\emph{how} a regular expression matches a string. They achieve this by
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   107
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   108
400
46e5566ad4ba updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 398
diff changeset
   109
46e5566ad4ba updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 398
diff changeset
   110
420
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   111
The second problem is that Brzozowski's derivatives can 
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   112
grow to arbitrarily big sizes. For example if we start with the
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   113
regular expression \mbox{@{text "(a + aa)\<^sup>*"}} and take
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   114
successive derivatives according to the character $a$, we end up with
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   115
a sequence of ever-growing derivatives like 
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   116
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   117
\def\ll{\stackrel{\_\backslash{} a}{\longrightarrow}}
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   118
\begin{center}
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   119
\begin{tabular}{rll}
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   120
$(a + aa)^*$ & $\ll$ & $(\ONE + \ONE{}a) \cdot (a + aa)^*$\\
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   121
& $\ll$ & $(\ZERO + \ZERO{}a + \ONE) \cdot (a + aa)^* \;+\; (\ONE + \ONE{}a) \cdot (a + aa)^*$\\
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   122
& $\ll$ & $(\ZERO + \ZERO{}a + \ZERO) \cdot (a + aa)^* + (\ONE + \ONE{}a) \cdot (a + aa)^* \;+\; $\\
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   123
& & $\qquad(\ZERO + \ZERO{}a + \ONE) \cdot (a + aa)^* + (\ONE + \ONE{}a) \cdot (a + aa)^*$\\
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   124
& $\ll$ & \ldots
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   125
\end{tabular}
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   126
\end{center}
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   127
 
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   128
\noindent where after around 35 steps we run out of memory on a
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   129
typical computer (we define the precise details of the derivative
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   130
operation later).  Clearly, the notation involving $\ZERO$s and
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   131
$\ONE$s already suggests simplification rules that can be applied to
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   132
regular regular expressions, for example $\ZERO{}r \Rightarrow \ZERO$,
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   133
$\ONE{}r \Rightarrow r$, $\ZERO{} + r \Rightarrow r$ and $r + r
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   134
\Rightarrow r$. While such simple-minded reductions have been proved
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   135
in our earlier work to preserve the correctness of Sulzmann and Lu's
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   136
algorithm, they unfortunately do \emph{not} help with limiting the
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   137
gowth of the derivatives shown above: yes, the growth is slowed, but the
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   138
derivatives can still grow beyond any finite bound.
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   139
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   140
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   141
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   142
Sulzmann and Lu introduce a
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   143
bitcoded version of their lexing algorithm. They make some claims
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   144
about the correctness and speed of this version, but do
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   145
not provide any supporting proof arguments, not even
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   146
``pencil-and-paper'' arguments. They wrote about their bitcoded
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   147
``incremental parsing method'' (that is the algorithm to be studied in this
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   148
section):
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   149
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   150
\begin{quote}\it
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   151
  ``Correctness Claim: We further claim that the incremental parsing
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   152
  method [..] in combination with the simplification steps [..]
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   153
  yields POSIX parse trees. We have tested this claim
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   154
  extensively [..] but yet
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   155
  have to work out all proof details.''
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   156
\end{quote}  
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   157
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   158
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   159
400
46e5566ad4ba updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 398
diff changeset
   160
397
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   161
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   162
If a regular expression matches a string, then in general there is more
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   163
than one way of how the string is matched. There are two commonly used
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   164
disambiguation strategies to generate a unique answer: one is called
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   165
GREEDY matching \cite{Frisch2004} and the other is POSIX
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   166
matching~\cite{POSIX,Kuklewicz,OkuiSuzuki2010,Sulzmann2014,Vansummeren2006}.
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   167
For example consider the string @{term xy} and the regular expression
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   168
\mbox{@{term "STAR (ALT (ALT x y) xy)"}}. Either the string can be
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   169
matched in two `iterations' by the single letter-regular expressions
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   170
@{term x} and @{term y}, or directly in one iteration by @{term xy}. The
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   171
first case corresponds to GREEDY matching, which first matches with the
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   172
left-most symbol and only matches the next symbol in case of a mismatch
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   173
(this is greedy in the sense of preferring instant gratification to
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   174
delayed repletion). The second case is POSIX matching, which prefers the
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   175
longest match.
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   176
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   177
420
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   178
The derivative has the property (which may almost be
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   179
regarded as its specification) that, for every string @{term s} and
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   180
regular expression @{term r} and character @{term c}, one has @{term
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   181
"cs \<in> L(r)"} if and only if \mbox{@{term "s \<in> L(der c r)"}}.
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   182
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   183
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   184
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   185
402
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   186
\begin{center}
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   187
\begin{tabular}{cc}
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   188
  \begin{tabular}{r@ {\hspace{2mm}}c@ {\hspace{2mm}}l}
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   189
  @{thm (lhs) der.simps(1)} & $\dn$ & @{thm (rhs) der.simps(1)}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   190
  @{thm (lhs) der.simps(2)} & $\dn$ & @{thm (rhs) der.simps(2)}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   191
  @{thm (lhs) der.simps(3)} & $\dn$ & @{thm (rhs) der.simps(3)}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   192
  @{thm (lhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   193
  @{thm (lhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{text "if"} @{term "nullable(r\<^sub>1)"}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   194
  & & @{text "then"} @{term "ALT (SEQ (der c r\<^sub>1) r\<^sub>2) (der c r\<^sub>2)"}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   195
  & & @{text "else"} @{term "SEQ (der c r\<^sub>1) r\<^sub>2"}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   196
  % & & @{thm (rhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   197
  @{thm (lhs) der.simps(6)} & $\dn$ & @{thm (rhs) der.simps(6)}
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   198
  \end{tabular}
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   199
  &
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   200
  \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   201
  @{thm (lhs) nullable.simps(1)} & $\dn$ & @{thm (rhs) nullable.simps(1)}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   202
  @{thm (lhs) nullable.simps(2)} & $\dn$ & @{thm (rhs) nullable.simps(2)}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   203
  @{thm (lhs) nullable.simps(3)} & $\dn$ & @{thm (rhs) nullable.simps(3)}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   204
  @{thm (lhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(4)[of "r\<^sub>1" "r\<^sub>2"]}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   205
  @{thm (lhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) nullable.simps(5)[of "r\<^sub>1" "r\<^sub>2"]}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   206
  @{thm (lhs) nullable.simps(6)} & $\dn$ & @{thm (rhs) nullable.simps(6)}\medskip\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   207
  \end{tabular}  
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   208
\end{tabular}  
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   209
\end{center}
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   210
397
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   211
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   212
\begin{figure}[t]
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   213
\begin{center}
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   214
\begin{tikzpicture}[scale=2,node distance=1.3cm,
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   215
                    every node/.style={minimum size=6mm}]
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   216
\node (r1)  {@{term "r\<^sub>1"}};
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   217
\node (r2) [right=of r1]{@{term "r\<^sub>2"}};
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   218
\draw[->,line width=1mm](r1)--(r2) node[above,midway] {@{term "der a DUMMY"}};
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   219
\node (r3) [right=of r2]{@{term "r\<^sub>3"}};
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   220
\draw[->,line width=1mm](r2)--(r3) node[above,midway] {@{term "der b DUMMY"}};
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   221
\node (r4) [right=of r3]{@{term "r\<^sub>4"}};
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   222
\draw[->,line width=1mm](r3)--(r4) node[above,midway] {@{term "der c DUMMY"}};
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   223
\draw (r4) node[anchor=west] {\;\raisebox{3mm}{@{term nullable}}};
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   224
\node (v4) [below=of r4]{@{term "v\<^sub>4"}};
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   225
\draw[->,line width=1mm](r4) -- (v4);
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   226
\node (v3) [left=of v4] {@{term "v\<^sub>3"}};
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   227
\draw[->,line width=1mm](v4)--(v3) node[below,midway] {\<open>inj r\<^sub>3 c\<close>};
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   228
\node (v2) [left=of v3]{@{term "v\<^sub>2"}};
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   229
\draw[->,line width=1mm](v3)--(v2) node[below,midway] {\<open>inj r\<^sub>2 b\<close>};
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   230
\node (v1) [left=of v2] {@{term "v\<^sub>1"}};
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   231
\draw[->,line width=1mm](v2)--(v1) node[below,midway] {\<open>inj r\<^sub>1 a\<close>};
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   232
\draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{@{term "mkeps"}}};
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   233
\end{tikzpicture}
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   234
\end{center}
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   235
\mbox{}\\[-13mm]
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   236
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   237
\caption{The two phases of the algorithm by Sulzmann \& Lu \cite{Sulzmann2014},
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   238
matching the string @{term "[a,b,c]"}. The first phase (the arrows from 
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   239
left to right) is \Brz's matcher building successive derivatives. If the 
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   240
last regular expression is @{term nullable}, then the functions of the 
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   241
second phase are called (the top-down and right-to-left arrows): first 
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   242
@{term mkeps} calculates a value @{term "v\<^sub>4"} witnessing
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   243
how the empty string has been recognised by @{term "r\<^sub>4"}. After
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   244
that the function @{term inj} ``injects back'' the characters of the string into
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   245
the values.
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   246
\label{Sulz}}
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   247
\end{figure} 
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   248
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   249
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   250
*}
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   251
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   252
section {* Background *}
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   253
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   254
text {*
420
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   255
  In our Isabelle/HOL formalisation strings are lists of characters with
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   256
  the empty string being represented by the empty list, written $[]$,
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   257
  and list-cons being written as $\_\!\_\!::\!\_\!\_\,$; string
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   258
  concatenation is $\_\!\_ \,@\, \_\!\_\,$. Often we use the usual
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   259
  bracket notation for lists also for strings; for example a string
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   260
  consisting of just a single character $c$ is written $[c]$.   
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   261
  Our egular expressions are defined as usual as the elements of the following inductive
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   262
  datatype:
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   263
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   264
  \begin{center}
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   265
  @{text "r :="}
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   266
  @{const "ZERO"} $\mid$
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   267
  @{const "ONE"} $\mid$
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   268
  @{term "CH c"} $\mid$
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   269
  @{term "ALT r\<^sub>1 r\<^sub>2"} $\mid$
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   270
  @{term "SEQ r\<^sub>1 r\<^sub>2"} $\mid$
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   271
  @{term "STAR r"} 
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   272
  \end{center}
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   273
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   274
  \noindent where @{const ZERO} stands for the regular expression that does
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   275
  not match any string, @{const ONE} for the regular expression that matches
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   276
  only the empty string and @{term c} for matching a character literal. The
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   277
  language of a regular expression, written $L$, is defined as usual
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   278
  (see for example \cite{AusafDyckhoffUrban2016}).
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   279
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   280
  Central to Brzozowski's regular expression matcher are two functions
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   281
  called $\nullable$ and \emph{derivative}. The latter is written
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   282
  $r\backslash c$ for the derivative of the regular expression $r$
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   283
  w.r.t.~the character $c$. Both functions are defined by recursion over
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   284
  regular expressions.  
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   285
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   286
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   287
	\begin{center}
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   288
		\begin{tabular}{lcl}
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   289
			$\nullable(\ZERO)$     & $\dn$ & $\mathit{false}$ \\  
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   290
			$\nullable(\ONE)$      & $\dn$ & $\mathit{true}$ \\
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   291
			$\nullable(c)$ 	       & $\dn$ & $\mathit{false}$ \\
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   292
			$\nullable(r_1 + r_2)$ & $\dn$ & $\nullable(r_1) \vee \nullable(r_2)$ \\
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   293
			$\nullable(r_1\cdot r_2)$  & $\dn$ & $\nullable(r_1) \wedge \nullable(r_2)$ \\
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   294
			$\nullable(r^*)$       & $\dn$ & $\mathit{true}$ \\
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   295
		\end{tabular}
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   296
	\end{center}
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   297
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   298
  \noindent
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   299
  The derivative function takes a regular expression, say $r$ and a
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   300
  character, say $c$, as input and returns the derivative regular
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   301
  expression.
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   302
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   303
	\begin{center}
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   304
		\begin{tabular}{lcl}
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   305
			$\ZERO \backslash c$ & $\dn$ & $\ZERO$\\  
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   306
			$\ONE \backslash c$  & $\dn$ & $\ZERO$\\
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   307
			$d \backslash c$     & $\dn$ & 
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   308
			$\mathit{if} \;c = d\;\mathit{then}\;\ONE\;\mathit{else}\;\ZERO$\\
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   309
			$(r_1 + r_2)\backslash c$     & $\dn$ & $r_1 \backslash c \,+\, r_2 \backslash c$\\
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   310
			$(r_1 \cdot r_2)\backslash c$ & $\dn$ & $\mathit{if} \nullable(r_1)$\\
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   311
			&   & $\mathit{then}\;(r_1\backslash c) \cdot r_2 \,+\, r_2\backslash c$\\
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   312
			&   & $\mathit{else}\;(r_1\backslash c) \cdot r_2$\\
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   313
			$(r^*)\backslash c$           & $\dn$ & $(r\backslash c) \cdot r^*$\\
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   314
		\end{tabular}
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   315
	\end{center}
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   316
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   317
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   318
  Sulzmann and Lu presented two lexing algorithms in their paper from 2014
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   319
  \cite{Sulzmann2014}. This first algorithm consists of two phases: first a
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   320
  matching phase (which is Brzozowski's algorithm) and then a value
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   321
  construction phase. The values encode \emph{how} a regular expression
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   322
  matches a string. \emph{Values} are defined as the inductive datatype
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   323
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   324
  \begin{center}
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   325
  @{text "v :="}
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   326
  @{const "Void"} $\mid$
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   327
  @{term "val.Char c"} $\mid$
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   328
  @{term "Left v"} $\mid$
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   329
  @{term "Right v"} $\mid$
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   330
  @{term "Seq v\<^sub>1 v\<^sub>2"} $\mid$ 
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   331
  @{term "Stars vs"} 
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   332
  \end{center}  
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   333
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   334
  \noindent where we use @{term vs} to stand for a list of
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   335
  values.
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   336
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   337
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   338
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   339
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   340
  \noindent Sulzmann and Lu also define inductively an inhabitation relation
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   341
  that associates values to regular expressions:
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   342
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   343
  \begin{center}
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   344
  \begin{tabular}{c}
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   345
  \\[-8mm]
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   346
  @{thm[mode=Axiom] Prf.intros(4)} \qquad
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   347
  @{thm[mode=Axiom] Prf.intros(5)[of "c"]}\\[4mm]
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   348
  @{thm[mode=Rule] Prf.intros(2)[of "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]} \qquad 
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   349
  @{thm[mode=Rule] Prf.intros(3)[of "v\<^sub>2" "r\<^sub>1" "r\<^sub>2"]}\\[4mm]
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   350
  @{thm[mode=Rule] Prf.intros(1)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]}\\[4mm]
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   351
  %%@ { t h m[mode=Axiom] Prf.intros(6)[of "r"]} \qquad  
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   352
  @{thm[mode=Rule] Prf.intros(6)[of "r" "vs"]}
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   353
  \end{tabular}
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   354
  \end{center}
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   355
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   356
  \noindent Note that no values are associated with the regular expression
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   357
  @{term ZERO}. It is routine to establish how values ``inhabiting'' a regular
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   358
  expression correspond to the language of a regular expression, namely
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   359
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   360
  \begin{proposition}
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   361
  @{thm L_flat_Prf}
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   362
  \end{proposition}
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   363
397
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   364
  Sulzmann-Lu algorithm with inj. State that POSIX rules.
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   365
  metion slg is correct.
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   366
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   367
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   368
  \begin{figure}[t]
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   369
  \begin{center}
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   370
  \begin{tabular}{c}
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   371
  @{thm[mode=Axiom] Posix.intros(1)}\<open>P\<close>@{term "ONE"} \qquad
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   372
  @{thm[mode=Axiom] Posix.intros(2)}\<open>P\<close>@{term "c"}\medskip\\
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   373
  @{thm[mode=Rule] Posix.intros(3)[of "s" "r\<^sub>1" "v" "r\<^sub>2"]}\<open>P+L\<close>\qquad
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   374
  @{thm[mode=Rule] Posix.intros(4)[of "s" "r\<^sub>2" "v" "r\<^sub>1"]}\<open>P+R\<close>\medskip\\
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   375
  $\mprset{flushleft}
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   376
   \inferrule
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   377
   {@{thm (prem 1) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \qquad
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   378
    @{thm (prem 2) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \\\\
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   379
    @{thm (prem 3) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   380
   {@{thm (concl) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}$\<open>PS\<close>\\
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   381
  @{thm[mode=Axiom] Posix.intros(7)}\<open>P[]\<close>\medskip\\
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   382
  $\mprset{flushleft}
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   383
   \inferrule
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   384
   {@{thm (prem 1) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   385
    @{thm (prem 2) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   386
    @{thm (prem 3) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \\\\
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   387
    @{thm (prem 4) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   388
   {@{thm (concl) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}$\<open>P\<star>\<close>
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   389
  \end{tabular}
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   390
  \end{center}
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   391
  \caption{Our inductive definition of POSIX values.}\label{POSIXrules}
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   392
  \end{figure}
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   393
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   394
402
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   395
  \begin{center}
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   396
  \begin{tabular}{lcl}
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   397
  @{thm (lhs) mkeps.simps(1)} & $\dn$ & @{thm (rhs) mkeps.simps(1)}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   398
  @{thm (lhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   399
  @{thm (lhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(3)[of "r\<^sub>1" "r\<^sub>2"]}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   400
  @{thm (lhs) mkeps.simps(4)} & $\dn$ & @{thm (rhs) mkeps.simps(4)}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   401
  \end{tabular}
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   402
  \end{center}
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   403
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   404
  \begin{center}
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   405
  \begin{tabular}{l@ {\hspace{5mm}}lcl}
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   406
  \textit{(1)} & @{thm (lhs) injval.simps(1)} & $\dn$ & @{thm (rhs) injval.simps(1)}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   407
  \textit{(2)} & @{thm (lhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]} & $\dn$ & 
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   408
      @{thm (rhs) injval.simps(2)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1"]}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   409
  \textit{(3)} & @{thm (lhs) injval.simps(3)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]} & $\dn$ & 
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   410
      @{thm (rhs) injval.simps(3)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   411
  \textit{(4)} & @{thm (lhs) injval.simps(4)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]} & $\dn$ 
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   412
      & @{thm (rhs) injval.simps(4)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   413
  \textit{(5)} & @{thm (lhs) injval.simps(5)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]} & $\dn$ 
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   414
      & @{thm (rhs) injval.simps(5)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>1" "v\<^sub>2"]}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   415
  \textit{(6)} & @{thm (lhs) injval.simps(6)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]} & $\dn$ 
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   416
      & @{thm (rhs) injval.simps(6)[of "r\<^sub>1" "r\<^sub>2" "c" "v\<^sub>2"]}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   417
  \textit{(7)} & @{thm (lhs) injval.simps(7)[of "r" "c" "v" "vs"]} & $\dn$ 
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   418
      & @{thm (rhs) injval.simps(7)[of "r" "c" "v" "vs"]}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   419
  \end{tabular}
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   420
  \end{center}
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   421
397
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   422
*}
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   423
402
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   424
section {* Bitcoded Regular Expressions and Derivatives *}
397
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   425
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   426
text {*
405
3cfea5bb5e23 updated some of the text and cardinality proof
Christian Urban <christian.urban@kcl.ac.uk>
parents: 402
diff changeset
   427
416
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   428
  In the second part of their paper \cite{Sulzmann2014},
405
3cfea5bb5e23 updated some of the text and cardinality proof
Christian Urban <christian.urban@kcl.ac.uk>
parents: 402
diff changeset
   429
  Sulzmann and Lu describe another algorithm that generates POSIX
3cfea5bb5e23 updated some of the text and cardinality proof
Christian Urban <christian.urban@kcl.ac.uk>
parents: 402
diff changeset
   430
  values but dispences with the second phase where characters are
3cfea5bb5e23 updated some of the text and cardinality proof
Christian Urban <christian.urban@kcl.ac.uk>
parents: 402
diff changeset
   431
  injected ``back'' into values. For this they annotate bitcodes to
3cfea5bb5e23 updated some of the text and cardinality proof
Christian Urban <christian.urban@kcl.ac.uk>
parents: 402
diff changeset
   432
  regular expressions, which we define in Isabelle/HOL as the datatype
402
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   433
405
3cfea5bb5e23 updated some of the text and cardinality proof
Christian Urban <christian.urban@kcl.ac.uk>
parents: 402
diff changeset
   434
  \begin{center}
3cfea5bb5e23 updated some of the text and cardinality proof
Christian Urban <christian.urban@kcl.ac.uk>
parents: 402
diff changeset
   435
  \begin{tabular}{lcl}
416
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   436
  @{term breg} & $::=$ & @{term "AZERO"} $\quad\mid\quad$ @{term "AONE bs"}\\
405
3cfea5bb5e23 updated some of the text and cardinality proof
Christian Urban <christian.urban@kcl.ac.uk>
parents: 402
diff changeset
   437
               & $\mid$ & @{term "ACHAR bs c"}\\
3cfea5bb5e23 updated some of the text and cardinality proof
Christian Urban <christian.urban@kcl.ac.uk>
parents: 402
diff changeset
   438
               & $\mid$ & @{term "AALTs bs rs"}\\
3cfea5bb5e23 updated some of the text and cardinality proof
Christian Urban <christian.urban@kcl.ac.uk>
parents: 402
diff changeset
   439
               & $\mid$ & @{term "ASEQ bs r\<^sub>1 r\<^sub>2"}\\
3cfea5bb5e23 updated some of the text and cardinality proof
Christian Urban <christian.urban@kcl.ac.uk>
parents: 402
diff changeset
   440
               & $\mid$ & @{term "ASTAR bs r"}
3cfea5bb5e23 updated some of the text and cardinality proof
Christian Urban <christian.urban@kcl.ac.uk>
parents: 402
diff changeset
   441
  \end{tabular}
3cfea5bb5e23 updated some of the text and cardinality proof
Christian Urban <christian.urban@kcl.ac.uk>
parents: 402
diff changeset
   442
  \end{center}
3cfea5bb5e23 updated some of the text and cardinality proof
Christian Urban <christian.urban@kcl.ac.uk>
parents: 402
diff changeset
   443
418
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   444
  \noindent where @{text bs} stands for bitsequences; @{text r},
416
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   445
  @{text "r\<^sub>1"} and @{text "r\<^sub>2"} for bitcoded regular
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   446
  expressions; and @{text rs} for lists of bitcoded regular
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   447
  expressions. The binary alternative @{text "ALT bs r\<^sub>1 r\<^sub>2"}
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   448
  is just an abbreviation for @{text "ALTs bs [r\<^sub>1, r\<^sub>2]"}. 
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   449
  For bitsequences we just use lists made up of the
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   450
  constants @{text Z} and @{text S}.  The idea with bitcoded regular
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   451
  expressions is to incrementally generate the value information (for
418
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   452
  example @{text Left} and @{text Right}) as bitsequences. For this 
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   453
  Sulzmann and Lu define a coding
416
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   454
  function for how values can be coded into bitsequences.
402
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   455
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   456
  \begin{center}
416
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   457
  \begin{tabular}{cc}
402
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   458
  \begin{tabular}{lcl}
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   459
  @{thm (lhs) code.simps(1)} & $\dn$ & @{thm (rhs) code.simps(1)}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   460
  @{thm (lhs) code.simps(2)} & $\dn$ & @{thm (rhs) code.simps(2)}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   461
  @{thm (lhs) code.simps(3)} & $\dn$ & @{thm (rhs) code.simps(3)}\\
416
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   462
  @{thm (lhs) code.simps(4)} & $\dn$ & @{thm (rhs) code.simps(4)}
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   463
  \end{tabular}
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   464
  &
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   465
  \begin{tabular}{lcl}
402
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   466
  @{thm (lhs) code.simps(5)[of "v\<^sub>1" "v\<^sub>2"]} & $\dn$ & @{thm (rhs) code.simps(5)[of "v\<^sub>1" "v\<^sub>2"]}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   467
  @{thm (lhs) code.simps(6)} & $\dn$ & @{thm (rhs) code.simps(6)}\\
416
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   468
  @{thm (lhs) code.simps(7)} & $\dn$ & @{thm (rhs) code.simps(7)}\\
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   469
  \mbox{\phantom{XX}}\\
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   470
  \end{tabular}
402
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   471
  \end{tabular}
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   472
  \end{center}
416
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   473
   
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   474
  \noindent
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   475
  As can be seen, this coding is ``lossy'' in the sense that we do not
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   476
  record explicitly character values and also not sequence values (for
418
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   477
  them we just append two bitsequences). However, the
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   478
  different alternatives for @{text Left}, respectively @{text Right}, are recorded as @{text Z} and
416
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   479
  @{text S} followed by some bitsequence. Similarly, we use @{text Z} to indicate
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   480
  if there is still a value coming in the list of @{text Stars}, whereas @{text S}
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   481
  indicates the end of the list. The lossiness makes the process of
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   482
  decoding a bit more involved, but the point is that if we have a
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   483
  regular expression \emph{and} a bitsequence of a corresponding value,
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   484
  then we can always decode the value accurately. The decoding can be
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   485
  defined by using two functions called $\textit{decode}'$ and
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   486
  \textit{decode}:
402
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   487
416
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   488
  \begin{center}
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   489
  \begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {}}
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   490
  $\textit{decode}'\,bs\,(\ONE)$ & $\dn$ & $(\Empty, bs)$\\
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   491
  $\textit{decode}'\,bs\,(c)$ & $\dn$ & $(\Char\,c, bs)$\\
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   492
  $\textit{decode}'\,(\Z\!::\!bs)\;(r_1 + r_2)$ & $\dn$ &
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   493
     $\textit{let}\,(v, bs_1) = \textit{decode}'\,bs\,r_1\;\textit{in}\;
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   494
       (\Left\,v, bs_1)$\\
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   495
  $\textit{decode}'\,(\S\!::\!bs)\;(r_1 + r_2)$ & $\dn$ &
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   496
     $\textit{let}\,(v, bs_1) = \textit{decode}'\,bs\,r_2\;\textit{in}\;
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   497
       (\Right\,v, bs_1)$\\                           
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   498
  $\textit{decode}'\,bs\;(r_1\cdot r_2)$ & $\dn$ &
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   499
        $\textit{let}\,(v_1, bs_1) = \textit{decode}'\,bs\,r_1\;\textit{in}$\\
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   500
  & &   $\textit{let}\,(v_2, bs_2) = \textit{decode}'\,bs_1\,r_2$
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   501
        \hspace{2mm}$\textit{in}\;(\Seq\,v_1\,v_2, bs_2)$\\
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   502
  $\textit{decode}'\,(\Z\!::\!bs)\,(r^*)$ & $\dn$ & $(\Stars\,[], bs)$\\
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   503
  $\textit{decode}'\,(\S\!::\!bs)\,(r^*)$ & $\dn$ & 
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   504
         $\textit{let}\,(v, bs_1) = \textit{decode}'\,bs\,r\;\textit{in}$\\
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   505
  & &   $\textit{let}\,(\Stars\,vs, bs_2) = \textit{decode}'\,bs_1\,r^*$
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   506
        \hspace{2mm}$\textit{in}\;(\Stars\,v\!::\!vs, bs_2)$\bigskip\\
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   507
  $\textit{decode}\,bs\,r$ & $\dn$ &
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   508
     $\textit{let}\,(v, bs') = \textit{decode}'\,bs\,r\;\textit{in}$\\
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   509
  & & \hspace{7mm}$\textit{if}\;bs' = []\;\textit{then}\;\textit{Some}\,v\;
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   510
       \textit{else}\;\textit{None}$   
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   511
  \end{tabular}    
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   512
  \end{center}
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   513
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   514
  \noindent
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   515
  The function \textit{decode} checks whether all of the bitsequence is
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   516
  consumed and returns the corresponding value as @{term "Some v"}; otherwise
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   517
  it fails with @{text "None"}. We can establish that for a value $v$
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   518
  inhabited by a regular expression $r$, the decoding of its
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   519
  bitsequence never fails.
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   520
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   521
\begin{lemma}\label{codedecode}\it
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   522
  If $\;\vdash v : r$ then
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   523
  $\;\textit{decode}\,(\textit{code}\, v)\,r = \textit{Some}\, v$.
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   524
\end{lemma}
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   525
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   526
\begin{proof}
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   527
  This follows from the property that
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   528
  $\textit{decode}'\,((\textit{code}\,v) \,@\, bs)\,r = (v, bs)$ holds
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   529
  for any bit-sequence $bs$ and $\vdash v : r$. This property can be
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   530
  easily proved by induction on $\vdash v : r$.
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   531
\end{proof}  
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   532
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   533
  Sulzmann and Lu define the function \emph{internalise}
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   534
  in order to transform standard regular expressions into annotated
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   535
  regular expressions. We write this operation as $r^\uparrow$.
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   536
  This internalisation uses the following
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   537
  \emph{fuse} function.	     
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   538
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   539
  \begin{center}
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   540
  \begin{tabular}{lcl}
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   541
  $\textit{fuse}\,bs\,(\textit{ZERO})$ & $\dn$ & $\textit{ZERO}$\\
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   542
  $\textit{fuse}\,bs\,(\textit{ONE}\,bs')$ & $\dn$ &
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   543
     $\textit{ONE}\,(bs\,@\,bs')$\\
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   544
  $\textit{fuse}\,bs\,(\textit{CHAR}\,bs'\,c)$ & $\dn$ &
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   545
     $\textit{CHAR}\,(bs\,@\,bs')\,c$\\
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   546
  $\textit{fuse}\,bs\,(\textit{ALTs}\,bs'\,rs)$ & $\dn$ &
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   547
     $\textit{ALTs}\,(bs\,@\,bs')\,rs$\\
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   548
  $\textit{fuse}\,bs\,(\textit{SEQ}\,bs'\,r_1\,r_2)$ & $\dn$ &
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   549
     $\textit{SEQ}\,(bs\,@\,bs')\,r_1\,r_2$\\
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   550
  $\textit{fuse}\,bs\,(\textit{STAR}\,bs'\,r)$ & $\dn$ &
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   551
     $\textit{STAR}\,(bs\,@\,bs')\,r$
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   552
  \end{tabular}    
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   553
  \end{center}    
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   554
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   555
  \noindent
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   556
  A regular expression can then be \emph{internalised} into a bitcoded
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   557
  regular expression as follows.
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   558
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   559
  \begin{center}
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   560
  \begin{tabular}{lcl}
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   561
  $(\ZERO)^\uparrow$ & $\dn$ & $\textit{ZERO}$\\
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   562
  $(\ONE)^\uparrow$ & $\dn$ & $\textit{ONE}\,[]$\\
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   563
  $(c)^\uparrow$ & $\dn$ & $\textit{CHAR}\,[]\,c$\\
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   564
  $(r_1 + r_2)^\uparrow$ & $\dn$ &
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   565
         $\textit{ALT}\;[]\,(\textit{fuse}\,[\Z]\,r_1^\uparrow)\,
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   566
                            (\textit{fuse}\,[\S]\,r_2^\uparrow)$\\
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   567
  $(r_1\cdot r_2)^\uparrow$ & $\dn$ &
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   568
         $\textit{SEQ}\;[]\,r_1^\uparrow\,r_2^\uparrow$\\
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   569
  $(r^*)^\uparrow$ & $\dn$ &
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   570
         $\textit{STAR}\;[]\,r^\uparrow$\\
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   571
  \end{tabular}    
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   572
  \end{center}    
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   573
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   574
  \noindent
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   575
  There is also an \emph{erase}-function, written $a^\downarrow$, which
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   576
  transforms a bitcoded regular expression into a (standard) regular
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   577
  expression by just erasing the annotated bitsequences. We omit the
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   578
  straightforward definition. For defining the algorithm, we also need
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   579
  the functions \textit{bnullable} and \textit{bmkeps}, which are the
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   580
  ``lifted'' versions of \textit{nullable} and \textit{mkeps} acting on
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   581
  bitcoded regular expressions, instead of regular expressions.
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   582
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   583
  \begin{center}
418
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   584
  \begin{tabular}{@ {}c@ {}c@ {}}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   585
  \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   586
  $\textit{bnullable}\,(\textit{ZERO})$ & $\dn$ & $\textit{false}$\\
416
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   587
  $\textit{bnullable}\,(\textit{ONE}\,bs)$ & $\dn$ & $\textit{true}$\\
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   588
  $\textit{bnullable}\,(\textit{CHAR}\,bs\,c)$ & $\dn$ & $\textit{false}$\\
418
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   589
  $\textit{bnullable}\,(\textit{ALTs}\,bs\,\rs)$ & $\dn$ &
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   590
     $\exists\, r \in \rs. \,\textit{bnullable}\,r$\\
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   591
  $\textit{bnullable}\,(\textit{SEQ}\,bs\,r_1\,r_2)$ & $\dn$ &
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   592
     $\textit{bnullable}\,r_1\wedge \textit{bnullable}\,r_2$\\
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   593
  $\textit{bnullable}\,(\textit{STAR}\,bs\,r)$ & $\dn$ &
416
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   594
     $\textit{true}$
418
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   595
  \end{tabular}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   596
  &
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   597
  \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   598
  $\textit{bmkeps}\,(\textit{ONE}\,bs)$ & $\dn$ & $bs$\\
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   599
  $\textit{bmkeps}\,(\textit{ALTs}\,bs\,r\!::\!\rs)$ & $\dn$ &
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   600
     $\textit{if}\;\textit{bnullable}\,r$\\
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   601
  & &$\textit{then}\;bs\,@\,\textit{bmkeps}\,r$\\
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   602
  & &$\textit{else}\;bs\,@\,\textit{bmkeps}\,\rs$\\
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   603
  $\textit{bmkeps}\,(\textit{SEQ}\,bs\,r_1\,r_2)$ & $\dn$ &\\
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   604
  \multicolumn{3}{r}{$bs \,@\,\textit{bmkeps}\,r_1\,@\, \textit{bmkeps}\,r_2$}\\
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   605
  $\textit{bmkeps}\,(\textit{STAR}\,bs\,r)$ & $\dn$ &
416
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   606
     $bs \,@\, [\S]$
418
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   607
  \end{tabular}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   608
  \end{tabular}
416
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   609
  \end{center}    
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   610
 
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   611
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   612
  \noindent
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   613
  The key function in the bitcoded algorithm is the derivative of an
418
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   614
  bitcoded regular expression. This derivative calculates the
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   615
  derivative but at the same time also the incremental part of bitsequences
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   616
  that contribute to constructing a POSIX value.	
416
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   617
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   618
  \begin{center}
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   619
  \begin{tabular}{@ {}lcl@ {}}
418
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   620
  $(\textit{ZERO})\backslash c$ & $\dn$ & $\textit{ZERO}$ \\  
416
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   621
  $(\textit{ONE}\;bs)\backslash c$ & $\dn$ & $\textit{ZERO}$\\  
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   622
  $(\textit{CHAR}\;bs\,d)\backslash c$ & $\dn$ &
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   623
        $\textit{if}\;c=d\; \;\textit{then}\;
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   624
         \textit{ONE}\;bs\;\textit{else}\;\textit{ZERO}$\\  
418
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   625
  $(\textit{ALTs}\;bs\,\rs)\backslash c$ & $\dn$ &
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   626
        $\textit{ALTs}\,bs\,(\mathit{map}\,(\_\backslash c)\,\rs)$\\
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   627
  $(\textit{SEQ}\;bs\,r_1\,r_2)\backslash c$ & $\dn$ &
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   628
     $\textit{if}\;\textit{bnullable}\,r_1$\\
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   629
  & &$\textit{then}\;\textit{ALT}\,bs\,(\textit{SEQ}\,[]\,(r_1\backslash c)\,r_2)$\\
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   630
  & &$\phantom{\textit{then}\;\textit{ALT}\,bs\,}(\textit{fuse}\,(\textit{bmkeps}\,r_1)\,(r_2\backslash c))$\\
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   631
  & &$\textit{else}\;\textit{SEQ}\,bs\,(r_1\backslash c)\,r_2$\\
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   632
  $(\textit{STAR}\,bs\,r)\backslash c$ & $\dn$ &
416
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   633
      $\textit{SEQ}\;bs\,(\textit{fuse}\, [\Z] (r\backslash c))\,
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   634
       (\textit{STAR}\,[]\,r)$
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   635
  \end{tabular}    
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   636
  \end{center}
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   637
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   638
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   639
  \noindent
418
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   640
  This function can also be extended to strings, written $r\backslash s$,
416
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   641
  just like the standard derivative.  We omit the details. Finally we
418
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   642
  can define Sulzmann and Lu's bitcoded lexer, which we call \textit{blexer}:
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   643
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   644
  \begin{center}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   645
\begin{tabular}{lcl}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   646
  $\textit{blexer}\;r\,s$ & $\dn$ &
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   647
      $\textit{let}\;r_{der} = (r^\uparrow)\backslash s\;\textit{in}$\\                
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   648
  & & $\;\;\;\;\textit{if}\; \textit{bnullable}(r_{der}) \;\;\textit{then}\;\textit{decode}\,(\textit{bmkeps}\,r_{der})\,r
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   649
       \;\;\textit{else}\;\textit{None}$
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   650
\end{tabular}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   651
\end{center}
416
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   652
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   653
  \noindent
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   654
This bitcoded lexer first internalises the regular expression $r$ and then
418
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   655
builds the bitcoded derivative according to $s$. If the derivative is
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   656
(b)nullable the string is in the language of $r$ and it extracts the bitsequence using the
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   657
$\textit{bmkeps}$ function. Finally it decodes the bitsequence into a value.  If
416
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   658
the derivative is \emph{not} nullable, then $\textit{None}$ is
418
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   659
returned. We can show that this way of calculating a value
416
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   660
generates the same result as with \textit{lexer}.
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   661
418
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   662
Before we can proceed we need to define a helper function, called
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   663
\textit{retrieve}, which Sulzmann and Lu introduced for the correctness proof.
416
57182b36ec01 more with the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 410
diff changeset
   664
418
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   665
\begin{center}
402
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   666
  \begin{tabular}{lcl}
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   667
  @{thm (lhs) retrieve.simps(1)} & $\dn$ & @{thm (rhs) retrieve.simps(1)}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   668
  @{thm (lhs) retrieve.simps(2)} & $\dn$ & @{thm (rhs) retrieve.simps(2)}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   669
  @{thm (lhs) retrieve.simps(3)} & $\dn$ & @{thm (rhs) retrieve.simps(3)}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   670
  @{thm (lhs) better_retrieve(1)} & $\dn$ & @{thm (rhs) better_retrieve(1)}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   671
  @{thm (lhs) better_retrieve(2)} & $\dn$ & @{thm (rhs) better_retrieve(2)}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   672
  @{thm (lhs) retrieve.simps(6)[of _ "r\<^sub>1" "r\<^sub>2" "v\<^sub>1" "v\<^sub>2"]}
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   673
      & $\dn$ & @{thm (rhs) retrieve.simps(6)[of _ "r\<^sub>1" "r\<^sub>2" "v\<^sub>1" "v\<^sub>2"]}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   674
  @{thm (lhs) retrieve.simps(7)} & $\dn$ & @{thm (rhs) retrieve.simps(7)}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   675
  @{thm (lhs) retrieve.simps(8)} & $\dn$ & @{thm (rhs) retrieve.simps(8)}
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   676
  \end{tabular}
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   677
  \end{center}
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   678
418
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   679
\noindent
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   680
The idea behind this function is to retrieve a possibly partial
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   681
bitcode from a bitcoded regular expression, where the retrieval is
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   682
guided by a value.  For example if the value is $\Left$ then we
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   683
descend into the left-hand side of an alternative in order to
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   684
assemble the bitcode. Similarly for
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   685
$\Right$. The property we can show is that for a given $v$ and $r$
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   686
with $\vdash v : r$, the retrieved bitsequence from the internalised
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   687
regular expression is equal to the bitcoded version of $v$.
402
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   688
418
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   689
\begin{lemma}\label{retrievecode}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   690
If $\vdash v : r$ then $\textit{code}\, v = \textit{retrieve}\,(r^\uparrow)\,v$.
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   691
\end{lemma}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   692
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   693
\noindent
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   694
We also need some auxiliary facts about how the bitcoded operations
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   695
relate to the ``standard'' operations on regular expressions. For
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   696
example if we build a bitcoded derivative and erase the result, this
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   697
is the same as if we first erase the bitcoded regular expression and
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   698
then perform the ``standard'' derivative operation.
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   699
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   700
\begin{lemma}\label{bnullable}\mbox{}\smallskip\\
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   701
  \begin{tabular}{ll}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   702
\textit{(1)} & $(a\backslash s)^\downarrow = (a^\downarrow)\backslash s$\\    
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   703
\textit{(2)} & $\textit{bnullable}(a)$ iff $\textit{nullable}(a^\downarrow)$\\
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   704
\textit{(3)} & $\textit{bmkeps}(a) = \textit{retrieve}\,a\,(\textit{mkeps}\,(a^\downarrow))$ provided $\textit{nullable}(a^\downarrow)$.
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   705
\end{tabular}  
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   706
\end{lemma}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   707
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   708
\begin{proof}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   709
  All properties are by induction on annotated regular expressions. There are no
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   710
  interesting cases.
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   711
\end{proof}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   712
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   713
\noindent
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   714
This brings us to our main lemma in this section: if we build a
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   715
derivative, say $r\backslash s$ and have a value, say $v$, inhabited
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   716
by this derivative, then we can produce the result $\lexer$ generates
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   717
by applying this value to the stacked-up injection functions
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   718
$\textit{flex}$ assembles. The lemma establishes that this is the same
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   719
value as if we build the annotated derivative $r^\uparrow\backslash s$
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   720
and then retrieve the corresponding bitcoded version, followed by a
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   721
decoding step.
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   722
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   723
\begin{lemma}[Main Lemma]\label{mainlemma}\it
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   724
If $\vdash v : r\backslash s$ then 
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   725
\[\textit{Some}\,(\textit{flex}\,r\,\textit{id}\,s\,v) =
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   726
  \textit{decode}(\textit{retrieve}\,(r^\uparrow \backslash s)\,v)\,r\]
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   727
\end{lemma}  
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   728
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   729
\begin{proof}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   730
  This can be proved by induction on $s$ and generalising over
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   731
  $v$. The interesting point is that we need to prove this in the
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   732
  reverse direction for $s$. This means instead of cases $[]$ and
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   733
  $c\!::\!s$, we have cases $[]$ and $s\,@\,[c]$ where we unravel the
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   734
  string from the back.\footnote{Isabelle/HOL provides an induction principle
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   735
    for this way of performing the induction.}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   736
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   737
  The case for $[]$ is routine using Lemmas~\ref{codedecode}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   738
  and~\ref{retrievecode}. In the case $s\,@\,[c]$, we can infer from
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   739
  the assumption that $\vdash v : (r\backslash s)\backslash c$
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   740
  holds. Hence by Lemma~\ref{Posix2} we know that 
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   741
  (*) $\vdash \inj\,(r\backslash s)\,c\,v : r\backslash s$ holds too.
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   742
  By definition of $\textit{flex}$ we can unfold the left-hand side
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   743
  to be
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   744
  \[
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   745
    \textit{Some}\,(\textit{flex}\;r\,\textit{id}\,(s\,@\,[c])\,v) =
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   746
    \textit{Some}\,(\textit{flex}\;r\,\textit{id}\,s\,(\inj\,(r\backslash s)\,c\,v))  
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   747
  \]  
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   748
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   749
  \noindent
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   750
  By induction hypothesis and (*) we can rewrite the right-hand side to
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   751
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   752
  \[
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   753
    \textit{decode}\,(\textit{retrieve}\,(r^\uparrow\backslash s)\;
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   754
    (\inj\,(r\backslash s)\,c\,\,v))\,r
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   755
  \]
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   756
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   757
  \noindent
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   758
  which is equal to
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   759
  $\textit{decode}\,(\textit{retrieve}\, (r^\uparrow\backslash
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   760
  (s\,@\,[c]))\,v)\,r$ as required. The last rewrite step is possible
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   761
  because we generalised over $v$ in our induction.
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   762
\end{proof}  
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   763
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   764
\noindent
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   765
With this lemma in place, we can prove the correctness of \textit{blexer} such
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   766
that it produces the same result as \textit{lexer}.
405
3cfea5bb5e23 updated some of the text and cardinality proof
Christian Urban <christian.urban@kcl.ac.uk>
parents: 402
diff changeset
   767
3cfea5bb5e23 updated some of the text and cardinality proof
Christian Urban <christian.urban@kcl.ac.uk>
parents: 402
diff changeset
   768
418
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   769
\begin{theorem}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   770
$\textit{lexer}\,r\,s = \textit{blexer}\,r\,s$
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   771
\end{theorem}  
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   772
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   773
\begin{proof}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   774
  We can first expand both sides using Lemma~\ref{flex} and the
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   775
  definition of \textit{blexer}. This gives us two
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   776
  \textit{if}-statements, which we need to show to be equal. By 
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   777
  Lemma~\ref{bnullable}\textit{(2)} we know the \textit{if}-tests coincide:
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   778
  \[
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   779
    \textit{bnullable}(r^\uparrow\backslash s) \;\textit{iff}\;
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   780
    \nullable(r\backslash s)
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   781
  \]
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   782
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   783
  \noindent
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   784
  For the \textit{if}-branch suppose $r_d \dn r^\uparrow\backslash s$ and
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   785
  $d \dn r\backslash s$. We have (*) $\nullable\,d$. We can then show
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   786
  by Lemma~\ref{bnullable}\textit{(3)} that
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   787
  %
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   788
  \[
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   789
    \textit{decode}(\textit{bmkeps}\,r_d)\,r =
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   790
    \textit{decode}(\textit{retrieve}\,a\,(\textit{mkeps}\,d))\,r
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   791
  \]
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   792
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   793
  \noindent
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   794
  where the right-hand side is equal to
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   795
  $\textit{Some}\,(\textit{flex}\,r\,\textit{id}\,s\,(\textit{mkeps}\,
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   796
  d))$ by Lemma~\ref{mainlemma} (we know
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   797
  $\vdash \textit{mkeps}\,d : d$ by (*)).  This shows the
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   798
  \textit{if}-branches return the same value. In the
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   799
  \textit{else}-branches both \textit{lexer} and \textit{blexer} return
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   800
  \textit{None}. Therefore we can conclude the proof.
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   801
\end{proof}  
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   802
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   803
\noindent
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   804
This establishes that the bitcoded algorithm by Sulzmann
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   805
and Lu without simplification produces correct results. This was
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   806
only conjectured in their paper \cite{Sulzmann2014}. The next step
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   807
is to add simplifications.
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   808
397
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   809
*}
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   810
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   811
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   812
section {* Simplification *}
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   813
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   814
text {*
418
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   815
420
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   816
     Derivatives as calculated by Brzozowski’s method are usually more
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   817
     complex regular expressions than the initial one; the result is
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   818
     that the derivative-based matching and lexing algorithms are
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   819
     often abysmally slow.
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   820
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   821
     However, as Sulzmann and
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   822
     Lu wrote, various optimisations are possible, such as the
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   823
     simplifications of 0 + r,r + 0,1 · r and r · 1 to r. These
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   824
     simplifications can speed up the algorithms considerably.
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   825
b66a4305749c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 418
diff changeset
   826
418
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   827
     \begin{lemma}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   828
     @{thm[mode=IfThen] bnullable0(1)[of "r\<^sub>1" "r\<^sub>2"]}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   829
     \end{lemma}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   830
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   831
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   832
     \begin{lemma}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   833
     @{thm[mode=IfThen] rewrite_bmkeps_aux(1)[of "r\<^sub>1" "r\<^sub>2"]}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   834
     \end{lemma}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   835
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   836
     \begin{lemma}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   837
     @{thm[mode=IfThen] rewrites_to_bsimp}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   838
     \end{lemma}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   839
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   840
     \begin{lemma}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   841
     @{thm[mode=IfThen] rewrite_preserves_bder(1)[of "r\<^sub>1" "r\<^sub>2"]}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   842
     \end{lemma}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   843
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   844
     \begin{lemma}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   845
     @{thm[mode=IfThen] central}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   846
     \end{lemma}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   847
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   848
     \begin{theorem}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   849
     @{thm[mode=IfThen] main_blexer_simp}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   850
     \end{theorem}
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   851
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   852
     Sulzmann \& Lu apply simplification via a fixpoint operation
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   853
41a2a3b63853 more of the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 416
diff changeset
   854
     ; also does not use erase to filter out duplicates.
402
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   855
  
397
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   856
   not direct correspondence with PDERs, because of example
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   857
   problem with retrieve 
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   858
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   859
   correctness
398
dac6d27c99c6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 397
diff changeset
   860
402
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   861
   
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   862
    
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   863
398
dac6d27c99c6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 397
diff changeset
   864
dac6d27c99c6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 397
diff changeset
   865
dac6d27c99c6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 397
diff changeset
   866
   \begin{figure}[t]
dac6d27c99c6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 397
diff changeset
   867
  \begin{center}
dac6d27c99c6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 397
diff changeset
   868
  \begin{tabular}{c}
402
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   869
  @{thm[mode=Axiom] bs1[of _ "r\<^sub>2"]}\qquad
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   870
  @{thm[mode=Axiom] bs2[of _ "r\<^sub>1"]}\qquad
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   871
  @{thm[mode=Axiom] bs3[of "bs\<^sub>1" "bs\<^sub>2"]}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   872
  @{thm[mode=Rule] bs4[of "r\<^sub>1" "r\<^sub>2" _ "r\<^sub>3"]}\qquad
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   873
  @{thm[mode=Rule] bs5[of "r\<^sub>3" "r\<^sub>4" _ "r\<^sub>1"]}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   874
  @{thm[mode=Axiom] bs6}\qquad
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   875
  @{thm[mode=Axiom] bs7}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   876
  @{thm[mode=Rule] bs8[of "rs\<^sub>1" "rs\<^sub>2"]}\\
410
9261d980225d updated papers
Christian Urban <christian.urban@kcl.ac.uk>
parents: 405
diff changeset
   877
  %@ { t hm[mode=Axiom] ss1}\qquad
402
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   878
  @{thm[mode=Rule] ss2[of "rs\<^sub>1" "rs\<^sub>2"]}\qquad
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   879
  @{thm[mode=Rule] ss3[of "r\<^sub>1" "r\<^sub>2"]}\\
398
dac6d27c99c6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 397
diff changeset
   880
  @{thm[mode=Axiom] ss4}\qquad
402
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   881
  @{thm[mode=Axiom] ss5[of "bs" "rs\<^sub>1" "rs\<^sub>2"]}\\
1612f2a77bf6 more definitions in the paper
Christian Urban <christian.urban@kcl.ac.uk>
parents: 400
diff changeset
   882
  @{thm[mode=Rule] ss6[of "r\<^sub>1" "r\<^sub>2" "rs\<^sub>1" "rs\<^sub>2" "rs\<^sub>3"]}\\
398
dac6d27c99c6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 397
diff changeset
   883
  \end{tabular}
dac6d27c99c6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 397
diff changeset
   884
  \end{center}
dac6d27c99c6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 397
diff changeset
   885
  \caption{???}\label{SimpRewrites}
dac6d27c99c6 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 397
diff changeset
   886
  \end{figure}
397
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   887
*}
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   888
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   889
section {* Bound - NO *}
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   890
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   891
section {* Bounded Regex / Not *}
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   892
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   893
section {* Conclusion *}
e1b74d618f1b updated Sizebound4
Christian Urban <christian.urban@kcl.ac.uk>
parents: 396
diff changeset
   894
396
cc8e231529fb added ITP paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   895
text {*
cc8e231529fb added ITP paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   896
cc8e231529fb added ITP paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   897
\cite{AusafDyckhoffUrban2016}
cc8e231529fb added ITP paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   898
cc8e231529fb added ITP paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   899
%%\bibliographystyle{plain}
cc8e231529fb added ITP paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   900
\bibliography{root}
cc8e231529fb added ITP paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   901
*}
cc8e231529fb added ITP paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   902
cc8e231529fb added ITP paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   903
(*<*)
cc8e231529fb added ITP paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   904
end
cc8e231529fb added ITP paper
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   905
(*>*)