thys2/Paper/Paper.thy
changeset 397 e1b74d618f1b
parent 396 cc8e231529fb
child 398 dac6d27c99c6
equal deleted inserted replaced
396:cc8e231529fb 397:e1b74d618f1b
     5    "../Simplifying" 
     5    "../Simplifying" 
     6    "../Positions"
     6    "../Positions"
     7    "../SizeBound4" 
     7    "../SizeBound4" 
     8    "HOL-Library.LaTeXsugar"
     8    "HOL-Library.LaTeXsugar"
     9 begin
     9 begin
       
    10 
       
    11 declare [[show_question_marks = false]]
       
    12 
       
    13 abbreviation 
       
    14   "der_syn r c \<equiv> der c r"
       
    15 
       
    16 notation (latex output)
       
    17   der_syn ("_\\_" [79, 1000] 76) and
       
    18 
       
    19   ZERO ("\<^bold>0" 81) and 
       
    20   ONE ("\<^bold>1" 81) and 
       
    21   CH ("_" [1000] 80) and
       
    22   ALT ("_ + _" [77,77] 78) and
       
    23   SEQ ("_ \<cdot> _" [77,77] 78) and
       
    24   STAR ("_\<^sup>\<star>" [79] 78) 
       
    25 
    10 (*>*)
    26 (*>*)
       
    27 
       
    28 section {* Introduction *}
       
    29 
       
    30 text {*
       
    31 
       
    32 Brzozowski \cite{Brzozowski1964} introduced the notion of the {\em
       
    33 derivative} @{term "der c r"} of a regular expression \<open>r\<close> w.r.t.\
       
    34 a character~\<open>c\<close>, and showed that it gave a simple solution to the
       
    35 problem of matching a string @{term s} with a regular expression @{term
       
    36 r}: if the derivative of @{term r} w.r.t.\ (in succession) all the
       
    37 characters of the string matches the empty string, then @{term r}
       
    38 matches @{term s} (and {\em vice versa}). The derivative has the
       
    39 property (which may almost be regarded as its specification) that, for
       
    40 every string @{term s} and regular expression @{term r} and character
       
    41 @{term c}, one has @{term "cs \<in> L(r)"} if and only if \mbox{@{term "s \<in> L(der c r)"}}. 
       
    42 The beauty of Brzozowski's derivatives is that
       
    43 they are neatly expressible in any functional language, and easily
       
    44 definable and reasoned about in theorem provers---the definitions just
       
    45 consist of inductive datatypes and simple recursive functions. A
       
    46 mechanised correctness proof of Brzozowski's matcher in for example HOL4
       
    47 has been mentioned by Owens and Slind~\cite{Owens2008}. Another one in
       
    48 Isabelle/HOL is part of the work by Krauss and Nipkow \cite{Krauss2011}.
       
    49 And another one in Coq is given by Coquand and Siles \cite{Coquand2012}.
       
    50 
       
    51 If a regular expression matches a string, then in general there is more
       
    52 than one way of how the string is matched. There are two commonly used
       
    53 disambiguation strategies to generate a unique answer: one is called
       
    54 GREEDY matching \cite{Frisch2004} and the other is POSIX
       
    55 matching~\cite{POSIX,Kuklewicz,OkuiSuzuki2010,Sulzmann2014,Vansummeren2006}.
       
    56 For example consider the string @{term xy} and the regular expression
       
    57 \mbox{@{term "STAR (ALT (ALT x y) xy)"}}. Either the string can be
       
    58 matched in two `iterations' by the single letter-regular expressions
       
    59 @{term x} and @{term y}, or directly in one iteration by @{term xy}. The
       
    60 first case corresponds to GREEDY matching, which first matches with the
       
    61 left-most symbol and only matches the next symbol in case of a mismatch
       
    62 (this is greedy in the sense of preferring instant gratification to
       
    63 delayed repletion). The second case is POSIX matching, which prefers the
       
    64 longest match.
       
    65 
       
    66 
       
    67 
       
    68 \begin{figure}[t]
       
    69 \begin{center}
       
    70 \begin{tikzpicture}[scale=2,node distance=1.3cm,
       
    71                     every node/.style={minimum size=6mm}]
       
    72 \node (r1)  {@{term "r\<^sub>1"}};
       
    73 \node (r2) [right=of r1]{@{term "r\<^sub>2"}};
       
    74 \draw[->,line width=1mm](r1)--(r2) node[above,midway] {@{term "der a DUMMY"}};
       
    75 \node (r3) [right=of r2]{@{term "r\<^sub>3"}};
       
    76 \draw[->,line width=1mm](r2)--(r3) node[above,midway] {@{term "der b DUMMY"}};
       
    77 \node (r4) [right=of r3]{@{term "r\<^sub>4"}};
       
    78 \draw[->,line width=1mm](r3)--(r4) node[above,midway] {@{term "der c DUMMY"}};
       
    79 \draw (r4) node[anchor=west] {\;\raisebox{3mm}{@{term nullable}}};
       
    80 \node (v4) [below=of r4]{@{term "v\<^sub>4"}};
       
    81 \draw[->,line width=1mm](r4) -- (v4);
       
    82 \node (v3) [left=of v4] {@{term "v\<^sub>3"}};
       
    83 \draw[->,line width=1mm](v4)--(v3) node[below,midway] {\<open>inj r\<^sub>3 c\<close>};
       
    84 \node (v2) [left=of v3]{@{term "v\<^sub>2"}};
       
    85 \draw[->,line width=1mm](v3)--(v2) node[below,midway] {\<open>inj r\<^sub>2 b\<close>};
       
    86 \node (v1) [left=of v2] {@{term "v\<^sub>1"}};
       
    87 \draw[->,line width=1mm](v2)--(v1) node[below,midway] {\<open>inj r\<^sub>1 a\<close>};
       
    88 \draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{@{term "mkeps"}}};
       
    89 \end{tikzpicture}
       
    90 \end{center}
       
    91 \mbox{}\\[-13mm]
       
    92 
       
    93 \caption{The two phases of the algorithm by Sulzmann \& Lu \cite{Sulzmann2014},
       
    94 matching the string @{term "[a,b,c]"}. The first phase (the arrows from 
       
    95 left to right) is \Brz's matcher building successive derivatives. If the 
       
    96 last regular expression is @{term nullable}, then the functions of the 
       
    97 second phase are called (the top-down and right-to-left arrows): first 
       
    98 @{term mkeps} calculates a value @{term "v\<^sub>4"} witnessing
       
    99 how the empty string has been recognised by @{term "r\<^sub>4"}. After
       
   100 that the function @{term inj} ``injects back'' the characters of the string into
       
   101 the values.
       
   102 \label{Sulz}}
       
   103 \end{figure} 
       
   104 
       
   105 
       
   106 *}
       
   107 
       
   108 section {* Background *}
       
   109 
       
   110 text {*
       
   111   Sulzmann-Lu algorithm with inj. State that POSIX rules.
       
   112   metion slg is correct.
       
   113 
       
   114 
       
   115   \begin{figure}[t]
       
   116   \begin{center}
       
   117   \begin{tabular}{c}
       
   118   @{thm[mode=Axiom] Posix.intros(1)}\<open>P\<close>@{term "ONE"} \qquad
       
   119   @{thm[mode=Axiom] Posix.intros(2)}\<open>P\<close>@{term "c"}\medskip\\
       
   120   @{thm[mode=Rule] Posix.intros(3)[of "s" "r\<^sub>1" "v" "r\<^sub>2"]}\<open>P+L\<close>\qquad
       
   121   @{thm[mode=Rule] Posix.intros(4)[of "s" "r\<^sub>2" "v" "r\<^sub>1"]}\<open>P+R\<close>\medskip\\
       
   122   $\mprset{flushleft}
       
   123    \inferrule
       
   124    {@{thm (prem 1) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \qquad
       
   125     @{thm (prem 2) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]} \\\\
       
   126     @{thm (prem 3) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}
       
   127    {@{thm (concl) Posix.intros(5)[of "s\<^sub>1" "r\<^sub>1" "v\<^sub>1" "s\<^sub>2" "r\<^sub>2" "v\<^sub>2"]}}$\<open>PS\<close>\\
       
   128   @{thm[mode=Axiom] Posix.intros(7)}\<open>P[]\<close>\medskip\\
       
   129   $\mprset{flushleft}
       
   130    \inferrule
       
   131    {@{thm (prem 1) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad
       
   132     @{thm (prem 2) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \qquad
       
   133     @{thm (prem 3) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]} \\\\
       
   134     @{thm (prem 4) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}
       
   135    {@{thm (concl) Posix.intros(6)[of "s\<^sub>1" "r" "v" "s\<^sub>2" "vs"]}}$\<open>P\<star>\<close>
       
   136   \end{tabular}
       
   137   \end{center}
       
   138   \caption{Our inductive definition of POSIX values.}\label{POSIXrules}
       
   139   \end{figure}
       
   140 
       
   141 
       
   142 *}
       
   143 
       
   144 section {* Bitcoded Derivatives *}
       
   145 
       
   146 text {*
       
   147   bitcoded regexes / decoding / bmkeps
       
   148   gets rid of the second phase (only single phase)   
       
   149   correctness
       
   150 *}
       
   151 
       
   152 
       
   153 section {* Simplification *}
       
   154 
       
   155 text {*
       
   156    not direct correspondence with PDERs, because of example
       
   157    problem with retrieve 
       
   158 
       
   159    correctness
       
   160 *}
       
   161 
       
   162 section {* Bound - NO *}
       
   163 
       
   164 section {* Bounded Regex / Not *}
       
   165 
       
   166 section {* Conclusion *}
       
   167 
    11 text {*
   168 text {*
    12 
   169 
    13 \cite{AusafDyckhoffUrban2016}
   170 \cite{AusafDyckhoffUrban2016}
    14 
   171 
    15 %%\bibliographystyle{plain}
   172 %%\bibliographystyle{plain}