thys/Paper/Paper.thy
changeset 114 8b41d01b5e5d
parent 113 90fe1a1d7d0e
child 115 15ef2af1a6f2
equal deleted inserted replaced
113:90fe1a1d7d0e 114:8b41d01b5e5d
     5 
     5 
     6 declare [[show_question_marks = false]]
     6 declare [[show_question_marks = false]]
     7 
     7 
     8 abbreviation 
     8 abbreviation 
     9  "der_syn r c \<equiv> der c r"
     9  "der_syn r c \<equiv> der c r"
       
    10 
       
    11 abbreviation 
       
    12  "ders_syn r s \<equiv> ders s r"
    10 
    13 
    11 notation (latex output)
    14 notation (latex output)
    12   If  ("(\<^raw:\textrm{>if\<^raw:}> (_)/ \<^raw:\textrm{>then\<^raw:}> (_)/ \<^raw:\textrm{>else\<^raw:}> (_))" 10) and
    15   If  ("(\<^raw:\textrm{>if\<^raw:}> (_)/ \<^raw:\textrm{>then\<^raw:}> (_)/ \<^raw:\textrm{>else\<^raw:}> (_))" 10) and
    13   Cons ("_\<^raw:\mbox{$\,$}>::\<^raw:\mbox{$\,$}>_" [78,77] 73) and
    16   Cons ("_\<^raw:\mbox{$\,$}>::\<^raw:\mbox{$\,$}>_" [78,77] 73) and
    14   
    17   
    23   val.Char ("Char _" [1000] 79) and
    26   val.Char ("Char _" [1000] 79) and
    24   val.Left ("Left _" [1000] 78) and
    27   val.Left ("Left _" [1000] 78) and
    25   val.Right ("Right _" [1000] 78) and
    28   val.Right ("Right _" [1000] 78) and
    26   
    29   
    27   L ("L'(_')" [10] 78) and
    30   L ("L'(_')" [10] 78) and
    28   der_syn ("_\\_" [79, 1000] 76) and
    31   der_syn ("_\\_" [79, 1000] 76) and  
       
    32   ders_syn ("_\\_" [79, 1000] 76) and
    29   flat ("|_|" [75] 73) and
    33   flat ("|_|" [75] 73) and
    30   Sequ ("_ @ _" [78,77] 63) and
    34   Sequ ("_ @ _" [78,77] 63) and
    31   injval ("inj _ _ _" [78,77,78] 77) and 
    35   injval ("inj _ _ _" [78,77,78] 77) and 
    32   projval ("proj _ _ _" [1000,77,1000] 77) and 
    36   projval ("proj _ _ _" [1000,77,1000] 77) and 
    33   length ("len _" [78] 73) and
    37   length ("len _" [78] 73) and
    34 
    38 
    35   Prf ("\<triangleright> _ : _" [75,75] 75) and  
    39   Prf ("\<triangleright> _ : _" [75,75] 75) and  
    36   PMatch ("_ : _ \<rightarrow> _" [75,75,75] 75)
    40   PMatch ("_ : _ \<rightarrow> _" [75,75,75] 75)
    37   (* and ValOrd ("_ \<succeq>\<^bsub>_\<^esub> _" [78,77,77] 73) *)
    41   (* and ValOrd ("_ \<succeq>\<^bsub>_\<^esub> _" [78,77,77] 73) *)
       
    42 
       
    43 definition 
       
    44   "match r s \<equiv> nullable (ders s r)"
       
    45 
    38 (*>*)
    46 (*>*)
    39 
    47 
    40 section {* Introduction *}
    48 section {* Introduction *}
    41 
    49 
    42 
    50 
   215   
   223   
   216   \noindent 
   224   \noindent 
   217   For semantic derivatives we have the following equations (for example
   225   For semantic derivatives we have the following equations (for example
   218   \cite{Krauss2011}):
   226   \cite{Krauss2011}):
   219 
   227 
   220   \begin{equation}
   228   \begin{equation}\label{SemDer}
   221   \begin{array}{lcl}
   229   \begin{array}{lcl}
   222   @{thm (lhs) Der_null}  & \dn & @{thm (rhs) Der_null}\\
   230   @{thm (lhs) Der_null}  & \dn & @{thm (rhs) Der_null}\\
       
   231   @{thm (lhs) Der_empty}  & \dn & @{thm (rhs) Der_empty}\\
       
   232   @{thm (lhs) Der_char}  & \dn & @{thm (rhs) Der_char}\\
       
   233   @{thm (lhs) Der_union}  & \dn & @{thm (rhs) Der_union}\\
       
   234   @{thm (lhs) Der_Sequ}  & \dn & @{thm (rhs) Der_Sequ}\\
       
   235   @{thm (lhs) Der_star}  & \dn & @{thm (rhs) Der_star}
   223   \end{array}
   236   \end{array}
   224   \end{equation}
   237   \end{equation}
   225 
   238 
   226 
   239 
   227   \noindent \emph{\Brz's derivatives} of regular expressions
   240   \noindent \emph{\Brz's derivatives} of regular expressions
   245   @{thm (lhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]}\\
   258   @{thm (lhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) der.simps(4)[of c "r\<^sub>1" "r\<^sub>2"]}\\
   246   @{thm (lhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]}\\
   259   @{thm (lhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) der.simps(5)[of c "r\<^sub>1" "r\<^sub>2"]}\\
   247   @{thm (lhs) der.simps(6)} & $\dn$ & @{thm (rhs) der.simps(6)}
   260   @{thm (lhs) der.simps(6)} & $\dn$ & @{thm (rhs) der.simps(6)}
   248   \end{tabular}
   261   \end{tabular}
   249   \end{center}
   262   \end{center}
   250 
   263  
   251   \noindent
   264   \noindent
   252   Given the equations in ???, 
   265   We may extend this definition to give derivatives w.r.t.~strings:
   253   it is a relatively easy exercise in mechanical reasoning to establish that
   266 
   254 
   267   \begin{center}
   255   \begin{proposition} 
   268   \begin{tabular}{lcl}
       
   269   @{thm (lhs) ders.simps(1)} & $\dn$ & @{thm (rhs) ders.simps(1)}\\
       
   270   @{thm (lhs) ders.simps(2)} & $\dn$ & @{thm (rhs) ders.simps(2)}\\
       
   271   \end{tabular}
       
   272   \end{center}
       
   273 
       
   274   \noindent Given the equations in \eqref{SemDer}, it is a relatively easy
       
   275   exercise in mechanical reasoning to establish that
       
   276 
       
   277   \begin{proposition}\mbox{}\\ 
   256   \begin{tabular}{ll}
   278   \begin{tabular}{ll}
   257   @{text "(1)"} & @{thm (lhs) nullable_correctness} if and only if
   279   @{text "(1)"} & @{thm (lhs) nullable_correctness} if and only if
   258   @{thm (rhs) nullable_correctness} \\ 
   280   @{thm (rhs) nullable_correctness} \\ 
   259   @{text "(2)"} & @{thm[mode=IfThen] der_correctness}
   281   @{text "(2)"} & @{thm[mode=IfThen] der_correctness}
   260   \end{tabular}
   282   \end{tabular}
   262 
   284 
   263   \noindent With this in place it is also very routine to prove that the
   285   \noindent With this in place it is also very routine to prove that the
   264   regular expression matcher defined as
   286   regular expression matcher defined as
   265 
   287 
   266   \begin{center}
   288   \begin{center}
   267 
   289   @{thm match_def}
   268   \end{center}
   290   \end{center}
   269 
   291 
   270   While the matcher above calculates a provably correct a YES/NO answer for
   292   \noindent gives a positive answer if and only if @{term "s \<in> L r"}. While
       
   293   the matcher above calculates a provably correct a YES/NO answer for
   271   whether a regular expression matches a string, the novel idea of Sulzmann
   294   whether a regular expression matches a string, the novel idea of Sulzmann
   272   and Lu \cite{Sulzmann2014} is to append another phase to calculate a
   295   and Lu \cite{Sulzmann2014} is to append another phase to calculate a
   273   value.
   296   value. We will explain the details next.
   274 
   297 
   275 *}
   298 *}
   276 
   299 
   277 section {* POSIX Regular Expression Matching *}
   300 section {* POSIX Regular Expression Matching *}
   278 
   301 
   291   @{term "Right v"} $\mid$
   314   @{term "Right v"} $\mid$
   292   @{term "Seq v\<^sub>1 v\<^sub>2"} $\mid$ 
   315   @{term "Seq v\<^sub>1 v\<^sub>2"} $\mid$ 
   293   @{term "Stars vs"} 
   316   @{term "Stars vs"} 
   294   \end{center}  
   317   \end{center}  
   295 
   318 
   296 The inhabitation relation:
   319   \noindent where we use @{term vs} standing for a list of values. The string
       
   320   underlying a values can be calculated by the @{const flat} function, written
       
   321   @{term "flat DUMMY"} and defined as:
       
   322 
       
   323   \begin{center}
       
   324   \begin{tabular}{lcl}
       
   325   @{thm (lhs) flat.simps(1)} & $\dn$ & @{thm (rhs) flat.simps(1)}\\
       
   326   @{thm (lhs) flat.simps(2)} & $\dn$ & @{thm (rhs) flat.simps(2)}\\
       
   327   @{thm (lhs) flat.simps(3)} & $\dn$ & @{thm (rhs) flat.simps(3)}\\
       
   328   @{thm (lhs) flat.simps(4)} & $\dn$ & @{thm (rhs) flat.simps(4)}\\
       
   329   @{thm (lhs) flat.simps(5)[of "v\<^sub>1" "v\<^sub>2"]} & $\dn$ & @{thm (rhs) flat.simps(5)[of "v\<^sub>1" "v\<^sub>2"]}\\
       
   330   @{thm (lhs) flat.simps(6)} & $\dn$ & @{thm (rhs) flat.simps(6)}\\
       
   331   @{thm (lhs) flat.simps(7)} & $\dn$ & @{thm (rhs) flat.simps(7)}\\
       
   332   \end{tabular}
       
   333   \end{center}
       
   334 
       
   335   \noindent Sulzmann and Lu also define inductively an inhabitation relation
       
   336   that associates values to regular expressions:
   297 
   337 
   298   \begin{center}
   338   \begin{center}
   299   \begin{tabular}{c}
   339   \begin{tabular}{c}
   300   @{thm[mode=Rule] Prf.intros(1)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]}\medskip\\ 
   340   @{thm[mode=Rule] Prf.intros(1)[of "v\<^sub>1" "r\<^sub>1" "v\<^sub>2" "r\<^sub>2"]}\medskip\\ 
   301   @{thm[mode=Rule] Prf.intros(2)[of "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]} \qquad 
   341   @{thm[mode=Rule] Prf.intros(2)[of "v\<^sub>1" "r\<^sub>1" "r\<^sub>2"]} \qquad 
   305   @{thm[mode=Axiom] Prf.intros(6)[of "r"]} \qquad 
   345   @{thm[mode=Axiom] Prf.intros(6)[of "r"]} \qquad 
   306   @{thm[mode=Rule] Prf.intros(7)[of "v" "r" "vs"]}\medskip\\
   346   @{thm[mode=Rule] Prf.intros(7)[of "v" "r" "vs"]}\medskip\\
   307   \end{tabular}
   347   \end{tabular}
   308   \end{center}
   348   \end{center}
   309 
   349 
   310 
   350   \noindent Note that no values are associated with the regular expression
   311 Note that no values are associated with the regular expression $Zero$, and
   351   @{term ZERO}, and that the only value associated with the regular
   312 that the only value associated with the regular expression $One$ is $Void$,
   352   expression @{term ONE} is @{term Void}, pronounced (if one must) as {\em
   313 pronounced (if one must) as {\em ``Void''}. We use $\in$ for the usual
   353   ``Void''}. It is routine to stablish how values `inhabitated' by a regular
   314 membership relation from set theory and take $[]$ as the standard name for
   354   expression correspond to the language of a regular expression, namely
   315 the empty string (rather than, as in \cite{Sulzmann2014}, the regular
   355 
   316 expression that we call $One$).
   356   \begin{proposition}
   317 
   357   @{thm L_flat_Prf}
   318 We begin with the case of a nullable regular expression: from
   358   \end{proposition}
   319 the nullability we need to construct a value that witnesses the nullability.
   359 
   320 The @{const mkeps} function (from \cite{Sulzmann2014})
   360   Graphically the algorithm by Sulzmann \& Lu can be illustrated by the
   321 is a partial (but unambiguous) function from regular expressions to values,
   361   picture in Figure~\ref{Sulz} where the path from the left to the right
   322 total on exactly the set of nullable regular expressions.
   362   involving $der/nullable$ is the first phase of the algorithm and
       
   363   $mkeps/inj$, the path from right to left, the second phase. This picture
       
   364   shows the steps required when a regular expression, say $r_1$, matches the
       
   365   string $abc$. We first build the three derivatives (according to $a$, $b$
       
   366   and $c$). We then use $nullable$ to find out whether the resulting regular
       
   367   expression can match the empty string. If yes, we call the function
       
   368   $mkeps$.
       
   369 
       
   370 \begin{figure}[t]
       
   371 \begin{center}
       
   372 \begin{tikzpicture}[scale=2,node distance=1.2cm,
       
   373                     every node/.style={minimum size=7mm}]
       
   374 \node (r1)  {@{term "r\<^sub>1"}};
       
   375 \node (r2) [right=of r1]{@{term "r\<^sub>2"}};
       
   376 \draw[->,line width=1mm](r1)--(r2) node[above,midway] {@{term "der a DUMMY"}};
       
   377 \node (r3) [right=of r2]{@{term "r\<^sub>3"}};
       
   378 \draw[->,line width=1mm](r2)--(r3) node[above,midway] {@{term "der b DUMMY"}};
       
   379 \node (r4) [right=of r3]{@{term "r\<^sub>4"}};
       
   380 \draw[->,line width=1mm](r3)--(r4) node[above,midway] {@{term "der c DUMMY"}};
       
   381 \draw (r4) node[anchor=west] {\;\raisebox{3mm}{@{term nullable}}};
       
   382 \node (v4) [below=of r4]{@{term "v\<^sub>4"}};
       
   383 \draw[->,line width=1mm](r4) -- (v4);
       
   384 \node (v3) [left=of v4] {@{term "v\<^sub>3"}};
       
   385 \draw[->,line width=1mm](v4)--(v3) node[below,midway] {@{term "inj c"}};
       
   386 \node (v2) [left=of v3]{@{term "v\<^sub>2"}};
       
   387 \draw[->,line width=1mm](v3)--(v2) node[below,midway] {@{term "inj b"}};
       
   388 \node (v1) [left=of v2] {@{term "v\<^sub>1"}};
       
   389 \draw[->,line width=1mm](v2)--(v1) node[below,midway] {@{term "inj a"}};
       
   390 \draw (r4) node[anchor=north west] {\;\raisebox{-8mm}{@{term "mkeps"}}};
       
   391 \end{tikzpicture}
       
   392 \end{center}
       
   393 \caption{The two phases of the algorithm by Sulzmann \& Lu \cite{Sulzmann2014}
       
   394 analysing the string @{term "[a,b,c]"}. The first phase (the arrows from 
       
   395 left to right) is \Brz's matcher building succesive derivatives. If at the 
       
   396 last regular expression is @{term nullable}, then functions of the 
       
   397 second phase are called: first @{term mkeps} calculates a value witnessing
       
   398 how the empty string has been recognised by @{term "r\<^sub>4"}. After
       
   399 that the function @{term inj} `injects back' the characters of the string into
       
   400 the values (the arrows from right to left).
       
   401 \label{Sulz}}
       
   402 \end{figure} 
       
   403 
       
   404  NOT DONE YET 
       
   405 
       
   406   We begin with the case of a nullable regular expression: from the
       
   407   nullability we need to construct a value that witnesses the nullability.
       
   408   The @{const mkeps} function (from \cite{Sulzmann2014}) is a partial (but
       
   409   unambiguous) function from regular expressions to values, total on exactly
       
   410   the set of nullable regular expressions.
   323 
   411 
   324  \begin{center}
   412  \begin{center}
   325   \begin{tabular}{lcl}
   413   \begin{tabular}{lcl}
   326   @{thm (lhs) mkeps.simps(1)} & $\dn$ & @{thm (rhs) mkeps.simps(1)}\\
   414   @{thm (lhs) mkeps.simps(1)} & $\dn$ & @{thm (rhs) mkeps.simps(1)}\\
   327   @{thm (lhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]}\\
   415   @{thm (lhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]} & $\dn$ & @{thm (rhs) mkeps.simps(2)[of "r\<^sub>1" "r\<^sub>2"]}\\
   385 
   473 
   386   
   474   
   387 
   475 
   388  
   476  
   389 
   477 
   390   \noindent
   478  
   391   The @{const flat} function for values
       
   392 
       
   393   \begin{center}
       
   394   \begin{tabular}{lcl}
       
   395   @{thm (lhs) flat.simps(1)} & $\dn$ & @{thm (rhs) flat.simps(1)}\\
       
   396   @{thm (lhs) flat.simps(2)} & $\dn$ & @{thm (rhs) flat.simps(2)}\\
       
   397   @{thm (lhs) flat.simps(3)} & $\dn$ & @{thm (rhs) flat.simps(3)}\\
       
   398   @{thm (lhs) flat.simps(4)} & $\dn$ & @{thm (rhs) flat.simps(4)}\\
       
   399   @{thm (lhs) flat.simps(5)[of "v\<^sub>1" "v\<^sub>2"]} & $\dn$ & @{thm (rhs) flat.simps(5)[of "v\<^sub>1" "v\<^sub>2"]}\\
       
   400   @{thm (lhs) flat.simps(6)} & $\dn$ & @{thm (rhs) flat.simps(6)}\\
       
   401   @{thm (lhs) flat.simps(7)} & $\dn$ & @{thm (rhs) flat.simps(7)}\\
       
   402   \end{tabular}
       
   403   \end{center}
       
   404 
   479 
   405   \noindent
   480   \noindent
   406   The @{const mkeps} function
   481   The @{const mkeps} function
   407 
   482 
   408  
   483