thys2/Paper/Paper.thy
changeset 418 41a2a3b63853
parent 416 57182b36ec01
child 420 b66a4305749c
equal deleted inserted replaced
416:57182b36ec01 418:41a2a3b63853
    15   Cons ("_\<^latex>\<open>\\mbox{$\\,$}\<close>::\<^latex>\<open>\\mbox{$\\,$}\<close>_" [75,73] 73) 
    15   Cons ("_\<^latex>\<open>\\mbox{$\\,$}\<close>::\<^latex>\<open>\\mbox{$\\,$}\<close>_" [75,73] 73) 
    16 
    16 
    17 
    17 
    18 abbreviation 
    18 abbreviation 
    19   "der_syn r c \<equiv> der c r"
    19   "der_syn r c \<equiv> der c r"
       
    20 abbreviation 
       
    21   "bder_syn r c \<equiv> bder c r"  
    20 
    22 
    21 notation (latex output)
    23 notation (latex output)
    22   der_syn ("_\\_" [79, 1000] 76) and
    24   der_syn ("_\\_" [79, 1000] 76) and
       
    25   bder_syn ("_\\_" [79, 1000] 76) and
    23 
    26 
    24   ZERO ("\<^bold>0" 81) and 
    27   ZERO ("\<^bold>0" 81) and 
    25   ONE ("\<^bold>1" 81) and 
    28   ONE ("\<^bold>1" 81) and 
    26   CH ("_" [1000] 80) and
    29   CH ("_" [1000] 80) and
    27   ALT ("_ + _" [77,77] 78) and
    30   ALT ("_ + _" [77,77] 78) and
    52   ASTAR ("STAR _ _" [79, 79] 78) and
    55   ASTAR ("STAR _ _" [79, 79] 78) and
    53 
    56 
    54   code ("code _" [79] 74) and
    57   code ("code _" [79] 74) and
    55   intern ("_\<^latex>\<open>\\mbox{$^\\uparrow$}\<close>" [900] 80) and
    58   intern ("_\<^latex>\<open>\\mbox{$^\\uparrow$}\<close>" [900] 80) and
    56   erase ("_\<^latex>\<open>\\mbox{$^\\downarrow$}\<close>" [1000] 74) and
    59   erase ("_\<^latex>\<open>\\mbox{$^\\downarrow$}\<close>" [1000] 74) and
    57   bnullable ("nullable\<^latex>\<open>\\mbox{$_b$}\<close> _" [1000] 80) and
    60   bnullable ("bnullable _" [1000] 80) and
    58   bmkeps ("mkeps\<^latex>\<open>\\mbox{$_b$}\<close> _" [1000] 80) and
    61   bmkeps ("bmkeps _" [1000] 80) and
    59 
    62 
    60   srewrite ("_\<^latex>\<open>\\mbox{$\\,\\stackrel{s}{\\leadsto}$}\<close> _" [71, 71] 80) 
    63   srewrite ("_\<^latex>\<open>\\mbox{$\\,\\stackrel{s}{\\leadsto}$}\<close> _" [71, 71] 80) and
       
    64   rrewrites ("_ \<^latex>\<open>\\mbox{$\\,\\leadsto^*$}\<close> _" [71, 71] 80) and
       
    65   blexer_simp ("blexer\<^sup>+" 1000) 
    61 
    66 
    62 
    67 
    63 lemma better_retrieve:
    68 lemma better_retrieve:
    64    shows "rs \<noteq> Nil ==> retrieve (AALTs bs (r#rs)) (Left v) = bs @ retrieve r v"
    69    shows "rs \<noteq> Nil ==> retrieve (AALTs bs (r#rs)) (Left v) = bs @ retrieve r v"
    65    and   "rs \<noteq> Nil ==> retrieve (AALTs bs (r#rs)) (Right v) = bs @ retrieve (AALTs [] rs) v"
    70    and   "rs \<noteq> Nil ==> retrieve (AALTs bs (r#rs)) (Right v) = bs @ retrieve (AALTs [] rs) v"
   261                & $\mid$ & @{term "ASEQ bs r\<^sub>1 r\<^sub>2"}\\
   266                & $\mid$ & @{term "ASEQ bs r\<^sub>1 r\<^sub>2"}\\
   262                & $\mid$ & @{term "ASTAR bs r"}
   267                & $\mid$ & @{term "ASTAR bs r"}
   263   \end{tabular}
   268   \end{tabular}
   264   \end{center}
   269   \end{center}
   265 
   270 
   266   \noindent where @{text bs} stands for a bitsequences; @{text r},
   271   \noindent where @{text bs} stands for bitsequences; @{text r},
   267   @{text "r\<^sub>1"} and @{text "r\<^sub>2"} for bitcoded regular
   272   @{text "r\<^sub>1"} and @{text "r\<^sub>2"} for bitcoded regular
   268   expressions; and @{text rs} for lists of bitcoded regular
   273   expressions; and @{text rs} for lists of bitcoded regular
   269   expressions. The binary alternative @{text "ALT bs r\<^sub>1 r\<^sub>2"}
   274   expressions. The binary alternative @{text "ALT bs r\<^sub>1 r\<^sub>2"}
   270   is just an abbreviation for @{text "ALTs bs [r\<^sub>1, r\<^sub>2]"}. 
   275   is just an abbreviation for @{text "ALTs bs [r\<^sub>1, r\<^sub>2]"}. 
   271   For bitsequences we just use lists made up of the
   276   For bitsequences we just use lists made up of the
   272   constants @{text Z} and @{text S}.  The idea with bitcoded regular
   277   constants @{text Z} and @{text S}.  The idea with bitcoded regular
   273   expressions is to incrementally generate the value information (for
   278   expressions is to incrementally generate the value information (for
   274   example @{text Left} and @{text Right}) as bitsequences
   279   example @{text Left} and @{text Right}) as bitsequences. For this 
   275   as part of the regular expression constructors. 
   280   Sulzmann and Lu define a coding
   276   Sulzmann and Lu then define a coding
       
   277   function for how values can be coded into bitsequences.
   281   function for how values can be coded into bitsequences.
   278 
   282 
   279   \begin{center}
   283   \begin{center}
   280   \begin{tabular}{cc}
   284   \begin{tabular}{cc}
   281   \begin{tabular}{lcl}
   285   \begin{tabular}{lcl}
   295   \end{center}
   299   \end{center}
   296    
   300    
   297   \noindent
   301   \noindent
   298   As can be seen, this coding is ``lossy'' in the sense that we do not
   302   As can be seen, this coding is ``lossy'' in the sense that we do not
   299   record explicitly character values and also not sequence values (for
   303   record explicitly character values and also not sequence values (for
   300   them we just append two bitsequences). We do, however, record the
   304   them we just append two bitsequences). However, the
   301   different alternatives for @{text Left}, respectively @{text Right}, as @{text Z} and
   305   different alternatives for @{text Left}, respectively @{text Right}, are recorded as @{text Z} and
   302   @{text S} followed by some bitsequence. Similarly, we use @{text Z} to indicate
   306   @{text S} followed by some bitsequence. Similarly, we use @{text Z} to indicate
   303   if there is still a value coming in the list of @{text Stars}, whereas @{text S}
   307   if there is still a value coming in the list of @{text Stars}, whereas @{text S}
   304   indicates the end of the list. The lossiness makes the process of
   308   indicates the end of the list. The lossiness makes the process of
   305   decoding a bit more involved, but the point is that if we have a
   309   decoding a bit more involved, but the point is that if we have a
   306   regular expression \emph{and} a bitsequence of a corresponding value,
   310   regular expression \emph{and} a bitsequence of a corresponding value,
   402   the functions \textit{bnullable} and \textit{bmkeps}, which are the
   406   the functions \textit{bnullable} and \textit{bmkeps}, which are the
   403   ``lifted'' versions of \textit{nullable} and \textit{mkeps} acting on
   407   ``lifted'' versions of \textit{nullable} and \textit{mkeps} acting on
   404   bitcoded regular expressions, instead of regular expressions.
   408   bitcoded regular expressions, instead of regular expressions.
   405 
   409 
   406   \begin{center}
   410   \begin{center}
   407   \begin{tabular}{lcl}
   411   \begin{tabular}{@ {}c@ {}c@ {}}
   408   $\textit{bnullable}\,(\textit{ZERO})$ & $\dn$ & $\textit{false}$ \textbf{fix}\\
   412   \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
       
   413   $\textit{bnullable}\,(\textit{ZERO})$ & $\dn$ & $\textit{false}$\\
   409   $\textit{bnullable}\,(\textit{ONE}\,bs)$ & $\dn$ & $\textit{true}$\\
   414   $\textit{bnullable}\,(\textit{ONE}\,bs)$ & $\dn$ & $\textit{true}$\\
   410   $\textit{bnullable}\,(\textit{CHAR}\,bs\,c)$ & $\dn$ & $\textit{false}$\\
   415   $\textit{bnullable}\,(\textit{CHAR}\,bs\,c)$ & $\dn$ & $\textit{false}$\\
   411   $\textit{bnullable}\,(\textit{ALT}\,bs\,a_1\,a_2)$ & $\dn$ &
   416   $\textit{bnullable}\,(\textit{ALTs}\,bs\,\rs)$ & $\dn$ &
   412      $\textit{bnullable}\,a_1\vee \textit{bnullable}\,a_2$\\
   417      $\exists\, r \in \rs. \,\textit{bnullable}\,r$\\
   413   $\textit{bnullable}\,(\textit{SEQ}\,bs\,a_1\,a_2)$ & $\dn$ &
   418   $\textit{bnullable}\,(\textit{SEQ}\,bs\,r_1\,r_2)$ & $\dn$ &
   414      $\textit{bnullable}\,a_1\wedge \textit{bnullable}\,a_2$\\
   419      $\textit{bnullable}\,r_1\wedge \textit{bnullable}\,r_2$\\
   415   $\textit{bnullable}\,(\textit{STAR}\,bs\,a)$ & $\dn$ &
   420   $\textit{bnullable}\,(\textit{STAR}\,bs\,r)$ & $\dn$ &
   416      $\textit{true}$
   421      $\textit{true}$
   417   \end{tabular}    
   422   \end{tabular}
   418   \end{center}    
   423   &
   419 
   424   \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
   420   \begin{center}
   425   $\textit{bmkeps}\,(\textit{ONE}\,bs)$ & $\dn$ & $bs$\\
   421   \begin{tabular}{lcl}
   426   $\textit{bmkeps}\,(\textit{ALTs}\,bs\,r\!::\!\rs)$ & $\dn$ &
   422   $\textit{bmkeps}\,(\textit{ONE}\,bs)$ & $\dn$ & $bs$ \textbf{fix}\\
   427      $\textit{if}\;\textit{bnullable}\,r$\\
   423   $\textit{bmkeps}\,(\textit{ALT}\,bs\,a_1\,a_2)$ & $\dn$ &
   428   & &$\textit{then}\;bs\,@\,\textit{bmkeps}\,r$\\
   424      $\textit{if}\;\textit{bnullable}\,a_1$\\
   429   & &$\textit{else}\;bs\,@\,\textit{bmkeps}\,\rs$\\
   425   & &$\textit{then}\;bs\,@\,\textit{bmkeps}\,a_1$\\
   430   $\textit{bmkeps}\,(\textit{SEQ}\,bs\,r_1\,r_2)$ & $\dn$ &\\
   426   & &$\textit{else}\;bs\,@\,\textit{bmkeps}\,a_2$\\
   431   \multicolumn{3}{r}{$bs \,@\,\textit{bmkeps}\,r_1\,@\, \textit{bmkeps}\,r_2$}\\
   427   $\textit{bmkeps}\,(\textit{SEQ}\,bs\,a_1\,a_2)$ & $\dn$ &
   432   $\textit{bmkeps}\,(\textit{STAR}\,bs\,r)$ & $\dn$ &
   428      $bs \,@\,\textit{bmkeps}\,a_1\,@\, \textit{bmkeps}\,a_2$\\
       
   429   $\textit{bmkeps}\,(\textit{STAR}\,bs\,a)$ & $\dn$ &
       
   430      $bs \,@\, [\S]$
   433      $bs \,@\, [\S]$
   431   \end{tabular}    
   434   \end{tabular}
       
   435   \end{tabular}
   432   \end{center}    
   436   \end{center}    
   433  
   437  
   434 
   438 
   435   \noindent
   439   \noindent
   436   The key function in the bitcoded algorithm is the derivative of an
   440   The key function in the bitcoded algorithm is the derivative of an
   437   annotated regular expression. This derivative calculates the
   441   bitcoded regular expression. This derivative calculates the
   438   derivative but at the same time also the incremental part that
   442   derivative but at the same time also the incremental part of bitsequences
   439   contributes to constructing a value.	
   443   that contribute to constructing a POSIX value.	
   440 
   444 
   441   \begin{center}
   445   \begin{center}
   442   \begin{tabular}{@ {}lcl@ {}}
   446   \begin{tabular}{@ {}lcl@ {}}
   443   $(\textit{ZERO})\backslash c$ & $\dn$ & $\textit{ZERO}$ \textbf{fix}\\  
   447   $(\textit{ZERO})\backslash c$ & $\dn$ & $\textit{ZERO}$ \\  
   444   $(\textit{ONE}\;bs)\backslash c$ & $\dn$ & $\textit{ZERO}$\\  
   448   $(\textit{ONE}\;bs)\backslash c$ & $\dn$ & $\textit{ZERO}$\\  
   445   $(\textit{CHAR}\;bs\,d)\backslash c$ & $\dn$ &
   449   $(\textit{CHAR}\;bs\,d)\backslash c$ & $\dn$ &
   446         $\textit{if}\;c=d\; \;\textit{then}\;
   450         $\textit{if}\;c=d\; \;\textit{then}\;
   447          \textit{ONE}\;bs\;\textit{else}\;\textit{ZERO}$\\  
   451          \textit{ONE}\;bs\;\textit{else}\;\textit{ZERO}$\\  
   448   $(\textit{ALT}\;bs\,a_1\,a_2)\backslash c$ & $\dn$ &
   452   $(\textit{ALTs}\;bs\,\rs)\backslash c$ & $\dn$ &
   449         $\textit{ALT}\,bs\,(a_1\backslash c)\,(a_2\backslash c)$\\
   453         $\textit{ALTs}\,bs\,(\mathit{map}\,(\_\backslash c)\,\rs)$\\
   450   $(\textit{SEQ}\;bs\,a_1\,a_2)\backslash c$ & $\dn$ &
   454   $(\textit{SEQ}\;bs\,r_1\,r_2)\backslash c$ & $\dn$ &
   451      $\textit{if}\;\textit{bnullable}\,a_1$\\
   455      $\textit{if}\;\textit{bnullable}\,r_1$\\
   452   & &$\textit{then}\;\textit{ALT}\,bs\,(\textit{SEQ}\,[]\,(a_1\backslash c)\,a_2)$\\
   456   & &$\textit{then}\;\textit{ALT}\,bs\,(\textit{SEQ}\,[]\,(r_1\backslash c)\,r_2)$\\
   453   & &$\phantom{\textit{then}\;\textit{ALT}\,bs\,}(\textit{fuse}\,(\textit{bmkeps}\,a_1)\,(a_2\backslash c))$\\
   457   & &$\phantom{\textit{then}\;\textit{ALT}\,bs\,}(\textit{fuse}\,(\textit{bmkeps}\,r_1)\,(r_2\backslash c))$\\
   454   & &$\textit{else}\;\textit{SEQ}\,bs\,(a_1\backslash c)\,a_2$\\
   458   & &$\textit{else}\;\textit{SEQ}\,bs\,(r_1\backslash c)\,r_2$\\
   455   $(\textit{STAR}\,bs\,a)\backslash c$ & $\dn$ &
   459   $(\textit{STAR}\,bs\,r)\backslash c$ & $\dn$ &
   456       $\textit{SEQ}\;bs\,(\textit{fuse}\, [\Z] (r\backslash c))\,
   460       $\textit{SEQ}\;bs\,(\textit{fuse}\, [\Z] (r\backslash c))\,
   457        (\textit{STAR}\,[]\,r)$
   461        (\textit{STAR}\,[]\,r)$
   458   \end{tabular}    
   462   \end{tabular}    
   459   \end{center}
   463   \end{center}
   460 
   464 
   461 
   465 
   462   \noindent
   466   \noindent
   463   This function can also be extended to strings, written $a\backslash s$,
   467   This function can also be extended to strings, written $r\backslash s$,
   464   just like the standard derivative.  We omit the details. Finally we
   468   just like the standard derivative.  We omit the details. Finally we
   465   can define Sulzmann and Lu's bitcoded lexer, which we call \textit{blexer}: 
   469   can define Sulzmann and Lu's bitcoded lexer, which we call \textit{blexer}:
       
   470 
       
   471   \begin{center}
       
   472 \begin{tabular}{lcl}
       
   473   $\textit{blexer}\;r\,s$ & $\dn$ &
       
   474       $\textit{let}\;r_{der} = (r^\uparrow)\backslash s\;\textit{in}$\\                
       
   475   & & $\;\;\;\;\textit{if}\; \textit{bnullable}(r_{der}) \;\;\textit{then}\;\textit{decode}\,(\textit{bmkeps}\,r_{der})\,r
       
   476        \;\;\textit{else}\;\textit{None}$
       
   477 \end{tabular}
       
   478 \end{center}
   466 
   479 
   467   \noindent
   480   \noindent
   468 This bitcoded lexer first internalises the regular expression $r$ and then
   481 This bitcoded lexer first internalises the regular expression $r$ and then
   469 builds the annotated derivative according to $s$. If the derivative is
   482 builds the bitcoded derivative according to $s$. If the derivative is
   470 nullable, then it extracts the bitcoded value using the
   483 (b)nullable the string is in the language of $r$ and it extracts the bitsequence using the
   471 $\textit{bmkeps}$ function. Finally it decodes the bitcoded value.  If
   484 $\textit{bmkeps}$ function. Finally it decodes the bitsequence into a value.  If
   472 the derivative is \emph{not} nullable, then $\textit{None}$ is
   485 the derivative is \emph{not} nullable, then $\textit{None}$ is
   473 returned. The task is to show that this way of calculating a value
   486 returned. We can show that this way of calculating a value
   474 generates the same result as with \textit{lexer}.
   487 generates the same result as with \textit{lexer}.
   475 
   488 
   476 Before we can proceed we need to define a function, called
   489 Before we can proceed we need to define a helper function, called
   477 \textit{retrieve}, which Sulzmann and Lu introduced for the proof.
   490 \textit{retrieve}, which Sulzmann and Lu introduced for the correctness proof.
   478 
   491 
   479 \textbf{fix}
   492 \begin{center}
   480 
       
   481 \noindent
       
   482 The idea behind this function is to retrieve a possibly partial
       
   483 bitcode from an annotated regular expression, where the retrieval is
       
   484 guided by a value.  For example if the value is $\Left$ then we
       
   485 descend into the left-hand side of an alternative (annotated) regular
       
   486 expression in order to assemble the bitcode. Similarly for
       
   487 $\Right$. The property we can show is that for a given $v$ and $r$
       
   488 with $\vdash v : r$, the retrieved bitsequence from the internalised
       
   489 regular expression is equal to the bitcoded version of $v$.
       
   490 
       
   491 \begin{lemma}\label{retrievecode}
       
   492 If $\vdash v : r$ then $\textit{code}\, v = \textit{retrieve}\,(r^\uparrow)\,v$.
       
   493 \end{lemma}
       
   494 
       
   495 *}
       
   496 
       
   497 text {*
       
   498   There is also a corresponding decoding function that takes a bitsequence
       
   499   and generates back a value. However, since the bitsequences are a ``lossy''
       
   500   coding (@{term Seq}s are not coded) the decoding function depends also
       
   501   on a regular expression in order to decode values. 
       
   502 
       
   503 
       
   504 
       
   505 
       
   506   The idea of the bitcodes is to annotate them to regular expressions and generate values
       
   507   incrementally. The bitcodes can be read off from the @{text breg} and then decoded into a value.
       
   508 
       
   509   \begin{center}
       
   510   \begin{tabular}{lcl}
       
   511   @{term breg} & $::=$ & @{term "AZERO"}\\
       
   512                & $\mid$ & @{term "AONE bs"}\\
       
   513                & $\mid$ & @{term "ACHAR bs c"}\\
       
   514                & $\mid$ & @{term "AALTs bs rs"}\\
       
   515                & $\mid$ & @{term "ASEQ bs r\<^sub>1 r\<^sub>2"}\\
       
   516                & $\mid$ & @{term "ASTAR bs r"}
       
   517   \end{tabular}
       
   518   \end{center}
       
   519 
       
   520 
       
   521 
       
   522   \begin{center}
       
   523   \begin{tabular}{lcl}
   493   \begin{tabular}{lcl}
   524   @{thm (lhs) retrieve.simps(1)} & $\dn$ & @{thm (rhs) retrieve.simps(1)}\\
   494   @{thm (lhs) retrieve.simps(1)} & $\dn$ & @{thm (rhs) retrieve.simps(1)}\\
   525   @{thm (lhs) retrieve.simps(2)} & $\dn$ & @{thm (rhs) retrieve.simps(2)}\\
   495   @{thm (lhs) retrieve.simps(2)} & $\dn$ & @{thm (rhs) retrieve.simps(2)}\\
   526   @{thm (lhs) retrieve.simps(3)} & $\dn$ & @{thm (rhs) retrieve.simps(3)}\\
   496   @{thm (lhs) retrieve.simps(3)} & $\dn$ & @{thm (rhs) retrieve.simps(3)}\\
   527   @{thm (lhs) better_retrieve(1)} & $\dn$ & @{thm (rhs) better_retrieve(1)}\\
   497   @{thm (lhs) better_retrieve(1)} & $\dn$ & @{thm (rhs) better_retrieve(1)}\\
   531   @{thm (lhs) retrieve.simps(7)} & $\dn$ & @{thm (rhs) retrieve.simps(7)}\\
   501   @{thm (lhs) retrieve.simps(7)} & $\dn$ & @{thm (rhs) retrieve.simps(7)}\\
   532   @{thm (lhs) retrieve.simps(8)} & $\dn$ & @{thm (rhs) retrieve.simps(8)}
   502   @{thm (lhs) retrieve.simps(8)} & $\dn$ & @{thm (rhs) retrieve.simps(8)}
   533   \end{tabular}
   503   \end{tabular}
   534   \end{center}
   504   \end{center}
   535 
   505 
   536 
   506 \noindent
   537   \begin{theorem}
   507 The idea behind this function is to retrieve a possibly partial
   538   @{thm blexer_correctness} 
   508 bitcode from a bitcoded regular expression, where the retrieval is
   539   \end{theorem}
   509 guided by a value.  For example if the value is $\Left$ then we
   540 
   510 descend into the left-hand side of an alternative in order to
   541 
   511 assemble the bitcode. Similarly for
   542   bitcoded regexes / decoding / bmkeps
   512 $\Right$. The property we can show is that for a given $v$ and $r$
   543   gets rid of the second phase (only single phase)   
   513 with $\vdash v : r$, the retrieved bitsequence from the internalised
   544   correctness
   514 regular expression is equal to the bitcoded version of $v$.
       
   515 
       
   516 \begin{lemma}\label{retrievecode}
       
   517 If $\vdash v : r$ then $\textit{code}\, v = \textit{retrieve}\,(r^\uparrow)\,v$.
       
   518 \end{lemma}
       
   519 
       
   520 \noindent
       
   521 We also need some auxiliary facts about how the bitcoded operations
       
   522 relate to the ``standard'' operations on regular expressions. For
       
   523 example if we build a bitcoded derivative and erase the result, this
       
   524 is the same as if we first erase the bitcoded regular expression and
       
   525 then perform the ``standard'' derivative operation.
       
   526 
       
   527 \begin{lemma}\label{bnullable}\mbox{}\smallskip\\
       
   528   \begin{tabular}{ll}
       
   529 \textit{(1)} & $(a\backslash s)^\downarrow = (a^\downarrow)\backslash s$\\    
       
   530 \textit{(2)} & $\textit{bnullable}(a)$ iff $\textit{nullable}(a^\downarrow)$\\
       
   531 \textit{(3)} & $\textit{bmkeps}(a) = \textit{retrieve}\,a\,(\textit{mkeps}\,(a^\downarrow))$ provided $\textit{nullable}(a^\downarrow)$.
       
   532 \end{tabular}  
       
   533 \end{lemma}
       
   534 
       
   535 \begin{proof}
       
   536   All properties are by induction on annotated regular expressions. There are no
       
   537   interesting cases.
       
   538 \end{proof}
       
   539 
       
   540 \noindent
       
   541 This brings us to our main lemma in this section: if we build a
       
   542 derivative, say $r\backslash s$ and have a value, say $v$, inhabited
       
   543 by this derivative, then we can produce the result $\lexer$ generates
       
   544 by applying this value to the stacked-up injection functions
       
   545 $\textit{flex}$ assembles. The lemma establishes that this is the same
       
   546 value as if we build the annotated derivative $r^\uparrow\backslash s$
       
   547 and then retrieve the corresponding bitcoded version, followed by a
       
   548 decoding step.
       
   549 
       
   550 \begin{lemma}[Main Lemma]\label{mainlemma}\it
       
   551 If $\vdash v : r\backslash s$ then 
       
   552 \[\textit{Some}\,(\textit{flex}\,r\,\textit{id}\,s\,v) =
       
   553   \textit{decode}(\textit{retrieve}\,(r^\uparrow \backslash s)\,v)\,r\]
       
   554 \end{lemma}  
       
   555 
       
   556 \begin{proof}
       
   557   This can be proved by induction on $s$ and generalising over
       
   558   $v$. The interesting point is that we need to prove this in the
       
   559   reverse direction for $s$. This means instead of cases $[]$ and
       
   560   $c\!::\!s$, we have cases $[]$ and $s\,@\,[c]$ where we unravel the
       
   561   string from the back.\footnote{Isabelle/HOL provides an induction principle
       
   562     for this way of performing the induction.}
       
   563 
       
   564   The case for $[]$ is routine using Lemmas~\ref{codedecode}
       
   565   and~\ref{retrievecode}. In the case $s\,@\,[c]$, we can infer from
       
   566   the assumption that $\vdash v : (r\backslash s)\backslash c$
       
   567   holds. Hence by Lemma~\ref{Posix2} we know that 
       
   568   (*) $\vdash \inj\,(r\backslash s)\,c\,v : r\backslash s$ holds too.
       
   569   By definition of $\textit{flex}$ we can unfold the left-hand side
       
   570   to be
       
   571   \[
       
   572     \textit{Some}\,(\textit{flex}\;r\,\textit{id}\,(s\,@\,[c])\,v) =
       
   573     \textit{Some}\,(\textit{flex}\;r\,\textit{id}\,s\,(\inj\,(r\backslash s)\,c\,v))  
       
   574   \]  
       
   575 
       
   576   \noindent
       
   577   By induction hypothesis and (*) we can rewrite the right-hand side to
       
   578 
       
   579   \[
       
   580     \textit{decode}\,(\textit{retrieve}\,(r^\uparrow\backslash s)\;
       
   581     (\inj\,(r\backslash s)\,c\,\,v))\,r
       
   582   \]
       
   583 
       
   584   \noindent
       
   585   which is equal to
       
   586   $\textit{decode}\,(\textit{retrieve}\, (r^\uparrow\backslash
       
   587   (s\,@\,[c]))\,v)\,r$ as required. The last rewrite step is possible
       
   588   because we generalised over $v$ in our induction.
       
   589 \end{proof}  
       
   590 
       
   591 \noindent
       
   592 With this lemma in place, we can prove the correctness of \textit{blexer} such
       
   593 that it produces the same result as \textit{lexer}.
       
   594 
       
   595 
       
   596 \begin{theorem}
       
   597 $\textit{lexer}\,r\,s = \textit{blexer}\,r\,s$
       
   598 \end{theorem}  
       
   599 
       
   600 \begin{proof}
       
   601   We can first expand both sides using Lemma~\ref{flex} and the
       
   602   definition of \textit{blexer}. This gives us two
       
   603   \textit{if}-statements, which we need to show to be equal. By 
       
   604   Lemma~\ref{bnullable}\textit{(2)} we know the \textit{if}-tests coincide:
       
   605   \[
       
   606     \textit{bnullable}(r^\uparrow\backslash s) \;\textit{iff}\;
       
   607     \nullable(r\backslash s)
       
   608   \]
       
   609 
       
   610   \noindent
       
   611   For the \textit{if}-branch suppose $r_d \dn r^\uparrow\backslash s$ and
       
   612   $d \dn r\backslash s$. We have (*) $\nullable\,d$. We can then show
       
   613   by Lemma~\ref{bnullable}\textit{(3)} that
       
   614   %
       
   615   \[
       
   616     \textit{decode}(\textit{bmkeps}\,r_d)\,r =
       
   617     \textit{decode}(\textit{retrieve}\,a\,(\textit{mkeps}\,d))\,r
       
   618   \]
       
   619 
       
   620   \noindent
       
   621   where the right-hand side is equal to
       
   622   $\textit{Some}\,(\textit{flex}\,r\,\textit{id}\,s\,(\textit{mkeps}\,
       
   623   d))$ by Lemma~\ref{mainlemma} (we know
       
   624   $\vdash \textit{mkeps}\,d : d$ by (*)).  This shows the
       
   625   \textit{if}-branches return the same value. In the
       
   626   \textit{else}-branches both \textit{lexer} and \textit{blexer} return
       
   627   \textit{None}. Therefore we can conclude the proof.
       
   628 \end{proof}  
       
   629 
       
   630 \noindent
       
   631 This establishes that the bitcoded algorithm by Sulzmann
       
   632 and Lu without simplification produces correct results. This was
       
   633 only conjectured in their paper \cite{Sulzmann2014}. The next step
       
   634 is to add simplifications.
       
   635 
   545 *}
   636 *}
   546 
   637 
   547 
   638 
   548 section {* Simplification *}
   639 section {* Simplification *}
   549 
   640 
   550 text {*
   641 text {*
   551      Sulzmann \& Lu apply simplification via a fixpoint operation; also does not use erase to filter out duplicates.
   642 
       
   643      \begin{lemma}
       
   644      @{thm[mode=IfThen] bnullable0(1)[of "r\<^sub>1" "r\<^sub>2"]}
       
   645      \end{lemma}
       
   646 
       
   647 
       
   648      \begin{lemma}
       
   649      @{thm[mode=IfThen] rewrite_bmkeps_aux(1)[of "r\<^sub>1" "r\<^sub>2"]}
       
   650      \end{lemma}
       
   651 
       
   652      \begin{lemma}
       
   653      @{thm[mode=IfThen] rewrites_to_bsimp}
       
   654      \end{lemma}
       
   655 
       
   656      \begin{lemma}
       
   657      @{thm[mode=IfThen] rewrite_preserves_bder(1)[of "r\<^sub>1" "r\<^sub>2"]}
       
   658      \end{lemma}
       
   659 
       
   660      \begin{lemma}
       
   661      @{thm[mode=IfThen] central}
       
   662      \end{lemma}
       
   663 
       
   664      \begin{theorem}
       
   665      @{thm[mode=IfThen] main_blexer_simp}
       
   666      \end{theorem}
       
   667 
       
   668      Sulzmann \& Lu apply simplification via a fixpoint operation
       
   669 
       
   670      ; also does not use erase to filter out duplicates.
   552   
   671   
   553    not direct correspondence with PDERs, because of example
   672    not direct correspondence with PDERs, because of example
   554    problem with retrieve 
   673    problem with retrieve 
   555 
   674 
   556    correctness
   675    correctness