etnms/etnms.tex
changeset 132 69292c2b54d8
parent 131 b6984212cf87
child 133 0172d422e93e
equal deleted inserted replaced
131:b6984212cf87 132:69292c2b54d8
   471 \begin{center}
   471 \begin{center}
   472 \begin{tabular}{lcl}
   472 \begin{tabular}{lcl}
   473   $\textit{a}$ & $::=$  & $\ZERO$\\
   473   $\textit{a}$ & $::=$  & $\ZERO$\\
   474                   & $\mid$ & $_{bs}\ONE$\\
   474                   & $\mid$ & $_{bs}\ONE$\\
   475                   & $\mid$ & $_{bs}{\bf c}$\\
   475                   & $\mid$ & $_{bs}{\bf c}$\\
   476                   & $\mid$ & $_{bs}\oplus\,as$\\
   476                   & $\mid$ & $_{bs}\sum\,as$\\
   477                   & $\mid$ & $_{bs}a_1\cdot a_2$\\
   477                   & $\mid$ & $_{bs}a_1\cdot a_2$\\
   478                   & $\mid$ & $_{bs}a^*$
   478                   & $\mid$ & $_{bs}a^*$
   479 \end{tabular}    
   479 \end{tabular}    
   480 \end{center}  
   480 \end{center}  
   481 %(in \textit{ALTS})
   481 %(in \textit{ALTS})
   482 
   482 
   483 \noindent
   483 \noindent
   484 where $bs$ stands for bitcodes, $a$  for $\bold{a}$nnotated regular
   484 where $bs$ stands for bitcodes, $a$  for $\bold{a}$nnotated regular
   485 expressions and $as$ for a list of annotated regular expressions.
   485 expressions and $as$ for a list of annotated regular expressions.
   486 The alternative constructor($\oplus$) has been generalized to 
   486 The alternative constructor($\sum$) has been generalized to 
   487 accept a list of annotated regular expressions rather than just 2.
   487 accept a list of annotated regular expressions rather than just 2.
   488 We will show that these bitcodes encode information about
   488 We will show that these bitcodes encode information about
   489 the (POSIX) value that should be generated by the Sulzmann and Lu
   489 the (POSIX) value that should be generated by the Sulzmann and Lu
   490 algorithm.
   490 algorithm.
   491 
   491 
   500 \begin{tabular}{lcl}
   500 \begin{tabular}{lcl}
   501   $(\ZERO)^\uparrow$ & $\dn$ & $\ZERO$\\
   501   $(\ZERO)^\uparrow$ & $\dn$ & $\ZERO$\\
   502   $(\ONE)^\uparrow$ & $\dn$ & $_{[]}\ONE$\\
   502   $(\ONE)^\uparrow$ & $\dn$ & $_{[]}\ONE$\\
   503   $(c)^\uparrow$ & $\dn$ & $_{[]}{\bf c}$\\
   503   $(c)^\uparrow$ & $\dn$ & $_{[]}{\bf c}$\\
   504   $(r_1 + r_2)^\uparrow$ & $\dn$ &
   504   $(r_1 + r_2)^\uparrow$ & $\dn$ &
   505   $_{[]}\oplus[\textit{fuse}\,[0]\,r_1^\uparrow,\,
   505   $_{[]}\sum[\textit{fuse}\,[0]\,r_1^\uparrow,\,
   506   \textit{fuse}\,[1]\,r_2^\uparrow]$\\
   506   \textit{fuse}\,[1]\,r_2^\uparrow]$\\
   507   $(r_1\cdot r_2)^\uparrow$ & $\dn$ &
   507   $(r_1\cdot r_2)^\uparrow$ & $\dn$ &
   508          $_{[]}r_1^\uparrow \cdot r_2^\uparrow$\\
   508          $_{[]}r_1^\uparrow \cdot r_2^\uparrow$\\
   509   $(r^*)^\uparrow$ & $\dn$ &
   509   $(r^*)^\uparrow$ & $\dn$ &
   510          $_{[]}(r^\uparrow)^*$\\
   510          $_{[]}(r^\uparrow)^*$\\
   524   $\textit{fuse}\;bs \; \ZERO$ & $\dn$ & $\ZERO$\\
   524   $\textit{fuse}\;bs \; \ZERO$ & $\dn$ & $\ZERO$\\
   525   $\textit{fuse}\;bs\; _{bs'}\ONE$ & $\dn$ &
   525   $\textit{fuse}\;bs\; _{bs'}\ONE$ & $\dn$ &
   526      $_{bs @ bs'}\ONE$\\
   526      $_{bs @ bs'}\ONE$\\
   527   $\textit{fuse}\;bs\;_{bs'}{\bf c}$ & $\dn$ &
   527   $\textit{fuse}\;bs\;_{bs'}{\bf c}$ & $\dn$ &
   528      $_{bs@bs'}{\bf c}$\\
   528      $_{bs@bs'}{\bf c}$\\
   529   $\textit{fuse}\;bs\,_{bs'}\oplus\textit{as}$ & $\dn$ &
   529   $\textit{fuse}\;bs\,_{bs'}\sum\textit{as}$ & $\dn$ &
   530      $_{bs@bs'}\oplus\textit{as}$\\
   530      $_{bs@bs'}\sum\textit{as}$\\
   531   $\textit{fuse}\;bs\; _{bs'}a_1\cdot a_2$ & $\dn$ &
   531   $\textit{fuse}\;bs\; _{bs'}a_1\cdot a_2$ & $\dn$ &
   532      $_{bs@bs'}a_1 \cdot a_2$\\
   532      $_{bs@bs'}a_1 \cdot a_2$\\
   533   $\textit{fuse}\;bs\,_{bs'}a^*$ & $\dn$ &
   533   $\textit{fuse}\;bs\,_{bs'}a^*$ & $\dn$ &
   534      $_{bs @ bs'}a^*$
   534      $_{bs @ bs'}a^*$
   535 \end{tabular}    
   535 \end{tabular}    
   593   $(\ZERO)\,\backslash c$ & $\dn$ & $\ZERO$\\  
   593   $(\ZERO)\,\backslash c$ & $\dn$ & $\ZERO$\\  
   594   $(_{bs}\ONE)\,\backslash c$ & $\dn$ & $\ZERO$\\  
   594   $(_{bs}\ONE)\,\backslash c$ & $\dn$ & $\ZERO$\\  
   595   $(_{bs}{\bf d})\,\backslash c$ & $\dn$ &
   595   $(_{bs}{\bf d})\,\backslash c$ & $\dn$ &
   596         $\textit{if}\;c=d\; \;\textit{then}\;
   596         $\textit{if}\;c=d\; \;\textit{then}\;
   597          _{bs}\ONE\;\textit{else}\;\ZERO$\\  
   597          _{bs}\ONE\;\textit{else}\;\ZERO$\\  
   598   $(_{bs}\oplus \;\textit{as})\,\backslash c$ & $\dn$ &
   598   $(_{bs}\sum \;\textit{as})\,\backslash c$ & $\dn$ &
   599   $_{bs}\oplus\;(\textit{as.map}(\backslash c))$\\
   599   $_{bs}\sum\;(\textit{as.map}(\backslash c))$\\
   600   $(_{bs}\;a_1\cdot a_2)\,\backslash c$ & $\dn$ &
   600   $(_{bs}\;a_1\cdot a_2)\,\backslash c$ & $\dn$ &
   601      $\textit{if}\;\textit{bnullable}\,a_1$\\
   601      $\textit{if}\;\textit{bnullable}\,a_1$\\
   602 					       & &$\textit{then}\;_{bs}\oplus\,[(_{[]}\,(a_1\,\backslash c)\cdot\,a_2),$\\
   602 					       & &$\textit{then}\;_{bs}\sum\,[(_{[]}\,(a_1\,\backslash c)\cdot\,a_2),$\\
   603 					       & &$\phantom{\textit{then},\;_{bs}\oplus\,}(\textit{fuse}\,(\textit{bmkeps}\,a_1)\,(a_2\,\backslash c))]$\\
   603 					       & &$\phantom{\textit{then},\;_{bs}\sum\,}(\textit{fuse}\,(\textit{bmkeps}\,a_1)\,(a_2\,\backslash c))]$\\
   604   & &$\textit{else}\;_{bs}\,(a_1\,\backslash c)\cdot a_2$\\
   604   & &$\textit{else}\;_{bs}\,(a_1\,\backslash c)\cdot a_2$\\
   605   $(_{bs}a^*)\,\backslash c$ & $\dn$ &
   605   $(_{bs}a^*)\,\backslash c$ & $\dn$ &
   606       $_{bs}(\textit{fuse}\, [0] \; r\,\backslash c)\cdot
   606       $_{bs}(\textit{fuse}\, [0] \; r\,\backslash c)\cdot
   607        (_{[]}r^*))$
   607        (_{[]}r^*))$
   608 \end{tabular}    
   608 \end{tabular}    
   623 \backslash c)$ will collapse the regular expression $a_1$(as it has
   623 \backslash c)$ will collapse the regular expression $a_1$(as it has
   624 already been fully matched) and store the parsing information at the
   624 already been fully matched) and store the parsing information at the
   625 head of the regular expression $a_2 \backslash c$ by fusing to it. The
   625 head of the regular expression $a_2 \backslash c$ by fusing to it. The
   626 bitsequence $\textit{bs}$, which was initially attached to the
   626 bitsequence $\textit{bs}$, which was initially attached to the
   627 first element of the sequence $a_1 \cdot a_2$, has
   627 first element of the sequence $a_1 \cdot a_2$, has
   628 now been elevated to the top-level of $\oplus$, as this information will be
   628 now been elevated to the top-level of $\sum$, as this information will be
   629 needed whichever way the sequence is matched---no matter whether $c$ belongs
   629 needed whichever way the sequence is matched---no matter whether $c$ belongs
   630 to $a_1$ or $ a_2$. After building these derivatives and maintaining all
   630 to $a_1$ or $ a_2$. After building these derivatives and maintaining all
   631 the lexing information, we complete the lexing by collecting the
   631 the lexing information, we complete the lexing by collecting the
   632 bitcodes using a generalised version of the $\textit{mkeps}$ function
   632 bitcodes using a generalised version of the $\textit{mkeps}$ function
   633 for annotated regular expressions, called $\textit{bmkeps}$:
   633 for annotated regular expressions, called $\textit{bmkeps}$:
   635 
   635 
   636 %\begin{definition}[\textit{bmkeps}]\mbox{}
   636 %\begin{definition}[\textit{bmkeps}]\mbox{}
   637 \begin{center}
   637 \begin{center}
   638 \begin{tabular}{lcl}
   638 \begin{tabular}{lcl}
   639   $\textit{bmkeps}\,(_{bs}\ONE)$ & $\dn$ & $bs$\\
   639   $\textit{bmkeps}\,(_{bs}\ONE)$ & $\dn$ & $bs$\\
   640   $\textit{bmkeps}\,(_{bs}\oplus a::\textit{as})$ & $\dn$ &
   640   $\textit{bmkeps}\,(_{bs}\sum a::\textit{as})$ & $\dn$ &
   641      $\textit{if}\;\textit{bnullable}\,a$\\
   641      $\textit{if}\;\textit{bnullable}\,a$\\
   642   & &$\textit{then}\;bs\,@\,\textit{bmkeps}\,a$\\
   642   & &$\textit{then}\;bs\,@\,\textit{bmkeps}\,a$\\
   643   & &$\textit{else}\;bs\,@\,\textit{bmkeps}\,(_{bs}\oplus \textit{as})$\\
   643   & &$\textit{else}\;bs\,@\,\textit{bmkeps}\,(_{bs}\sum \textit{as})$\\
   644   $\textit{bmkeps}\,(_{bs} a_1 \cdot a_2)$ & $\dn$ &
   644   $\textit{bmkeps}\,(_{bs} a_1 \cdot a_2)$ & $\dn$ &
   645      $bs \,@\,\textit{bmkeps}\,a_1\,@\, \textit{bmkeps}\,a_2$\\
   645      $bs \,@\,\textit{bmkeps}\,a_1\,@\, \textit{bmkeps}\,a_2$\\
   646   $\textit{bmkeps}\,(_{bs}a^*)$ & $\dn$ &
   646   $\textit{bmkeps}\,(_{bs}a^*)$ & $\dn$ &
   647      $bs \,@\, [0]$
   647      $bs \,@\, [0]$
   648 \end{tabular}    
   648 \end{tabular}    
   682 that the expected bound can be achieved. Obviously we could only
   682 that the expected bound can be achieved. Obviously we could only
   683 partially cover  the search space as there are infinitely many regular
   683 partially cover  the search space as there are infinitely many regular
   684 expressions and strings. 
   684 expressions and strings. 
   685 
   685 
   686 One modification we introduced is to allow a list of annotated regular
   686 One modification we introduced is to allow a list of annotated regular
   687 expressions in the $\oplus$ constructor. This allows us to not just
   687 expressions in the $\sum$ constructor. This allows us to not just
   688 delete unnecessary $\ZERO$s and $\ONE$s from regular expressions, but
   688 delete unnecessary $\ZERO$s and $\ONE$s from regular expressions, but
   689 also unnecessary ``copies'' of regular expressions (very similar to
   689 also unnecessary ``copies'' of regular expressions (very similar to
   690 simplifying $r + r$ to just $r$, but in a more general setting). Another
   690 simplifying $r + r$ to just $r$, but in a more general setting). Another
   691 modification is that we use simplification rules inspired by Antimirov's
   691 modification is that we use simplification rules inspired by Antimirov's
   692 work on partial derivatives. They maintain the idea that only the first
   692 work on partial derivatives. They maintain the idea that only the first
   705    &&$\quad\textit{case} \; (\_, \ZERO) \Rightarrow  \ZERO$ \\
   705    &&$\quad\textit{case} \; (\_, \ZERO) \Rightarrow  \ZERO$ \\
   706    &&$\quad\textit{case} \;  (\ONE, a_2') \Rightarrow  \textit{fuse} \; bs \;  a_2'$ \\
   706    &&$\quad\textit{case} \;  (\ONE, a_2') \Rightarrow  \textit{fuse} \; bs \;  a_2'$ \\
   707    &&$\quad\textit{case} \; (a_1', \ONE) \Rightarrow  \textit{fuse} \; bs \;  a_1'$ \\
   707    &&$\quad\textit{case} \; (a_1', \ONE) \Rightarrow  \textit{fuse} \; bs \;  a_1'$ \\
   708    &&$\quad\textit{case} \; (a_1', a_2') \Rightarrow   _{bs}a_1' \cdot a_2'$ \\
   708    &&$\quad\textit{case} \; (a_1', a_2') \Rightarrow   _{bs}a_1' \cdot a_2'$ \\
   709 
   709 
   710   $\textit{simp} \; (_{bs}\oplus \textit{as})$ & $\dn$ & $\textit{distinct}( \textit{flatten} ( \textit{map simp as})) \; \textit{match} $ \\
   710   $\textit{simp} \; (_{bs}\sum \textit{as})$ & $\dn$ & $\textit{distinct}( \textit{flatten} ( \textit{map simp as})) \; \textit{match} $ \\
   711   &&$\quad\textit{case} \; [] \Rightarrow  \ZERO$ \\
   711   &&$\quad\textit{case} \; [] \Rightarrow  \ZERO$ \\
   712    &&$\quad\textit{case} \; a :: [] \Rightarrow  \textit{fuse bs a}$ \\
   712    &&$\quad\textit{case} \; a :: [] \Rightarrow  \textit{fuse bs a}$ \\
   713    &&$\quad\textit{case} \;  as' \Rightarrow _{bs}\oplus \textit{as'}$\\ 
   713    &&$\quad\textit{case} \;  as' \Rightarrow _{bs}\sum \textit{as'}$\\ 
   714 
   714 
   715    $\textit{simp} \; a$ & $\dn$ & $\textit{a} \qquad \textit{otherwise}$   
   715    $\textit{simp} \; a$ & $\dn$ & $\textit{a} \qquad \textit{otherwise}$   
   716 \end{tabular}    
   716 \end{tabular}    
   717 \end{center}    
   717 \end{center}    
   718 
   718 
   720 The simplification does a pattern matching on the regular expression.
   720 The simplification does a pattern matching on the regular expression.
   721 When it detected that the regular expression is an alternative or
   721 When it detected that the regular expression is an alternative or
   722 sequence, it will try to simplify its children regular expressions
   722 sequence, it will try to simplify its children regular expressions
   723 recursively and then see if one of the children turn into $\ZERO$ or
   723 recursively and then see if one of the children turn into $\ZERO$ or
   724 $\ONE$, which might trigger further simplification at the current level.
   724 $\ONE$, which might trigger further simplification at the current level.
   725 The most involved part is the $\oplus$ clause, where we use two
   725 The most involved part is the $\sum$ clause, where we use two
   726 auxiliary functions $\textit{flatten}$ and $\textit{distinct}$ to open up nested
   726 auxiliary functions $\textit{flatten}$ and $\textit{distinct}$ to open up nested
   727 alternatives and reduce as many duplicates as possible. Function
   727 alternatives and reduce as many duplicates as possible. Function
   728 $\textit{distinct}$  keeps the first occurring copy only and remove all later ones
   728 $\textit{distinct}$  keeps the first occurring copy only and remove all later ones
   729 when detected duplicates. Function $\textit{flatten}$ opens up nested $\oplus$s.
   729 when detected duplicates. Function $\textit{flatten}$ opens up nested $\sum$s.
   730 Its recursive definition is given below:
   730 Its recursive definition is given below:
   731 
   731 
   732  \begin{center}
   732  \begin{center}
   733   \begin{tabular}{@{}lcl@{}}
   733   \begin{tabular}{@{}lcl@{}}
   734   $\textit{flatten} \; (_{bs}\oplus \textit{as}) :: \textit{as'}$ & $\dn$ & $(\textit{map} \;
   734   $\textit{flatten} \; (_{bs}\sum \textit{as}) :: \textit{as'}$ & $\dn$ & $(\textit{map} \;
   735      (\textit{fuse}\;bs)\; \textit{as}) \; @ \; \textit{flatten} \; as' $ \\
   735      (\textit{fuse}\;bs)\; \textit{as}) \; @ \; \textit{flatten} \; as' $ \\
   736   $\textit{flatten} \; \ZERO :: as'$ & $\dn$ & $ \textit{flatten} \;  \textit{as'} $ \\
   736   $\textit{flatten} \; \ZERO :: as'$ & $\dn$ & $ \textit{flatten} \;  \textit{as'} $ \\
   737     $\textit{flatten} \; a :: as'$ & $\dn$ & $a :: \textit{flatten} \; \textit{as'}$ \quad(otherwise) 
   737     $\textit{flatten} \; a :: as'$ & $\dn$ & $a :: \textit{flatten} \; \textit{as'}$ \quad(otherwise) 
   738 \end{tabular}    
   738 \end{tabular}    
   739 \end{center}  
   739 \end{center}  
   959 we use the helper function retrieve described by Sulzmann and Lu:
   959 we use the helper function retrieve described by Sulzmann and Lu:
   960 \begin{center}
   960 \begin{center}
   961 \begin{tabular}{@{}l@{\hspace{2mm}}c@{\hspace{2mm}}l@{}}
   961 \begin{tabular}{@{}l@{\hspace{2mm}}c@{\hspace{2mm}}l@{}}
   962   $\textit{retrieve}\,(_{bs}\ONE)\,\Empty$ & $\dn$ & $bs$\\
   962   $\textit{retrieve}\,(_{bs}\ONE)\,\Empty$ & $\dn$ & $bs$\\
   963   $\textit{retrieve}\,(_{bs}{\bf c})\,(\Char\,d)$ & $\dn$ & $bs$\\
   963   $\textit{retrieve}\,(_{bs}{\bf c})\,(\Char\,d)$ & $\dn$ & $bs$\\
   964   $\textit{retrieve}\,(_{bs}\oplus a::as)\,(\Left\,v)$ & $\dn$ &
   964   $\textit{retrieve}\,(_{bs}\sum a::as)\,(\Left\,v)$ & $\dn$ &
   965      $bs \,@\, \textit{retrieve}\,a\,v$\\
   965      $bs \,@\, \textit{retrieve}\,a\,v$\\
   966   $\textit{retrieve}\,(_{bs}\oplus a::as)\,(\Right\,v)$ & $\dn$ &
   966   $\textit{retrieve}\,(_{bs}\sum a::as)\,(\Right\,v)$ & $\dn$ &
   967   $\textit{bs} \,@\, \textit{retrieve}\,(_{[]}\oplus as)\,v$\\
   967   $\textit{bs} \,@\, \textit{retrieve}\,(_{[]}\sum as)\,v$\\
   968   $\textit{retrieve}\,(_{bs}a_1\cdot a_2)\,(\Seq\,v_1\,v_2)$ & $\dn$ &
   968   $\textit{retrieve}\,(_{bs}a_1\cdot a_2)\,(\Seq\,v_1\,v_2)$ & $\dn$ &
   969      $bs \,@\,\textit{retrieve}\,a_1\,v_1\,@\, \textit{retrieve}\,a_2\,v_2$\\
   969      $bs \,@\,\textit{retrieve}\,a_1\,v_1\,@\, \textit{retrieve}\,a_2\,v_2$\\
   970   $\textit{retrieve}\,(_{bs}a^*)\,(\Stars\,[])$ & $\dn$ &
   970   $\textit{retrieve}\,(_{bs}a^*)\,(\Stars\,[])$ & $\dn$ &
   971      $bs \,@\, [0]$\\
   971      $bs \,@\, [0]$\\
   972   $\textit{retrieve}\,(_{bs}a^*)\,(\Stars\,(v\!::\!vs))$ & $\dn$ &\\
   972   $\textit{retrieve}\,(_{bs}a^*)\,(\Stars\,(v\!::\!vs))$ & $\dn$ &\\
  1163 \end{tabular}    
  1163 \end{tabular}    
  1164 \end{center}    
  1164 \end{center}    
  1165 
  1165 
  1166 \noindent
  1166 \noindent
  1167 
  1167 
  1168 and the difinition of $\flatten$:
  1168 and the definition of $\flatten$:
  1169  \begin{center}
  1169  \begin{center}
  1170  \begin{tabular}{c c c}
  1170  \begin{tabular}{c c c}
  1171  $\flatten \; []$ & $\dn$ & $[]$\\
  1171  $\flatten \; []$ & $\dn$ & $[]$\\
  1172  $\flatten \; \ZERO::rs$ & $\dn$ & $rs$\\
  1172  $\flatten \; \ZERO::rs$ & $\dn$ & $rs$\\
  1173  $\flatten \;(_{\textit{bs}_1}\oplus \textit{rs}_1 ::rs)$ & $\dn$ & $(\map \, (\fuse \, \textit{bs}_1) \,\textit{rs}_1) ::: \flatten(rs)$\\
  1173  $\flatten \;(_{\textit{bs}_1}\sum \textit{rs}_1 ::rs)$ & $\dn$ & $(\map \, (\fuse \, \textit{bs}_1) \,\textit{rs}_1) ::: \flatten(rs)$\\
  1174  $\flatten \; r :: rs$ & $\dn$ & $r::\flatten(rs)$
  1174  $\flatten \; r :: rs$ & $\dn$ & $r::\flatten(rs)$
  1175  \end{tabular}
  1175  \end{tabular}
  1176  \end{center}
  1176  \end{center}
  1177  
  1177  
  1178  \noindent
  1178  \noindent
  1179 If we call $\simp$ on $\rup\backslash a$, just as $\backslash_{simp}$
  1179 If we call $\simp$ on $\rup\backslash a$, just as $\backslash_{simp}$
  1180 requires, then we would go throught the third clause of 
  1180 requires, then we would go through the third clause of 
  1181 the sequence case:$\quad\textit{case} \;  (\ONE, a_2') \Rightarrow  \textit{fuse} \; bs \;  a_2'$.
  1181 the sequence case:$\quad\textit{case} \;  (\ONE, a_2') \Rightarrow  \textit{fuse} \; bs \;  a_2'$.
  1182 The $\ZERO$ of $(_0\ONE  + \ZERO)$ is thrown away 
  1182 The $\ZERO$ of $(_0\ONE  + \ZERO)$ is thrown away 
  1183 by $\flatten$ and 
  1183 by $\flatten$ and 
  1184 $_0\ONE$ merged into $(_0a  +  _1a^*)$ by simply
  1184 $_0\ONE$ merged into $(_0a  +  _1a^*)$ by simply
  1185 putting its bits($_0$) to the front of the second component:
  1185 putting its bits($_0$) to the front of the second component:
  1188  namely, $(_0(_0a  +  _1a^*))\backslash a$, we get 
  1188  namely, $(_0(_0a  +  _1a^*))\backslash a$, we get 
  1189  $
  1189  $
  1190  _0(_0 \ONE  +  _1(_1\ONE \cdot a^*))
  1190  _0(_0 \ONE  +  _1(_1\ONE \cdot a^*))
  1191  $, and this simplifies to $_0(_0 \ONE  +  _{11} a^*)$
  1191  $, and this simplifies to $_0(_0 \ONE  +  _{11} a^*)$
  1192  by the third clause of the alternative case:
  1192  by the third clause of the alternative case:
  1193  $\quad\textit{case} \;  as' \Rightarrow  \textit{ALTS}\;bs\;as'$.
  1193  \begin{center}
       
  1194  $\quad\textit{case} \;  as' \Rightarrow  _{bs}\sum{as'}$.
       
  1195  \end{center}
       
  1196  
       
  1197  \noindent
  1194 The outmost bit $_0$ stays with 
  1198 The outmost bit $_0$ stays with 
  1195 the outmost regular expression, rather than being fused to
  1199 the outmost regular expression, rather than being fused to
  1196 its child regular expressions, as what we will later see happens
  1200 its child regular expressions, as what we will later see happens
  1197 to $\simp(\rup\backslash \, s)$.
  1201 to $\simp(\rup\backslash \, s)$.
  1198 If we choose to not simplify in the midst of derivative operations,
  1202 If we choose to not simplify in the midst of derivative operations,
  1238 $\rup\backslash_{simp} \, s$
  1242 $\rup\backslash_{simp} \, s$
  1239 and  $\simp(\rup\backslash s)$:
  1243 and  $\simp(\rup\backslash s)$:
  1240 \begin{center}
  1244 \begin{center}
  1241 \begin{tabular}{lcl}
  1245 \begin{tabular}{lcl}
  1242 $D =\{ (r_1 \cdot r_2,\; [c_1c_2]) \mid $ & $\simp(r_2) = r_2, \simp(r_1 \backslash c_1) = \ONE,$\\
  1246 $D =\{ (r_1 \cdot r_2,\; [c_1c_2]) \mid $ & $\simp(r_2) = r_2, \simp(r_1 \backslash c_1) = \ONE,$\\
  1243  $r_1 \; not \; \nullable, c_2 \in L(r_2),$ & $\exists \textit{rs},\textit{bs}.\;  r_2 \backslash c_2 = _{bs}{\oplus rs}$\\
  1247  $r_1 \; not \; \nullable, c_2 \in L(r_2),$ & $\exists \textit{rs},\textit{bs}.\;  r_2 \backslash c_2 = _{bs}{\sum rs}$\\
  1244 $\exists \textit{rs}_1. \; \simp(r_2 \backslash c_2) = _{bs}{\oplus \textit{rs}_1}$ &  $and \;\simp(r_1 \backslash [c_1c_2]) = \ZERO\}$\\
  1248 $\exists \textit{rs}_1. \; \simp(r_2 \backslash c_2) = _{bs}{\sum \textit{rs}_1}$ &  $and \;\simp(r_1 \backslash [c_1c_2]) = \ZERO\}$\\
  1245 \end{tabular}
  1249 \end{tabular}
  1246 \end{center}
  1250 \end{center}
  1247 We take a pair $(r, \;s)$ from the set $D$.
  1251 We take a pair $(r, \;s)$ from the set $D$.
  1248 
  1252 
  1249 Now we compute ${\bf \rup \backslash_{simp} s}$, we get:
  1253 Now we compute ${\bf \rup \backslash_{simp} s}$, we get:
  1263 \begin{center}
  1267 \begin{center}
  1264 $ \simp \left[  \fuse \; \textit{bs}_2\; r_2  \backslash c_2 \right]$,
  1268 $ \simp \left[  \fuse \; \textit{bs}_2\; r_2  \backslash c_2 \right]$,
  1265 \end{center}
  1269 \end{center}
  1266 which is equal to 
  1270 which is equal to 
  1267 \begin{center}
  1271 \begin{center}
  1268 $\simp \left[ \fuse \; \textit{bs}_2 \; _{bs}{\oplus rs} \right]$\\
  1272 $\simp \left[ \fuse \; \textit{bs}_2 \; _{bs}{\sum rs} \right]$\\
  1269 $=\simp \left[  \; _{bs_2++bs}{\oplus rs} \right]$\\
  1273 $=\simp \left[  \; _{bs_2++bs}{\sum rs} \right]$\\
  1270 $=  \; _{bs_2++bs}{\oplus \textit{rs}_1} $
  1274 $=  \; _{bs_2++bs}{\sum \textit{rs}_1} $
  1271 \end{center}
  1275 \end{center}
  1272 \noindent
  1276 \noindent
  1273 by using the properties from the set $D$ again
  1277 by using the properties from the set $D$ again
  1274 and again(The reason why we set so many conditions 
  1278 and again(The reason why we set so many conditions 
  1275 that the pair $(r,s)$ need to satisfy is because we can
  1279 that the pair $(r,s)$ need to satisfy is because we can
  1282 \noindent
  1286 \noindent
  1283 Again, using the properties above, we obtain
  1287 Again, using the properties above, we obtain
  1284 the following chain of equalities:
  1288 the following chain of equalities:
  1285 \begin{center}
  1289 \begin{center}
  1286 $\simp(\rup \backslash s)= \simp \left[ ((r_1 \cdot r_2 )\backslash c_1) \backslash c_2 \right]= \simp\left[    \left(r_1 \backslash c_1\right) \cdot r_2  \big) \backslash c_2 \right]$\\
  1290 $\simp(\rup \backslash s)= \simp \left[ ((r_1 \cdot r_2 )\backslash c_1) \backslash c_2 \right]= \simp\left[    \left(r_1 \backslash c_1\right) \cdot r_2  \big) \backslash c_2 \right]$\\
  1287 $= \simp \left[ \oplus[\big( \left(r_1 \backslash c_1\right) \backslash c_2 \big) \cdot r_2 \; , \; \fuse \; (\bmkeps \;r_1\backslash c_1) \; r_2 \backslash c_2 ] \right]$,
  1291 $= \simp \left[ \sum[\big( \left(r_1 \backslash c_1\right) \backslash c_2 \big) \cdot r_2 \; , \; \fuse \; (\bmkeps \;r_1\backslash c_1) \; r_2 \backslash c_2 ] \right]$,
  1288 \end{center}
  1292 \end{center}
  1289 \noindent
  1293 \noindent
  1290 as before, we call the bitcode returned by 
  1294 as before, we call the bitcode returned by 
  1291 $\bmkeps(r_1\backslash c_1)$ as
  1295 $\bmkeps(r_1\backslash c_1)$ as
  1292 $\textit{bs}_2$. 
  1296 $\textit{bs}_2$. 
  1293 Also, $\simp(r_2 \backslash c_2)$ is 
  1297 Also, $\simp(r_2 \backslash c_2)$ is 
  1294 $_{bs}\oplus \textit{rs}_1$, 
  1298 $_{bs}\sum \textit{rs}_1$, 
  1295 and $( \left(r_1 \backslash c_1\right) \backslash c_2  \cdot r_2)$
  1299 and $( \left(r_1 \backslash c_1\right) \backslash c_2  \cdot r_2)$
  1296 simplifies to $\ZERO$,
  1300 simplifies to $\ZERO$,
  1297 so the above term can be expanded as
  1301 so the above term can be expanded as
  1298 \begin{center}
  1302 \begin{center}
  1299 \begin{tabular}{l}
  1303 \begin{tabular}{l}
  1300 $\textit{distinct}(\flatten[\ZERO\;, \; _{\textit{bs}_2++\textit{bs}}\oplus \textit{rs}_1] ) \; \textit{match} $ \\
  1304 $\textit{distinct}(\flatten[\ZERO\;, \; _{\textit{bs}_2++\textit{bs}}\sum \textit{rs}_1] ) \; \textit{match} $ \\
  1301   $\textit{case} \; [] \Rightarrow  \ZERO$ \\
  1305   $\textit{case} \; [] \Rightarrow  \ZERO$ \\
  1302    $\textit{case} \; a :: [] \Rightarrow  \textit{\fuse \; \textit{bs} a}$ \\
  1306    $\textit{case} \; a :: [] \Rightarrow  \textit{\fuse \; \textit{bs} a}$ \\
  1303     $\textit{case} \;  as' \Rightarrow  _{[]}\oplus as'$\\ 
  1307     $\textit{case} \;  as' \Rightarrow  _{[]}\sum as'$\\ 
  1304 \end{tabular}
  1308 \end{tabular}
  1305 \end{center}
  1309 \end{center}
  1306 \noindent
  1310 \noindent
  1307 Applying the definition of $\flatten$, we get
  1311 Applying the definition of $\flatten$, we get
  1308 \begin{center}
  1312 \begin{center}
  1309 $_{[]}\oplus (\textit{map} \; \fuse (\textit{bs}_2 ++ bs) rs_1)$
  1313 $_{[]}\sum (\textit{map} \; \fuse (\textit{bs}_2 ++ bs) rs_1)$
  1310 \end{center}
  1314 \end{center}
  1311 \noindent
  1315 \noindent
  1312 compared to the result 
  1316 compared to the result 
  1313 \begin{center}
  1317 \begin{center}
  1314 $ \; _{bs_2++bs}{\oplus \textit{rs}_1} $
  1318 $ \; _{bs_2++bs}{\sum \textit{rs}_1} $
  1315 \end{center}
  1319 \end{center}
  1316 \noindent
  1320 \noindent
  1317 Note how these two regular expressions only
  1321 Note how these two regular expressions only
  1318 differ in terms of the position of the bits 
  1322 differ in terms of the position of the bits 
  1319 $\textit{bs}_2++\textit{bs}$. They are the same otherwise.
  1323 $\textit{bs}_2++\textit{bs}$. They are the same otherwise.
  1323 there exists an outer alternative.
  1327 there exists an outer alternative.
  1324 Note how the absence of simplification
  1328 Note how the absence of simplification
  1325 caused $\simp(\rup \backslash s)$ to
  1329 caused $\simp(\rup \backslash s)$ to
  1326 generate the nested alternatives structure:
  1330 generate the nested alternatives structure:
  1327 \begin{center}
  1331 \begin{center}
  1328 $  \oplus[\ZERO \;, \; _{bs}\oplus \textit{rs} ]$
  1332 $  \sum[\ZERO \;, \; _{bs}\sum \textit{rs} ]$
  1329 \end{center}
  1333 \end{center}
  1330 and this will always trigger the $\flatten$ to 
  1334 and this will always trigger the $\flatten$ to 
  1331 spill out the inner alternative's bitcode $\textit{bs}$,
  1335 spill out the inner alternative's bitcode $\textit{bs}$,
  1332 whereas when
  1336 whereas when
  1333 simplification is done along the way, 
  1337 simplification is done along the way, 
  1338 How about we do not allow the function $\simp$
  1342 How about we do not allow the function $\simp$
  1339 to fuse out the bits when it is unnecessary?
  1343 to fuse out the bits when it is unnecessary?
  1340 Like, for the above regular expression, we might
  1344 Like, for the above regular expression, we might
  1341 just delete the outer layer of alternative
  1345 just delete the outer layer of alternative
  1342 \begin{center}
  1346 \begin{center}
  1343 \st{$ {\oplus[\ZERO \;,}$} $_{bs}\oplus \textit{rs}$ \st{$]$}
  1347 \st{$ {\sum[\ZERO \;,}$} $_{bs}\sum \textit{rs}$ \st{$]$}
  1344 \end{center}
  1348 \end{center}
  1345 and get $_{bs}\oplus \textit{rs}$ instead, without
  1349 and get $_{bs}\sum \textit{rs}$ instead, without
  1346 fusing the bits $\textit{bs}$ inside to every element 
  1350 fusing the bits $\textit{bs}$ inside to every element 
  1347 of $\textit{rs}$.
  1351 of $\textit{rs}$.
  1348 This idea can be realized by making the following
  1352 This idea can be realized by making the following
  1349 changes to the $\simp$-function:
  1353 changes to the $\simp$-function:
  1350 \begin{center}
  1354 \begin{center}
  1351   \begin{tabular}{@{}lcl@{}}
  1355   \begin{tabular}{@{}lcl@{}}
  1352    
  1356    
  1353   $\textit{simp}' \; (_{\textit{bs}}(a_1 \cdot a_2))$ & $\dn$ & $\textit{as} \; \simp \; \textit{was} \; \textit{before} $ \\
  1357   $\textit{simp}' \; (_{\textit{bs}}(a_1 \cdot a_2))$ & $\dn$ & $\textit{as} \; \simp \; \textit{was} \; \textit{before} $ \\
  1354 
  1358 
  1355   $\textit{simp}' \; (_{bs}\oplus as)$ & $\dn$ & \st{$\textit{distinct}( \textit{flatten} ( \textit{map simp as})) \; \textit{match} $} \\
  1359   $\textit{simp}' \; (_{bs}\sum as)$ & $\dn$ & \st{$\textit{distinct}( \textit{flatten} ( \textit{map simp as})) \; \textit{match} $} \\
  1356   &&\st{$\quad\textit{case} \; [] \Rightarrow  \ZERO$} \\
  1360   &&\st{$\quad\textit{case} \; [] \Rightarrow  \ZERO$} \\
  1357    &&\st{$\quad\textit{case} \; a :: [] \Rightarrow  \textit{fuse bs a}$} \\
  1361    &&\st{$\quad\textit{case} \; a :: [] \Rightarrow  \textit{fuse bs a}$} \\
  1358    &&\st{$\quad\textit{case} \;  as' \Rightarrow  \textit{ALTS}\;bs\;as'$}\\ 
  1362    &&\st{$\quad\textit{case} \;  as' \Rightarrow  \textit{ALTS}\;bs\;as'$}\\ 
  1359    &&$\textit{if}(\textit{hollowAlternatives}( \textit{map \; simp \; as}))$\\
  1363    &&$\textit{if}(\textit{hollowAlternatives}( \textit{map \; simp \; as}))$\\
  1360    &&$\textit{then} \; \fuse  \; \textit{bs}\; \textit{extractAlt}(\textit{map} \; \simp \; \textit{as} )$\\
  1364    &&$\textit{then} \; \fuse  \; \textit{bs}\; \textit{extractAlt}(\textit{map} \; \simp \; \textit{as} )$\\
  1361    &&$\textit{else} \; \simp(_{bs} \oplus \textit{as})$\\
  1365    &&$\textit{else} \; \simp(_{bs} \sum \textit{as})$\\
  1362    
  1366    
  1363 
  1367 
  1364    $\textit{simp}' \; a$ & $\dn$ & $\textit{a} \qquad \textit{otherwise}$   
  1368    $\textit{simp}' \; a$ & $\dn$ & $\textit{a} \qquad \textit{otherwise}$   
  1365 \end{tabular}    
  1369 \end{tabular}    
  1366 \end{center}    
  1370 \end{center}    
  1367 
  1371 
  1368 \noindent
  1372 \noindent
  1369 given the definition of $\textit{hollowAlternatives}$ and  $\textit{extractAlt}$ :
  1373 given the definition of $\textit{hollowAlternatives}$ and  $\textit{extractAlt}$ :
  1370 \begin{center}
  1374 \begin{center}
  1371 $\textit{hollowAlternatives}( \textit{rs}) \dn 
  1375 $\textit{hollowAlternatives}( \textit{rs}) \dn 
  1372 \exists r = (_{\textit{bs}_1}\oplus \textit{rs}_1)  \in \textit{rs}.  \forall r' \in \textit{rs}, \;
  1376 \exists r = (_{\textit{bs}_1}\sum \textit{rs}_1)  \in \textit{rs}.  \forall r' \in \textit{rs}, \;
  1373 \textit{either} \; r' = \ZERO \; \textit{or} \; r' = r $
  1377 \textit{either} \; r' = \ZERO \; \textit{or} \; r' = r $
  1374 $\textit{extractAlt}( \textit{rs}) \dn \textit{if}\big(
  1378 $\textit{extractAlt}( \textit{rs}) \dn \textit{if}\big(
  1375 \exists r = (_{\textit{bs}_1}\oplus \textit{rs}_1)  \in \textit{rs}.  \forall r' \in \textit{rs}, \;
  1379 \exists r = (_{\textit{bs}_1}\sum \textit{rs}_1)  \in \textit{rs}.  \forall r' \in \textit{rs}, \;
  1376 \textit{either} \; r' = \ZERO \; \textit{or} \; r' = r \big)\; \textit{then} \; \textit{return} \; r$
  1380 \textit{either} \; r' = \ZERO \; \textit{or} \; r' = r \big)\; \textit{then} \; \textit{return} \; r$
  1377 \end{center}
  1381 \end{center}
  1378 \noindent
  1382 \noindent
  1379 Basically, $\textit{hollowAlternatives}$ captures the feature of
  1383 Basically, $\textit{hollowAlternatives}$ captures the feature of
  1380 a list of regular expression of the shape 
  1384 a list of regular expression of the shape 
  1381 \begin{center}
  1385 \begin{center}
  1382 $  \oplus[\ZERO \;, \; _{bs}\oplus \textit{rs} ]$
  1386 $  \sum[\ZERO \;, \; _{bs}\sum \textit{rs} ]$
  1383 \end{center}
  1387 \end{center}
  1384 and this means we can simply elevate the 
  1388 and this means we can simply elevate the 
  1385 inner regular expression $_{bs}\oplus \textit{rs}$
  1389 inner regular expression $_{bs}\sum \textit{rs}$
  1386  to the outmost
  1390  to the outmost
  1387 and throw away the useless $\ZERO$s and
  1391 and throw away the useless $\ZERO$s and
  1388 the outer $\oplus$ wrapper.
  1392 the outer $\sum$ wrapper.
  1389 Using this new definition of simp, 
  1393 Using this new definition of simp, 
  1390 under the example where  $r$ is the regular expression
  1394 under the example where  $r$ is the regular expression
  1391 $(a+b)(a+a*)$ and $s$  is the string $aa$
  1395 $(a+b)(a+a*)$ and $s$  is the string $aa$
  1392 the problem of $\rup\backslash_{simp} \, s \neq \simp(\rup\backslash s)$
  1396 the problem of $\rup\backslash_{simp} \, s \neq \simp(\rup\backslash s)$
  1393 is resolved.
  1397 is resolved.
  1458 $\textit{sub}(\ONE) \dn \{\ONE\}$\\
  1462 $\textit{sub}(\ONE) \dn \{\ONE\}$\\
  1459 $\textit{sub}(r_1 \cdot r_2) \dn \textit{sub}(r_1) \cup \textit{sub}(r_2) \cup \{r_1 \cdot r_2\}$\\
  1463 $\textit{sub}(r_1 \cdot r_2) \dn \textit{sub}(r_1) \cup \textit{sub}(r_2) \cup \{r_1 \cdot r_2\}$\\
  1460 $\textit{sub}(r_1 + r_2) \dn \textit{sub}(r_1) \cup \textit{sub}(r_2) \cup \{r_1+r_2\}$\\
  1464 $\textit{sub}(r_1 + r_2) \dn \textit{sub}(r_1) \cup \textit{sub}(r_2) \cup \{r_1+r_2\}$\\
  1461 \end{center}
  1465 \end{center}
  1462 $\textit{good}(r) \dn r \neq \ZERO \land \\
  1466 $\textit{good}(r) \dn r \neq \ZERO \land \\
  1463 \forall r' \in \textit{sub}(r), \textit{if} \; r' = _{bs_1}\oplus(rs_1), \;
  1467 \forall r' \in \textit{sub}(r), \textit{if} \; r' = _{bs_1}\sum(rs_1), \;
  1464 \textit{then} \nexists r'' \in \textit{rs}_1 \; s.t.\;
  1468 \textit{then} \nexists r'' \in \textit{rs}_1 \; s.t.\;
  1465 r'' = _{bs_2}\oplus \textit{rs}_2 $
  1469 r'' = _{bs_2}\sum \textit{rs}_2 $
  1466 
  1470 
  1467 The properties are mainly the ones below:
  1471 The properties are mainly the ones below:
  1468 \begin{itemize}
  1472 \begin{itemize}
  1469 \item
  1473 \item
  1470 \begin{center}
  1474 \begin{center}
  1505  $_{bs}{r_1 \cdot r_2}$ & 
  1509  $_{bs}{r_1 \cdot r_2}$ & 
  1506  $\gg$ & 
  1510  $\gg$ & 
  1507  $\textit{bs} @ \textit{bs}_1 @ \textit{bs}_2$\\
  1511  $\textit{bs} @ \textit{bs}_1 @ \textit{bs}_2$\\
  1508 
  1512 
  1509  $\textit{if} \; r \gg \textit{bs}_1$ &  & $\textit{then}$  &
  1513  $\textit{if} \; r \gg \textit{bs}_1$ &  & $\textit{then}$  &
  1510  $_{bs}{\oplus(r :: \textit{rs}})$ & 
  1514  $_{bs}{\sum(r :: \textit{rs}})$ & 
  1511  $\gg$ & 
  1515  $\gg$ & 
  1512  $\textit{bs} @ \textit{bs}_1 $\\ 
  1516  $\textit{bs} @ \textit{bs}_1 $\\ 
  1513 
  1517 
  1514  $\textit{if} \; _{bs}(\oplus \textit{rs}) \gg \textit{bs} @ \textit{bs}_1$ &  & $\textit{then}$  &
  1518  $\textit{if} \; _{bs}(\sum \textit{rs}) \gg \textit{bs} @ \textit{bs}_1$ &  & $\textit{then}$  &
  1515  $_{bs}{\oplus(r :: \textit{rs}})$ & 
  1519  $_{bs}{\sum(r :: \textit{rs}})$ & 
  1516  $\gg$ & 
  1520  $\gg$ & 
  1517  $\textit{bs} @ \textit{bs}_1 $\\  
  1521  $\textit{bs} @ \textit{bs}_1 $\\  
  1518 
  1522 
  1519  $\textit{if} \; r \gg \textit{bs}_1\; \textit{and}$ &  $_{bs}r^* \gg \textit{bs} @ \textit{bs}_2$ & $\textit{then}$  &
  1523  $\textit{if} \; r \gg \textit{bs}_1\; \textit{and}$ &  $_{bs}r^* \gg \textit{bs} @ \textit{bs}_2$ & $\textit{then}$  &
  1520  $_{bs}r^* $ & 
  1524  $_{bs}r^* $ &