ninems/ninems.tex
changeset 86 51ac1ab6e1fd
parent 85 ba40ab3658ca
child 87 9c52c21b5db3
equal deleted inserted replaced
85:ba40ab3658ca 86:51ac1ab6e1fd
   870 %\end{definition}
   870 %\end{definition}
   871 
   871 
   872 Sulzmann and Lu's integrated the bitcodes into regular expressions to
   872 Sulzmann and Lu's integrated the bitcodes into regular expressions to
   873 create annotated regular expressions \cite{Sulzmann2014}.
   873 create annotated regular expressions \cite{Sulzmann2014}.
   874 \emph{Annotated regular expressions} are defined by the following
   874 \emph{Annotated regular expressions} are defined by the following
   875 grammar:\comment{ALTS should have  an $as$ in  the definitions, not  just $a_1$ and $a_2$}
   875 grammar:%\comment{ALTS should have  an $as$ in  the definitions, not  just $a_1$ and $a_2$}
   876 
   876 
   877 \begin{center}
   877 \begin{center}
   878 \begin{tabular}{lcl}
   878 \begin{tabular}{lcl}
   879   $\textit{a}$ & $::=$  & $\textit{ZERO}$\\
   879   $\textit{a}$ & $::=$  & $\textit{ZERO}$\\
   880                   & $\mid$ & $\textit{ONE}\;\;bs$\\
   880                   & $\mid$ & $\textit{ONE}\;\;bs$\\
   881                   & $\mid$ & $\textit{CHAR}\;\;bs\,c$\\
   881                   & $\mid$ & $\textit{CHAR}\;\;bs\,c$\\
   882                   & $\mid$ & $\textit{ALTS}\;\;bs\,a_1 \, a_2$\\
   882                   & $\mid$ & $\textit{ALTS}\;\;bs\,as$\\
   883                   & $\mid$ & $\textit{SEQ}\;\;bs\,a_1\,a_2$\\
   883                   & $\mid$ & $\textit{SEQ}\;\;bs\,a_1\,a_2$\\
   884                   & $\mid$ & $\textit{STAR}\;\;bs\,a$
   884                   & $\mid$ & $\textit{STAR}\;\;bs\,a$
   885 \end{tabular}    
   885 \end{tabular}    
   886 \end{center}  
   886 \end{center}  
   887 %(in \textit{ALTS})
   887 %(in \textit{ALTS})
   888 
   888 
   889 \noindent
   889 \noindent
   890 where $bs$ stands for bitcodes, and $a$  for $\bold{a}$nnotated regular
   890 where $bs$ stands for bitcodes, $a$  for $\bold{a}$nnotated regular
   891 expressions. We will show that these bitcodes encode information about
   891 expressions and $as$ for a list of annotated regular expressions.
       
   892 The alternative constructor($\textit{ALTS}$) has been generalized to 
       
   893 accept a list of annotated regular expressions rather than just 2.
       
   894 We will show that these bitcodes encode information about
   892 the (POSIX) value that should be generated by the Sulzmann and Lu
   895 the (POSIX) value that should be generated by the Sulzmann and Lu
   893 algorithm.
   896 algorithm.
   894 
   897 
   895 
   898 
   896 To do lexing using annotated regular expressions, we shall first
   899 To do lexing using annotated regular expressions, we shall first
   903 \begin{tabular}{lcl}
   906 \begin{tabular}{lcl}
   904   $(\ZERO)^\uparrow$ & $\dn$ & $\textit{ZERO}$\\
   907   $(\ZERO)^\uparrow$ & $\dn$ & $\textit{ZERO}$\\
   905   $(\ONE)^\uparrow$ & $\dn$ & $\textit{ONE}\,[]$\\
   908   $(\ONE)^\uparrow$ & $\dn$ & $\textit{ONE}\,[]$\\
   906   $(c)^\uparrow$ & $\dn$ & $\textit{CHAR}\,[]\,c$\\
   909   $(c)^\uparrow$ & $\dn$ & $\textit{CHAR}\,[]\,c$\\
   907   $(r_1 + r_2)^\uparrow$ & $\dn$ &
   910   $(r_1 + r_2)^\uparrow$ & $\dn$ &
   908          $\textit{ALTS}\;[]\,(\textit{fuse}\,[\Z]\,r_1^\uparrow)\,
   911   $\textit{ALTS}\;[]\,List((\textit{fuse}\,[\Z]\,r_1^\uparrow),\,
   909                             (\textit{fuse}\,[\S]\,r_2^\uparrow)$\\
   912   (\textit{fuse}\,[\S]\,r_2^\uparrow))$\\
   910   $(r_1\cdot r_2)^\uparrow$ & $\dn$ &
   913   $(r_1\cdot r_2)^\uparrow$ & $\dn$ &
   911          $\textit{SEQ}\;[]\,r_1^\uparrow\,r_2^\uparrow$\\
   914          $\textit{SEQ}\;[]\,r_1^\uparrow\,r_2^\uparrow$\\
   912   $(r^*)^\uparrow$ & $\dn$ &
   915   $(r^*)^\uparrow$ & $\dn$ &
   913          $\textit{STAR}\;[]\,r^\uparrow$\\
   916          $\textit{STAR}\;[]\,r^\uparrow$\\
   914 \end{tabular}    
   917 \end{tabular}    
   927   $\textit{fuse}\;bs\,(\textit{ZERO})$ & $\dn$ & $\textit{ZERO}$\\
   930   $\textit{fuse}\;bs\,(\textit{ZERO})$ & $\dn$ & $\textit{ZERO}$\\
   928   $\textit{fuse}\;bs\,(\textit{ONE}\,bs')$ & $\dn$ &
   931   $\textit{fuse}\;bs\,(\textit{ONE}\,bs')$ & $\dn$ &
   929      $\textit{ONE}\,(bs\,@\,bs')$\\
   932      $\textit{ONE}\,(bs\,@\,bs')$\\
   930   $\textit{fuse}\;bs\,(\textit{CHAR}\,bs'\,c)$ & $\dn$ &
   933   $\textit{fuse}\;bs\,(\textit{CHAR}\,bs'\,c)$ & $\dn$ &
   931      $\textit{CHAR}\,(bs\,@\,bs')\,c$\\
   934      $\textit{CHAR}\,(bs\,@\,bs')\,c$\\
   932   $\textit{fuse}\;bs\,(\textit{ALTS}\,bs'\,a_1\,a_2)$ & $\dn$ &
   935   $\textit{fuse}\;bs\,(\textit{ALTS}\,bs'\,as)$ & $\dn$ &
   933      $\textit{ALTS}\,(bs\,@\,bs')\,a_1\,a_2$\\
   936      $\textit{ALTS}\,(bs\,@\,bs')\,as$\\
   934   $\textit{fuse}\;bs\,(\textit{SEQ}\,bs'\,a_1\,a_2)$ & $\dn$ &
   937   $\textit{fuse}\;bs\,(\textit{SEQ}\,bs'\,a_1\,a_2)$ & $\dn$ &
   935      $\textit{SEQ}\,(bs\,@\,bs')\,a_1\,a_2$\\
   938      $\textit{SEQ}\,(bs\,@\,bs')\,a_1\,a_2$\\
   936   $\textit{fuse}\;bs\,(\textit{STAR}\,bs'\,a)$ & $\dn$ &
   939   $\textit{fuse}\;bs\,(\textit{STAR}\,bs'\,a)$ & $\dn$ &
   937      $\textit{STAR}\,(bs\,@\,bs')\,a$
   940      $\textit{STAR}\,(bs\,@\,bs')\,a$
   938 \end{tabular}    
   941 \end{tabular}    
   951   $(\textit{ZERO})\,\backslash c$ & $\dn$ & $\textit{ZERO}$\\  
   954   $(\textit{ZERO})\,\backslash c$ & $\dn$ & $\textit{ZERO}$\\  
   952   $(\textit{ONE}\;bs)\,\backslash c$ & $\dn$ & $\textit{ZERO}$\\  
   955   $(\textit{ONE}\;bs)\,\backslash c$ & $\dn$ & $\textit{ZERO}$\\  
   953   $(\textit{CHAR}\;bs\,d)\,\backslash c$ & $\dn$ &
   956   $(\textit{CHAR}\;bs\,d)\,\backslash c$ & $\dn$ &
   954         $\textit{if}\;c=d\; \;\textit{then}\;
   957         $\textit{if}\;c=d\; \;\textit{then}\;
   955          \textit{ONE}\;bs\;\textit{else}\;\textit{ZERO}$\\  
   958          \textit{ONE}\;bs\;\textit{else}\;\textit{ZERO}$\\  
   956   $(\textit{ALTS}\;bs\,a_1\,a_2)\,\backslash c$ & $\dn$ &
   959   $(\textit{ALTS}\;bs\,as)\,\backslash c$ & $\dn$ &
   957         $\textit{ALTS}\;bs\,(a_1\,\backslash c)\,(a_2\,\backslash c)$\\
   960   $\textit{ALTS}\;bs\,(as.map(\backslash c))$\\
   958   $(\textit{SEQ}\;bs\,a_1\,a_2)\,\backslash c$ & $\dn$ &
   961   $(\textit{SEQ}\;bs\,a_1\,a_2)\,\backslash c$ & $\dn$ &
   959      $\textit{if}\;\textit{bnullable}\,a_1$\\
   962      $\textit{if}\;\textit{bnullable}\,a_1$\\
   960   & &$\textit{then}\;\textit{ALTS}\,bs\,(\textit{SEQ}\,[]\,(a_1\,\backslash c)\,a_2)$\\
   963 					       & &$\textit{then}\;\textit{ALTS}\,bs\,List((\textit{SEQ}\,[]\,(a_1\,\backslash c)\,a_2),$\\
   961   & &$\phantom{\textit{then}\;\textit{ALTS}\,bs\,}(\textit{fuse}\,(\textit{bmkeps}\,a_1)\,(a_2\,\backslash c))$\\
   964 					       & &$\phantom{\textit{then}\;\textit{ALTS}\,bs\,}(\textit{fuse}\,(\textit{bmkeps}\,a_1)\,(a_2\,\backslash c)))$\\
   962   & &$\textit{else}\;\textit{SEQ}\,bs\,(a_1\,\backslash c)\,a_2$\\
   965   & &$\textit{else}\;\textit{SEQ}\,bs\,(a_1\,\backslash c)\,a_2$\\
   963   $(\textit{STAR}\,bs\,a)\,\backslash c$ & $\dn$ &
   966   $(\textit{STAR}\,bs\,a)\,\backslash c$ & $\dn$ &
   964       $\textit{SEQ}\;bs\,(\textit{fuse}\, [\Z] (r\,\backslash c))\,
   967       $\textit{SEQ}\;bs\,(\textit{fuse}\, [\Z] (r\,\backslash c))\,
   965        (\textit{STAR}\,[]\,r)$
   968        (\textit{STAR}\,[]\,r)$
   966 \end{tabular}    
   969 \end{tabular}    
   991 
   994 
   992 %\begin{definition}[\textit{bmkeps}]\mbox{}
   995 %\begin{definition}[\textit{bmkeps}]\mbox{}
   993 \begin{center}
   996 \begin{center}
   994 \begin{tabular}{lcl}
   997 \begin{tabular}{lcl}
   995   $\textit{bmkeps}\,(\textit{ONE}\;bs)$ & $\dn$ & $bs$\\
   998   $\textit{bmkeps}\,(\textit{ONE}\;bs)$ & $\dn$ & $bs$\\
   996   $\textit{bmkeps}\,(\textit{ALTS}\;bs\,a_1\,a_2)$ & $\dn$ &
   999   $\textit{bmkeps}\,(\textit{ALTS}\;bs\,a::as)$ & $\dn$ &
   997      $\textit{if}\;\textit{bnullable}\,a_1$\\
  1000      $\textit{if}\;\textit{bnullable}\,a$\\
   998   & &$\textit{then}\;bs\,@\,\textit{bmkeps}\,a_1$\\
  1001   & &$\textit{then}\;bs\,@\,\textit{bmkeps}\,a$\\
   999   & &$\textit{else}\;bs\,@\,\textit{bmkeps}\,a_2$\\
  1002   & &$\textit{else}\;bs\,@\,\textit{bmkeps}\,(\textit{ALTS}\;bs\,as)$\\
  1000   $\textit{bmkeps}\,(\textit{SEQ}\;bs\,a_1\,a_2)$ & $\dn$ &
  1003   $\textit{bmkeps}\,(\textit{SEQ}\;bs\,a_1\,a_2)$ & $\dn$ &
  1001      $bs \,@\,\textit{bmkeps}\,a_1\,@\, \textit{bmkeps}\,a_2$\\
  1004      $bs \,@\,\textit{bmkeps}\,a_1\,@\, \textit{bmkeps}\,a_2$\\
  1002   $\textit{bmkeps}\,(\textit{STAR}\;bs\,a)$ & $\dn$ &
  1005   $\textit{bmkeps}\,(\textit{STAR}\;bs\,a)$ & $\dn$ &
  1003      $bs \,@\, [\S]$
  1006      $bs \,@\, [\S]$
  1004 \end{tabular}    
  1007 \end{tabular}    
  1098 
  1101 
  1099 Suppose we apply simplification after each derivative step, and view
  1102 Suppose we apply simplification after each derivative step, and view
  1100 these two operations as an atomic one: $a \backslash_{simp}\,c \dn
  1103 these two operations as an atomic one: $a \backslash_{simp}\,c \dn
  1101 \textit{simp}(a \backslash c)$. Then we can use the previous natural
  1104 \textit{simp}(a \backslash c)$. Then we can use the previous natural
  1102 extension from derivative w.r.t.~character to derivative
  1105 extension from derivative w.r.t.~character to derivative
  1103 w.r.t.~string:\comment{simp in  the [] case?}
  1106 w.r.t.~string:%\comment{simp in  the [] case?}
  1104 
  1107 
  1105 \begin{center}
  1108 \begin{center}
  1106 \begin{tabular}{lcl}
  1109 \begin{tabular}{lcl}
  1107 $r \backslash_{simp} (c\!::\!s) $ & $\dn$ & $(r \backslash_{simp}\, c) \backslash_{simp}\, s$ \\
  1110 $r \backslash_{simp} (c\!::\!s) $ & $\dn$ & $(r \backslash_{simp}\, c) \backslash_{simp}\, s$ \\
  1108 $r \backslash_{simp} [\,] $ & $\dn$ & $r$
  1111 $r \backslash_{simp} [\,] $ & $\dn$ & $r$
  1155 
  1158 
  1156 \noindent 
  1159 \noindent 
  1157 We like to settle this correctness claim. It is relatively
  1160 We like to settle this correctness claim. It is relatively
  1158 straightforward to establish that after one simplification step, the part of a
  1161 straightforward to establish that after one simplification step, the part of a
  1159 nullable derivative that corresponds to a POSIX value remains intact and can
  1162 nullable derivative that corresponds to a POSIX value remains intact and can
  1160 still be collected, in other words, we can show that\comment{Double-check....I
  1163 still be collected, in other words, we can show that
  1161 think this  is not the case}
  1164 %\comment{Double-check....I
       
  1165 %think this  is not the case}
  1162 %\comment{If i remember correctly, you have proved this lemma.
  1166 %\comment{If i remember correctly, you have proved this lemma.
  1163 %I feel this is indeed not true because you might place arbitrary 
  1167 %I feel this is indeed not true because you might place arbitrary 
  1164 %bits on the regex r, however if this is the case, did i remember wrongly that
  1168 %bits on the regex r, however if this is the case, did i remember wrongly that
  1165 %you proved something like simplification does not affect $\textit{bmkeps}$ results?
  1169 %you proved something like simplification does not affect $\textit{bmkeps}$ results?
  1166 %Anyway, i have amended this a little bit so it does not allow arbitrary bits attached
  1170 %Anyway, i have amended this a little bit so it does not allow arbitrary bits attached
  1175 additional $r$ in $r+r$  does not delete important POSIX information in
  1179 additional $r$ in $r+r$  does not delete important POSIX information in
  1176 a regular expression. The hard part of this proof is to establish that
  1180 a regular expression. The hard part of this proof is to establish that
  1177 
  1181 
  1178 \begin{center}
  1182 \begin{center}
  1179 	$ \textit{blexer}\_{simp}(r, \; s) =  \textit{blexer}(r, \; s)$
  1183 	$ \textit{blexer}\_{simp}(r, \; s) =  \textit{blexer}(r, \; s)$
  1180 \end{center}\comment{This is not true either...look at the definion blexer/blexer-simp}
  1184 \end{center}
       
  1185 %comment{This is not true either...look at the definion blexer/blexer-simp}
  1181 
  1186 
  1182 \noindent That is, if we do derivative on regular expression $r$ and then
  1187 \noindent That is, if we do derivative on regular expression $r$ and then
  1183 simplify it, and repeat this process until we exhaust the string, we get a
  1188 simplify it, and repeat this process until we exhaust the string, we get a
  1184 regular expression $r''$($\textit{LHS}$)  that provides the POSIX matching
  1189 regular expression $r''$($\textit{LHS}$)  that provides the POSIX matching
  1185 information, which is exactly the same as the result $r'$($\textit{RHS}$ of the
  1190 information, which is exactly the same as the result $r'$($\textit{RHS}$ of the
  1195 become very different.  The crucial point is to find the
  1200 become very different.  The crucial point is to find the
  1196 $\textit{POSIX}$  information of a regular expression and how it is modified,
  1201 $\textit{POSIX}$  information of a regular expression and how it is modified,
  1197 augmented and propagated 
  1202 augmented and propagated 
  1198 during simplification in parallel with the regular expression that
  1203 during simplification in parallel with the regular expression that
  1199 has not been simplified in the subsequent derivative operations.  To aid this,
  1204 has not been simplified in the subsequent derivative operations.  To aid this,
  1200 we use the helper function retrieve described by Sulzmann and Lu: \\definition
  1205 we use the helper function retrieve described by Sulzmann and Lu:
  1201 of retrieve TODO\comment{Did not read further}\\
  1206 \begin{center}
       
  1207 \begin{tabular}{@{}l@{\hspace{2mm}}c@{\hspace{2mm}}l@{}}
       
  1208   $\textit{retrieve}\,(\textit{ONE}\,bs)\,\Empty$ & $\dn$ & $bs$\\
       
  1209   $\textit{retrieve}\,(\textit{CHAR}\,bs\,c)\,(\Char\,d)$ & $\dn$ & $bs$\\
       
  1210   $\textit{retrieve}\,(\textit{ALTS}\,bs\,a::as)\,(\Left\,v)$ & $\dn$ &
       
  1211      $bs \,@\, \textit{retrieve}\,a\,v$\\
       
  1212   $\textit{retrieve}\,(\textit{ALTS}\,bs\,a::as)\,(\Right\,v)$ & $\dn$ &
       
  1213   $bs \,@\, \textit{retrieve}\,(\textit{ALTS}\,bs\,as)\,v$\\
       
  1214   $\textit{retrieve}\,(\textit{SEQ}\,bs\,a_1\,a_2)\,(\Seq\,v_1\,v_2)$ & $\dn$ &
       
  1215      $bs \,@\,\textit{retrieve}\,a_1\,v_1\,@\, \textit{retrieve}\,a_2\,v_2$\\
       
  1216   $\textit{retrieve}\,(\textit{STAR}\,bs\,a)\,(\Stars\,[])$ & $\dn$ &
       
  1217      $bs \,@\, [\S]$\\
       
  1218   $\textit{retrieve}\,(\textit{STAR}\,bs\,a)\,(\Stars\,(v\!::\!vs))$ & $\dn$ &\\
       
  1219   \multicolumn{3}{l}{
       
  1220      \hspace{3cm}$bs \,@\, [\Z] \,@\, \textit{retrieve}\,a\,v\,@\,
       
  1221                     \textit{retrieve}\,(\textit{STAR}\,[]\,a)\,(\Stars\,vs)$}\\
       
  1222 \end{tabular}
       
  1223 \end{center}
       
  1224 %\comment{Did not read further}\\
  1202 This function assembles the bitcode 
  1225 This function assembles the bitcode 
  1203 %that corresponds to a lexical value for how
  1226 %that corresponds to a lexical value for how
  1204 %the current derivative matches the suffix of the string(the characters that
  1227 %the current derivative matches the suffix of the string(the characters that
  1205 %have not yet appeared, but will appear as the successive derivatives go on.
  1228 %have not yet appeared, but will appear as the successive derivatives go on.
  1206 %How do we get this "future" information? By the value $v$, which is
  1229 %How do we get this "future" information? By the value $v$, which is