etnms/etnms.tex
changeset 135 0ed8053dcf21
parent 134 5ca3b84e0eba
child 136 8c404da3cb49
equal deleted inserted replaced
134:5ca3b84e0eba 135:0ed8053dcf21
   913 explicitly showing the order of characters being
   913 explicitly showing the order of characters being
   914 injected back in each step.
   914 injected back in each step.
   915 With $\flex$ we can write $\lexer$ this way: 
   915 With $\flex$ we can write $\lexer$ this way: 
   916 
   916 
   917 \begin{center}
   917 \begin{center}
   918 $\lexer \;r\; s = \flex \;id \; r\;s \;(\mkeps r\backslash s)$
   918 $\lexer \;r\; s = \flex \;id \; r\;s \;(\mkeps (r\backslash s))$
   919 \end{center}
   919 \end{center}
   920 
   920 
   921 %\noindent
   921 \noindent
   922 %$\flex$ focuses on the injections instead of the derivatives ,
   922 The other trick, which is the crux in the existing proof, 
   923 %compared to the original definition of $\lexer$, which puts equal
   923 is the use of $\retrieve$-function by Sulzmann and Lu:
   924 %amount of emphasis on injection and derivative with respect to each
       
   925 %character:
       
   926 
       
   927 %\begin{center}
       
   928 %\begin{tabular}{lcl}
       
   929 %$\textit{lexer} \; r\; (c\!::\!s) $ & $\dn$ & $\textit{case} \; \lexer \; (r\backslash c) \;s \; %\textit{of}$ \\
       
   930 % & & $\textit{None} \; \Longrightarrow \; \textit{None}$\\
       
   931 %  & & $\textbar \; v \; \Longrightarrow \; \inj \; r\;c\;v$\\
       
   932 %$\textit{lexer} \; r\;  [\,] $ & $\dn$ & $\textit{if} \; \nullable (r) \; \textit{then} \; \mkeps% (r) \; \textit{else} \;None$
       
   933 %\end{tabular}
       
   934 %\end{center}
       
   935 
       
   936 \noindent
       
   937 The crux in the existing proof is how $\flex$ relates to injection, namely
       
   938 
       
   939 \begin{center}
       
   940 $\flex \; r \; id \; (s@[c]) \; v = \flex \;  r \; id \; s \; (inj \; (r\backslash s) \; c\; v)$.
       
   941 \end{center}
       
   942 
       
   943 \noindent
       
   944 This property allows one to rewrite an induction hypothesis like 
       
   945 
       
   946 \begin{center} 
       
   947 $ \flex \; r\; id\; (s@[c])\; v = \textit{decode} \;( \textit{retrieve}\; (\rup \backslash s) \; (\inj \; (r\backslash s) \;c\;v)\;) r$
       
   948 \end{center}
       
   949 
       
   950 
       
   951 \subsubsection{Retrieve Function}
       
   952 The crucial point is to find the
       
   953 $\textit{POSIX}$  information of a regular expression and how it is modified,
       
   954 augmented and propagated 
       
   955 during simplification in parallel with the regular expression that
       
   956 has not been simplified in the subsequent derivative operations.  To aid this,
       
   957 we use the helper function retrieve described by Sulzmann and Lu:
       
   958 \begin{center}
   924 \begin{center}
   959 \begin{tabular}{@{}l@{\hspace{2mm}}c@{\hspace{2mm}}l@{}}
   925 \begin{tabular}{@{}l@{\hspace{2mm}}c@{\hspace{2mm}}l@{}}
   960   $\textit{retrieve}\,(_{bs}\ONE)\,\Empty$ & $\dn$ & $bs$\\
   926   $\textit{retrieve}\,(_{bs}\ONE)\,\Empty$ & $\dn$ & $bs$\\
   961   $\textit{retrieve}\,(_{bs}{\bf c})\,(\Char\,d)$ & $\dn$ & $bs$\\
   927   $\textit{retrieve}\,(_{bs}{\bf c})\,(\Char\,d)$ & $\dn$ & $bs$\\
   962   $\textit{retrieve}\,(_{bs}\sum a::as)\,(\Left\,v)$ & $\dn$ &
   928   $\textit{retrieve}\,(_{bs}\sum a::as)\,(\Left\,v)$ & $\dn$ &
   971   \multicolumn{3}{l}{
   937   \multicolumn{3}{l}{
   972      \hspace{3cm}$bs \,@\, [1] \,@\, \textit{retrieve}\,a\,v\,@\,
   938      \hspace{3cm}$bs \,@\, [1] \,@\, \textit{retrieve}\,a\,v\,@\,
   973                     \textit{retrieve}\,(_{[]}a^*)\,(\Stars\,vs)$}\\
   939                     \textit{retrieve}\,(_{[]}a^*)\,(\Stars\,vs)$}\\
   974 \end{tabular}
   940 \end{tabular}
   975 \end{center}
   941 \end{center}
   976 %\comment{Did not read further}\\
   942 
       
   943 \noindent
   977 This function assembles the bitcode 
   944 This function assembles the bitcode 
   978 %that corresponds to a lexical value for how
       
   979 %the current derivative matches the suffix of the string(the characters that
       
   980 %have not yet appeared, but will appear as the successive derivatives go on.
       
   981 %How do we get this "future" information? By the value $v$, which is
       
   982 %computed by a pass of the algorithm that uses
       
   983 %$inj$ as described in the previous section).  
       
   984 using information from both the derivative regular expression and the
   945 using information from both the derivative regular expression and the
   985 value. Sulzmann and Lu poroposed this function, but did not prove
   946 value. Sulzmann and Lu proposed this function, but did not prove
   986 anything about it. Ausaf and Urban used it to connect the bitcoded
   947 anything about it. Ausaf and Urban made this important use of the
   987 algorithm to the older algorithm by the following equation:
   948 fact about $\retrieve$ in the proof:
   988  
   949  
   989  \begin{center} $inj \;a\; c \; v = \textit{decode} \; (\textit{retrieve}\;
   950  \begin{center} 
   990 	 (r^\uparrow)\backslash_{simp} \,c)\,v)$ 
   951  $\retrieve \rup \backslash c \; v = \retrieve  \rup (\inj \;r \;c \; v)$
   991  \end{center} 
   952  \end{center} 
   992 
   953 
   993 \noindent
   954 \noindent
   994 whereby $r^\uparrow$ stands for the internalised version of $r$. Ausaf
   955 whereby $r^\uparrow$ stands for the internalised version of $r$.
   995 and Urban also used this fact to prove  the correctness of bitcoded
   956 This fact, together with the fact of how $\flex$ relates to injection:
   996 algorithm without simplification.  Our purpose of using this, however,
   957 
       
   958 \begin{equation}\label{flex}
       
   959 \flex \; r \; id \; (s@[c]) \; v = \flex \;  r \; id \; s \; (inj \; (r\backslash s) \; c\; v).
       
   960 \end{equation}
       
   961 
       
   962 \noindent
       
   963 can be used to prove what we want:
       
   964 \begin{center} 
       
   965 $ \flex \; r\; id\; s\; v = \textit{decode} \;( \textit{bmkeps}\; (\rup \backslash s) ) r$
       
   966 \end{center}
       
   967 \noindent
       
   968 If we state the inductive hypothesis to be
       
   969 \begin{center} 
       
   970 $ \flex \; r\; id\; s\; v = \textit{decode} \;( \textit{retrieve}\; (\rup \backslash s) \; v \;) r$
       
   971 \end{center}
       
   972 \noindent
       
   973 where $\mkeps(r\backslash s) $ is denoted using $v$,
       
   974 then a reverse induction technique is 
       
   975 best suited here to make use of 
       
   976 the fact~\eqref{flex} in proving the $n+1$ step:
       
   977 \begin{center} 
       
   978 $ \flex \; r\; id\; (s@[c])\; v = \textit{decode} \;( \textit{retrieve}\; (\rup \backslash s) \; (\inj \; (r\backslash s) \;c\;v)\;) r$
       
   979 \end{center}
       
   980 and using a lemma that
       
   981 \begin{center}
       
   982 $\textit{retrieve}\; (\rup \backslash s) \; (\inj \; (r\backslash s) \;c\;v)\; = \textit{retrieve}\; (\rup \backslash s@[c])  \; v\; $
       
   983 \end{center}
       
   984 we get
       
   985 \begin{center} 
       
   986 $  \textit{decode} \;( \textit{retrieve}\; (\rup \backslash s) \; (\inj \; (r\backslash s) \;c\;v)\;) r = \textit{decode} \;( \textit{retrieve}\; (\rup \backslash s@[c]) \; v\;) r $
       
   987 \end{center}
       
   988 and the inductive step is done because
       
   989 \begin{center} 
       
   990 $ \textit{retrieve}\; (\rup \backslash s@[c]) \; \mkeps(r\backslash s) =  \bmkeps \;(\rup \backslash s@[c])$
       
   991 \end{center}
       
   992 
       
   993 \subsubsection{Using Retrieve Function In a New Setting}
       
   994  Ausaf
       
   995 and Urban used $\retrieve$  to prove  the correctness of bitcoded
       
   996 algorithm without simplification.  Our purpose of using it, however,
   997 is to establish 
   997 is to establish 
   998 
   998 
   999 \begin{center}
   999 \begin{center}
  1000 $ \textit{retrieve} \;
  1000 $ \textit{retrieve} \;
  1001 a \; v \;=\; \textit{retrieve}  \; (\textit{simp}\,a) \; v'.$
  1001 a \; v \;=\; \textit{retrieve}  \; (\textit{simp}\,a) \; v'.$
  1408 This discrepancy does not appear for the old
  1408 This discrepancy does not appear for the old
  1409 version of $\simp$.
  1409 version of $\simp$.
  1410 
  1410 
  1411 Why?
  1411 Why?
  1412 
  1412 
       
  1413 We shall illustrate in detail  again of what happened in each recursive call of 
       
  1414 $\backslash$ and $\backslash_{simp}$.
  1413 During the first derivative operation, 
  1415 During the first derivative operation, 
  1414 \begin{center}
  1416 \begin{center}
  1415 $\rup\backslash a=(    _0[ \ONE\cdot {\bf b}] + _1( _0[ _1\ONE \cdot {\bf a}^*] + [ \ONE \cdot {\bf a}])      )$,
  1417 $\rup\backslash a=(    _0[ \ONE\cdot {\bf b}] + _1( _0[ _1\ONE \cdot {\bf a}^*] + _1[ \ONE \cdot {\bf a}])      )$,
  1416 \end{center}
  1418 \end{center}
  1417 \noindent
  1419 \noindent
  1418  the second derivative gives us
  1420  the second derivative gives us
  1419  \begin{center}
  1421  \begin{center}
  1420 $\rup\backslash a=(_0( [\ZERO\cdot {\bf b}] + 0) + _1( _0( [\ZERO\cdot {\bf a}^*] + _1[ _1\ONE \cdot {\bf a}^*]) + _1( [\ZERO \cdot {\bf a}] + \ONE)  ))$,
  1422 $(\rup\backslash a)\backslash a=(_0( [\ZERO\cdot {\bf b}] + 0) + _1( _0( [\ZERO\cdot {\bf a}^*] + _1[ _1\ONE \cdot {\bf a}^*]) + _1( [\ZERO \cdot {\bf a}] + \ONE)  ))$,
  1421 \end{center}
  1423 \end{center}
  1422 
  1424 
  1423 \noindent
  1425 \noindent
  1424 and this simplifies to
  1426 and this simplifies to
  1425 \begin{center}
  1427 \begin{center}
  1426 $ _1(_{011}{\bf a}^* +  _1\ONE)  $ 
  1428 $ _1(_{011}{\bf a}^* +  _1\ONE)  $ 
  1427 \end{center}
  1429 \end{center}
  1428 
  1430 because when  $(\rup\backslash a)\backslash a$ goes 
       
  1431 through simplification, according to our new $\simp$
       
  1432 clause,
       
  1433 each component of the list
       
  1434 $[_0( [\ZERO\cdot {\bf b}] + 0) , _1( _0( [\ZERO\cdot {\bf a}^*] + _1[ _1\ONE \cdot {\bf a}^*]) + _1( [\ZERO \cdot {\bf a}] + \ONE)  )]$
       
  1435 is simplified into
       
  1436 $[\ZERO, _1(_{011}{\bf a}^* +  _1\ONE) ]$,
       
  1437 this fits into the definition of $\textit{hollowAlternatives}$,
       
  1438 so the structure of the annotated regular expression
       
  1439 \begin{center}
       
  1440 $_1(_{011}{\bf a}^* +  _1\ONE) $
       
  1441 \end{center}
       
  1442 is preserved, in the sense that the outside bit $_1$
       
  1443 is not fused inside.
  1429 If, after the first derivative we apply simplification we get
  1444 If, after the first derivative we apply simplification we get
  1430 $(_0{\bf b}  + _{101}{\bf  a}^* + _{11}{\bf a}  )$,
  1445 $(_0{\bf b}  + _{101}{\bf  a}^* + _{11}{\bf a}  )$,
  1431 and we do another derivative, getting
  1446 and we do another derivative, getting
  1432 $(\ZERO  + (_{101}(\ONE \cdot _1{\bf a}^*)+_{11}\ONE)$,
  1447 $(\ZERO  + (_{101}(\ONE \cdot _1{\bf a}^*)+_{11}\ONE)$,
  1433 which simplifies to 
  1448 which simplifies to