471 \begin{center} |
471 \begin{center} |
472 \begin{tabular}{lcl} |
472 \begin{tabular}{lcl} |
473 $\textit{a}$ & $::=$ & $\ZERO$\\ |
473 $\textit{a}$ & $::=$ & $\ZERO$\\ |
474 & $\mid$ & $_{bs}\ONE$\\ |
474 & $\mid$ & $_{bs}\ONE$\\ |
475 & $\mid$ & $_{bs}{\bf c}$\\ |
475 & $\mid$ & $_{bs}{\bf c}$\\ |
476 & $\mid$ & $_{bs}\oplus\,as$\\ |
476 & $\mid$ & $_{bs}\sum\,as$\\ |
477 & $\mid$ & $_{bs}a_1\cdot a_2$\\ |
477 & $\mid$ & $_{bs}a_1\cdot a_2$\\ |
478 & $\mid$ & $_{bs}a^*$ |
478 & $\mid$ & $_{bs}a^*$ |
479 \end{tabular} |
479 \end{tabular} |
480 \end{center} |
480 \end{center} |
481 %(in \textit{ALTS}) |
481 %(in \textit{ALTS}) |
482 |
482 |
483 \noindent |
483 \noindent |
484 where $bs$ stands for bitcodes, $a$ for $\bold{a}$nnotated regular |
484 where $bs$ stands for bitcodes, $a$ for $\bold{a}$nnotated regular |
485 expressions and $as$ for a list of annotated regular expressions. |
485 expressions and $as$ for a list of annotated regular expressions. |
486 The alternative constructor($\oplus$) has been generalized to |
486 The alternative constructor($\sum$) has been generalized to |
487 accept a list of annotated regular expressions rather than just 2. |
487 accept a list of annotated regular expressions rather than just 2. |
488 We will show that these bitcodes encode information about |
488 We will show that these bitcodes encode information about |
489 the (POSIX) value that should be generated by the Sulzmann and Lu |
489 the (POSIX) value that should be generated by the Sulzmann and Lu |
490 algorithm. |
490 algorithm. |
491 |
491 |
500 \begin{tabular}{lcl} |
500 \begin{tabular}{lcl} |
501 $(\ZERO)^\uparrow$ & $\dn$ & $\ZERO$\\ |
501 $(\ZERO)^\uparrow$ & $\dn$ & $\ZERO$\\ |
502 $(\ONE)^\uparrow$ & $\dn$ & $_{[]}\ONE$\\ |
502 $(\ONE)^\uparrow$ & $\dn$ & $_{[]}\ONE$\\ |
503 $(c)^\uparrow$ & $\dn$ & $_{[]}{\bf c}$\\ |
503 $(c)^\uparrow$ & $\dn$ & $_{[]}{\bf c}$\\ |
504 $(r_1 + r_2)^\uparrow$ & $\dn$ & |
504 $(r_1 + r_2)^\uparrow$ & $\dn$ & |
505 $_{[]}\oplus[\textit{fuse}\,[0]\,r_1^\uparrow,\, |
505 $_{[]}\sum[\textit{fuse}\,[0]\,r_1^\uparrow,\, |
506 \textit{fuse}\,[1]\,r_2^\uparrow]$\\ |
506 \textit{fuse}\,[1]\,r_2^\uparrow]$\\ |
507 $(r_1\cdot r_2)^\uparrow$ & $\dn$ & |
507 $(r_1\cdot r_2)^\uparrow$ & $\dn$ & |
508 $_{[]}r_1^\uparrow \cdot r_2^\uparrow$\\ |
508 $_{[]}r_1^\uparrow \cdot r_2^\uparrow$\\ |
509 $(r^*)^\uparrow$ & $\dn$ & |
509 $(r^*)^\uparrow$ & $\dn$ & |
510 $_{[]}(r^\uparrow)^*$\\ |
510 $_{[]}(r^\uparrow)^*$\\ |
524 $\textit{fuse}\;bs \; \ZERO$ & $\dn$ & $\ZERO$\\ |
524 $\textit{fuse}\;bs \; \ZERO$ & $\dn$ & $\ZERO$\\ |
525 $\textit{fuse}\;bs\; _{bs'}\ONE$ & $\dn$ & |
525 $\textit{fuse}\;bs\; _{bs'}\ONE$ & $\dn$ & |
526 $_{bs @ bs'}\ONE$\\ |
526 $_{bs @ bs'}\ONE$\\ |
527 $\textit{fuse}\;bs\;_{bs'}{\bf c}$ & $\dn$ & |
527 $\textit{fuse}\;bs\;_{bs'}{\bf c}$ & $\dn$ & |
528 $_{bs@bs'}{\bf c}$\\ |
528 $_{bs@bs'}{\bf c}$\\ |
529 $\textit{fuse}\;bs\,_{bs'}\oplus\textit{as}$ & $\dn$ & |
529 $\textit{fuse}\;bs\,_{bs'}\sum\textit{as}$ & $\dn$ & |
530 $_{bs@bs'}\oplus\textit{as}$\\ |
530 $_{bs@bs'}\sum\textit{as}$\\ |
531 $\textit{fuse}\;bs\; _{bs'}a_1\cdot a_2$ & $\dn$ & |
531 $\textit{fuse}\;bs\; _{bs'}a_1\cdot a_2$ & $\dn$ & |
532 $_{bs@bs'}a_1 \cdot a_2$\\ |
532 $_{bs@bs'}a_1 \cdot a_2$\\ |
533 $\textit{fuse}\;bs\,_{bs'}a^*$ & $\dn$ & |
533 $\textit{fuse}\;bs\,_{bs'}a^*$ & $\dn$ & |
534 $_{bs @ bs'}a^*$ |
534 $_{bs @ bs'}a^*$ |
535 \end{tabular} |
535 \end{tabular} |
593 $(\ZERO)\,\backslash c$ & $\dn$ & $\ZERO$\\ |
593 $(\ZERO)\,\backslash c$ & $\dn$ & $\ZERO$\\ |
594 $(_{bs}\ONE)\,\backslash c$ & $\dn$ & $\ZERO$\\ |
594 $(_{bs}\ONE)\,\backslash c$ & $\dn$ & $\ZERO$\\ |
595 $(_{bs}{\bf d})\,\backslash c$ & $\dn$ & |
595 $(_{bs}{\bf d})\,\backslash c$ & $\dn$ & |
596 $\textit{if}\;c=d\; \;\textit{then}\; |
596 $\textit{if}\;c=d\; \;\textit{then}\; |
597 _{bs}\ONE\;\textit{else}\;\ZERO$\\ |
597 _{bs}\ONE\;\textit{else}\;\ZERO$\\ |
598 $(_{bs}\oplus \;\textit{as})\,\backslash c$ & $\dn$ & |
598 $(_{bs}\sum \;\textit{as})\,\backslash c$ & $\dn$ & |
599 $_{bs}\oplus\;(\textit{as.map}(\backslash c))$\\ |
599 $_{bs}\sum\;(\textit{as.map}(\backslash c))$\\ |
600 $(_{bs}\;a_1\cdot a_2)\,\backslash c$ & $\dn$ & |
600 $(_{bs}\;a_1\cdot a_2)\,\backslash c$ & $\dn$ & |
601 $\textit{if}\;\textit{bnullable}\,a_1$\\ |
601 $\textit{if}\;\textit{bnullable}\,a_1$\\ |
602 & &$\textit{then}\;_{bs}\oplus\,[(_{[]}\,(a_1\,\backslash c)\cdot\,a_2),$\\ |
602 & &$\textit{then}\;_{bs}\sum\,[(_{[]}\,(a_1\,\backslash c)\cdot\,a_2),$\\ |
603 & &$\phantom{\textit{then},\;_{bs}\oplus\,}(\textit{fuse}\,(\textit{bmkeps}\,a_1)\,(a_2\,\backslash c))]$\\ |
603 & &$\phantom{\textit{then},\;_{bs}\sum\,}(\textit{fuse}\,(\textit{bmkeps}\,a_1)\,(a_2\,\backslash c))]$\\ |
604 & &$\textit{else}\;_{bs}\,(a_1\,\backslash c)\cdot a_2$\\ |
604 & &$\textit{else}\;_{bs}\,(a_1\,\backslash c)\cdot a_2$\\ |
605 $(_{bs}a^*)\,\backslash c$ & $\dn$ & |
605 $(_{bs}a^*)\,\backslash c$ & $\dn$ & |
606 $_{bs}(\textit{fuse}\, [0] \; r\,\backslash c)\cdot |
606 $_{bs}(\textit{fuse}\, [0] \; r\,\backslash c)\cdot |
607 (_{[]}r^*))$ |
607 (_{[]}r^*))$ |
608 \end{tabular} |
608 \end{tabular} |
623 \backslash c)$ will collapse the regular expression $a_1$(as it has |
623 \backslash c)$ will collapse the regular expression $a_1$(as it has |
624 already been fully matched) and store the parsing information at the |
624 already been fully matched) and store the parsing information at the |
625 head of the regular expression $a_2 \backslash c$ by fusing to it. The |
625 head of the regular expression $a_2 \backslash c$ by fusing to it. The |
626 bitsequence $\textit{bs}$, which was initially attached to the |
626 bitsequence $\textit{bs}$, which was initially attached to the |
627 first element of the sequence $a_1 \cdot a_2$, has |
627 first element of the sequence $a_1 \cdot a_2$, has |
628 now been elevated to the top-level of $\oplus$, as this information will be |
628 now been elevated to the top-level of $\sum$, as this information will be |
629 needed whichever way the sequence is matched---no matter whether $c$ belongs |
629 needed whichever way the sequence is matched---no matter whether $c$ belongs |
630 to $a_1$ or $ a_2$. After building these derivatives and maintaining all |
630 to $a_1$ or $ a_2$. After building these derivatives and maintaining all |
631 the lexing information, we complete the lexing by collecting the |
631 the lexing information, we complete the lexing by collecting the |
632 bitcodes using a generalised version of the $\textit{mkeps}$ function |
632 bitcodes using a generalised version of the $\textit{mkeps}$ function |
633 for annotated regular expressions, called $\textit{bmkeps}$: |
633 for annotated regular expressions, called $\textit{bmkeps}$: |
635 |
635 |
636 %\begin{definition}[\textit{bmkeps}]\mbox{} |
636 %\begin{definition}[\textit{bmkeps}]\mbox{} |
637 \begin{center} |
637 \begin{center} |
638 \begin{tabular}{lcl} |
638 \begin{tabular}{lcl} |
639 $\textit{bmkeps}\,(_{bs}\ONE)$ & $\dn$ & $bs$\\ |
639 $\textit{bmkeps}\,(_{bs}\ONE)$ & $\dn$ & $bs$\\ |
640 $\textit{bmkeps}\,(_{bs}\oplus a::\textit{as})$ & $\dn$ & |
640 $\textit{bmkeps}\,(_{bs}\sum a::\textit{as})$ & $\dn$ & |
641 $\textit{if}\;\textit{bnullable}\,a$\\ |
641 $\textit{if}\;\textit{bnullable}\,a$\\ |
642 & &$\textit{then}\;bs\,@\,\textit{bmkeps}\,a$\\ |
642 & &$\textit{then}\;bs\,@\,\textit{bmkeps}\,a$\\ |
643 & &$\textit{else}\;bs\,@\,\textit{bmkeps}\,(_{bs}\oplus \textit{as})$\\ |
643 & &$\textit{else}\;bs\,@\,\textit{bmkeps}\,(_{bs}\sum \textit{as})$\\ |
644 $\textit{bmkeps}\,(_{bs} a_1 \cdot a_2)$ & $\dn$ & |
644 $\textit{bmkeps}\,(_{bs} a_1 \cdot a_2)$ & $\dn$ & |
645 $bs \,@\,\textit{bmkeps}\,a_1\,@\, \textit{bmkeps}\,a_2$\\ |
645 $bs \,@\,\textit{bmkeps}\,a_1\,@\, \textit{bmkeps}\,a_2$\\ |
646 $\textit{bmkeps}\,(_{bs}a^*)$ & $\dn$ & |
646 $\textit{bmkeps}\,(_{bs}a^*)$ & $\dn$ & |
647 $bs \,@\, [0]$ |
647 $bs \,@\, [0]$ |
648 \end{tabular} |
648 \end{tabular} |
682 that the expected bound can be achieved. Obviously we could only |
682 that the expected bound can be achieved. Obviously we could only |
683 partially cover the search space as there are infinitely many regular |
683 partially cover the search space as there are infinitely many regular |
684 expressions and strings. |
684 expressions and strings. |
685 |
685 |
686 One modification we introduced is to allow a list of annotated regular |
686 One modification we introduced is to allow a list of annotated regular |
687 expressions in the $\oplus$ constructor. This allows us to not just |
687 expressions in the $\sum$ constructor. This allows us to not just |
688 delete unnecessary $\ZERO$s and $\ONE$s from regular expressions, but |
688 delete unnecessary $\ZERO$s and $\ONE$s from regular expressions, but |
689 also unnecessary ``copies'' of regular expressions (very similar to |
689 also unnecessary ``copies'' of regular expressions (very similar to |
690 simplifying $r + r$ to just $r$, but in a more general setting). Another |
690 simplifying $r + r$ to just $r$, but in a more general setting). Another |
691 modification is that we use simplification rules inspired by Antimirov's |
691 modification is that we use simplification rules inspired by Antimirov's |
692 work on partial derivatives. They maintain the idea that only the first |
692 work on partial derivatives. They maintain the idea that only the first |
705 &&$\quad\textit{case} \; (\_, \ZERO) \Rightarrow \ZERO$ \\ |
705 &&$\quad\textit{case} \; (\_, \ZERO) \Rightarrow \ZERO$ \\ |
706 &&$\quad\textit{case} \; (\ONE, a_2') \Rightarrow \textit{fuse} \; bs \; a_2'$ \\ |
706 &&$\quad\textit{case} \; (\ONE, a_2') \Rightarrow \textit{fuse} \; bs \; a_2'$ \\ |
707 &&$\quad\textit{case} \; (a_1', \ONE) \Rightarrow \textit{fuse} \; bs \; a_1'$ \\ |
707 &&$\quad\textit{case} \; (a_1', \ONE) \Rightarrow \textit{fuse} \; bs \; a_1'$ \\ |
708 &&$\quad\textit{case} \; (a_1', a_2') \Rightarrow _{bs}a_1' \cdot a_2'$ \\ |
708 &&$\quad\textit{case} \; (a_1', a_2') \Rightarrow _{bs}a_1' \cdot a_2'$ \\ |
709 |
709 |
710 $\textit{simp} \; (_{bs}\oplus \textit{as})$ & $\dn$ & $\textit{distinct}( \textit{flatten} ( \textit{map simp as})) \; \textit{match} $ \\ |
710 $\textit{simp} \; (_{bs}\sum \textit{as})$ & $\dn$ & $\textit{distinct}( \textit{flatten} ( \textit{map simp as})) \; \textit{match} $ \\ |
711 &&$\quad\textit{case} \; [] \Rightarrow \ZERO$ \\ |
711 &&$\quad\textit{case} \; [] \Rightarrow \ZERO$ \\ |
712 &&$\quad\textit{case} \; a :: [] \Rightarrow \textit{fuse bs a}$ \\ |
712 &&$\quad\textit{case} \; a :: [] \Rightarrow \textit{fuse bs a}$ \\ |
713 &&$\quad\textit{case} \; as' \Rightarrow _{bs}\oplus \textit{as'}$\\ |
713 &&$\quad\textit{case} \; as' \Rightarrow _{bs}\sum \textit{as'}$\\ |
714 |
714 |
715 $\textit{simp} \; a$ & $\dn$ & $\textit{a} \qquad \textit{otherwise}$ |
715 $\textit{simp} \; a$ & $\dn$ & $\textit{a} \qquad \textit{otherwise}$ |
716 \end{tabular} |
716 \end{tabular} |
717 \end{center} |
717 \end{center} |
718 |
718 |
720 The simplification does a pattern matching on the regular expression. |
720 The simplification does a pattern matching on the regular expression. |
721 When it detected that the regular expression is an alternative or |
721 When it detected that the regular expression is an alternative or |
722 sequence, it will try to simplify its children regular expressions |
722 sequence, it will try to simplify its children regular expressions |
723 recursively and then see if one of the children turn into $\ZERO$ or |
723 recursively and then see if one of the children turn into $\ZERO$ or |
724 $\ONE$, which might trigger further simplification at the current level. |
724 $\ONE$, which might trigger further simplification at the current level. |
725 The most involved part is the $\oplus$ clause, where we use two |
725 The most involved part is the $\sum$ clause, where we use two |
726 auxiliary functions $\textit{flatten}$ and $\textit{distinct}$ to open up nested |
726 auxiliary functions $\textit{flatten}$ and $\textit{distinct}$ to open up nested |
727 alternatives and reduce as many duplicates as possible. Function |
727 alternatives and reduce as many duplicates as possible. Function |
728 $\textit{distinct}$ keeps the first occurring copy only and remove all later ones |
728 $\textit{distinct}$ keeps the first occurring copy only and remove all later ones |
729 when detected duplicates. Function $\textit{flatten}$ opens up nested $\oplus$s. |
729 when detected duplicates. Function $\textit{flatten}$ opens up nested $\sum$s. |
730 Its recursive definition is given below: |
730 Its recursive definition is given below: |
731 |
731 |
732 \begin{center} |
732 \begin{center} |
733 \begin{tabular}{@{}lcl@{}} |
733 \begin{tabular}{@{}lcl@{}} |
734 $\textit{flatten} \; (_{bs}\oplus \textit{as}) :: \textit{as'}$ & $\dn$ & $(\textit{map} \; |
734 $\textit{flatten} \; (_{bs}\sum \textit{as}) :: \textit{as'}$ & $\dn$ & $(\textit{map} \; |
735 (\textit{fuse}\;bs)\; \textit{as}) \; @ \; \textit{flatten} \; as' $ \\ |
735 (\textit{fuse}\;bs)\; \textit{as}) \; @ \; \textit{flatten} \; as' $ \\ |
736 $\textit{flatten} \; \ZERO :: as'$ & $\dn$ & $ \textit{flatten} \; \textit{as'} $ \\ |
736 $\textit{flatten} \; \ZERO :: as'$ & $\dn$ & $ \textit{flatten} \; \textit{as'} $ \\ |
737 $\textit{flatten} \; a :: as'$ & $\dn$ & $a :: \textit{flatten} \; \textit{as'}$ \quad(otherwise) |
737 $\textit{flatten} \; a :: as'$ & $\dn$ & $a :: \textit{flatten} \; \textit{as'}$ \quad(otherwise) |
738 \end{tabular} |
738 \end{tabular} |
739 \end{center} |
739 \end{center} |
959 we use the helper function retrieve described by Sulzmann and Lu: |
959 we use the helper function retrieve described by Sulzmann and Lu: |
960 \begin{center} |
960 \begin{center} |
961 \begin{tabular}{@{}l@{\hspace{2mm}}c@{\hspace{2mm}}l@{}} |
961 \begin{tabular}{@{}l@{\hspace{2mm}}c@{\hspace{2mm}}l@{}} |
962 $\textit{retrieve}\,(_{bs}\ONE)\,\Empty$ & $\dn$ & $bs$\\ |
962 $\textit{retrieve}\,(_{bs}\ONE)\,\Empty$ & $\dn$ & $bs$\\ |
963 $\textit{retrieve}\,(_{bs}{\bf c})\,(\Char\,d)$ & $\dn$ & $bs$\\ |
963 $\textit{retrieve}\,(_{bs}{\bf c})\,(\Char\,d)$ & $\dn$ & $bs$\\ |
964 $\textit{retrieve}\,(_{bs}\oplus a::as)\,(\Left\,v)$ & $\dn$ & |
964 $\textit{retrieve}\,(_{bs}\sum a::as)\,(\Left\,v)$ & $\dn$ & |
965 $bs \,@\, \textit{retrieve}\,a\,v$\\ |
965 $bs \,@\, \textit{retrieve}\,a\,v$\\ |
966 $\textit{retrieve}\,(_{bs}\oplus a::as)\,(\Right\,v)$ & $\dn$ & |
966 $\textit{retrieve}\,(_{bs}\sum a::as)\,(\Right\,v)$ & $\dn$ & |
967 $\textit{bs} \,@\, \textit{retrieve}\,(_{[]}\oplus as)\,v$\\ |
967 $\textit{bs} \,@\, \textit{retrieve}\,(_{[]}\sum as)\,v$\\ |
968 $\textit{retrieve}\,(_{bs}a_1\cdot a_2)\,(\Seq\,v_1\,v_2)$ & $\dn$ & |
968 $\textit{retrieve}\,(_{bs}a_1\cdot a_2)\,(\Seq\,v_1\,v_2)$ & $\dn$ & |
969 $bs \,@\,\textit{retrieve}\,a_1\,v_1\,@\, \textit{retrieve}\,a_2\,v_2$\\ |
969 $bs \,@\,\textit{retrieve}\,a_1\,v_1\,@\, \textit{retrieve}\,a_2\,v_2$\\ |
970 $\textit{retrieve}\,(_{bs}a^*)\,(\Stars\,[])$ & $\dn$ & |
970 $\textit{retrieve}\,(_{bs}a^*)\,(\Stars\,[])$ & $\dn$ & |
971 $bs \,@\, [0]$\\ |
971 $bs \,@\, [0]$\\ |
972 $\textit{retrieve}\,(_{bs}a^*)\,(\Stars\,(v\!::\!vs))$ & $\dn$ &\\ |
972 $\textit{retrieve}\,(_{bs}a^*)\,(\Stars\,(v\!::\!vs))$ & $\dn$ &\\ |
1163 \end{tabular} |
1163 \end{tabular} |
1164 \end{center} |
1164 \end{center} |
1165 |
1165 |
1166 \noindent |
1166 \noindent |
1167 |
1167 |
1168 and the difinition of $\flatten$: |
1168 and the definition of $\flatten$: |
1169 \begin{center} |
1169 \begin{center} |
1170 \begin{tabular}{c c c} |
1170 \begin{tabular}{c c c} |
1171 $\flatten \; []$ & $\dn$ & $[]$\\ |
1171 $\flatten \; []$ & $\dn$ & $[]$\\ |
1172 $\flatten \; \ZERO::rs$ & $\dn$ & $rs$\\ |
1172 $\flatten \; \ZERO::rs$ & $\dn$ & $rs$\\ |
1173 $\flatten \;(_{\textit{bs}_1}\oplus \textit{rs}_1 ::rs)$ & $\dn$ & $(\map \, (\fuse \, \textit{bs}_1) \,\textit{rs}_1) ::: \flatten(rs)$\\ |
1173 $\flatten \;(_{\textit{bs}_1}\sum \textit{rs}_1 ::rs)$ & $\dn$ & $(\map \, (\fuse \, \textit{bs}_1) \,\textit{rs}_1) ::: \flatten(rs)$\\ |
1174 $\flatten \; r :: rs$ & $\dn$ & $r::\flatten(rs)$ |
1174 $\flatten \; r :: rs$ & $\dn$ & $r::\flatten(rs)$ |
1175 \end{tabular} |
1175 \end{tabular} |
1176 \end{center} |
1176 \end{center} |
1177 |
1177 |
1178 \noindent |
1178 \noindent |
1179 If we call $\simp$ on $\rup\backslash a$, just as $\backslash_{simp}$ |
1179 If we call $\simp$ on $\rup\backslash a$, just as $\backslash_{simp}$ |
1180 requires, then we would go throught the third clause of |
1180 requires, then we would go through the third clause of |
1181 the sequence case:$\quad\textit{case} \; (\ONE, a_2') \Rightarrow \textit{fuse} \; bs \; a_2'$. |
1181 the sequence case:$\quad\textit{case} \; (\ONE, a_2') \Rightarrow \textit{fuse} \; bs \; a_2'$. |
1182 The $\ZERO$ of $(_0\ONE + \ZERO)$ is thrown away |
1182 The $\ZERO$ of $(_0\ONE + \ZERO)$ is thrown away |
1183 by $\flatten$ and |
1183 by $\flatten$ and |
1184 $_0\ONE$ merged into $(_0a + _1a^*)$ by simply |
1184 $_0\ONE$ merged into $(_0a + _1a^*)$ by simply |
1185 putting its bits($_0$) to the front of the second component: |
1185 putting its bits($_0$) to the front of the second component: |
1188 namely, $(_0(_0a + _1a^*))\backslash a$, we get |
1188 namely, $(_0(_0a + _1a^*))\backslash a$, we get |
1189 $ |
1189 $ |
1190 _0(_0 \ONE + _1(_1\ONE \cdot a^*)) |
1190 _0(_0 \ONE + _1(_1\ONE \cdot a^*)) |
1191 $, and this simplifies to $_0(_0 \ONE + _{11} a^*)$ |
1191 $, and this simplifies to $_0(_0 \ONE + _{11} a^*)$ |
1192 by the third clause of the alternative case: |
1192 by the third clause of the alternative case: |
1193 $\quad\textit{case} \; as' \Rightarrow \textit{ALTS}\;bs\;as'$. |
1193 \begin{center} |
|
1194 $\quad\textit{case} \; as' \Rightarrow _{bs}\sum{as'}$. |
|
1195 \end{center} |
|
1196 |
|
1197 \noindent |
1194 The outmost bit $_0$ stays with |
1198 The outmost bit $_0$ stays with |
1195 the outmost regular expression, rather than being fused to |
1199 the outmost regular expression, rather than being fused to |
1196 its child regular expressions, as what we will later see happens |
1200 its child regular expressions, as what we will later see happens |
1197 to $\simp(\rup\backslash \, s)$. |
1201 to $\simp(\rup\backslash \, s)$. |
1198 If we choose to not simplify in the midst of derivative operations, |
1202 If we choose to not simplify in the midst of derivative operations, |
1238 $\rup\backslash_{simp} \, s$ |
1242 $\rup\backslash_{simp} \, s$ |
1239 and $\simp(\rup\backslash s)$: |
1243 and $\simp(\rup\backslash s)$: |
1240 \begin{center} |
1244 \begin{center} |
1241 \begin{tabular}{lcl} |
1245 \begin{tabular}{lcl} |
1242 $D =\{ (r_1 \cdot r_2,\; [c_1c_2]) \mid $ & $\simp(r_2) = r_2, \simp(r_1 \backslash c_1) = \ONE,$\\ |
1246 $D =\{ (r_1 \cdot r_2,\; [c_1c_2]) \mid $ & $\simp(r_2) = r_2, \simp(r_1 \backslash c_1) = \ONE,$\\ |
1243 $r_1 \; not \; \nullable, c_2 \in L(r_2),$ & $\exists \textit{rs},\textit{bs}.\; r_2 \backslash c_2 = _{bs}{\oplus rs}$\\ |
1247 $r_1 \; not \; \nullable, c_2 \in L(r_2),$ & $\exists \textit{rs},\textit{bs}.\; r_2 \backslash c_2 = _{bs}{\sum rs}$\\ |
1244 $\exists \textit{rs}_1. \; \simp(r_2 \backslash c_2) = _{bs}{\oplus \textit{rs}_1}$ & $and \;\simp(r_1 \backslash [c_1c_2]) = \ZERO\}$\\ |
1248 $\exists \textit{rs}_1. \; \simp(r_2 \backslash c_2) = _{bs}{\sum \textit{rs}_1}$ & $and \;\simp(r_1 \backslash [c_1c_2]) = \ZERO\}$\\ |
1245 \end{tabular} |
1249 \end{tabular} |
1246 \end{center} |
1250 \end{center} |
1247 We take a pair $(r, \;s)$ from the set $D$. |
1251 We take a pair $(r, \;s)$ from the set $D$. |
1248 |
1252 |
1249 Now we compute ${\bf \rup \backslash_{simp} s}$, we get: |
1253 Now we compute ${\bf \rup \backslash_{simp} s}$, we get: |
1282 \noindent |
1286 \noindent |
1283 Again, using the properties above, we obtain |
1287 Again, using the properties above, we obtain |
1284 the following chain of equalities: |
1288 the following chain of equalities: |
1285 \begin{center} |
1289 \begin{center} |
1286 $\simp(\rup \backslash s)= \simp \left[ ((r_1 \cdot r_2 )\backslash c_1) \backslash c_2 \right]= \simp\left[ \left(r_1 \backslash c_1\right) \cdot r_2 \big) \backslash c_2 \right]$\\ |
1290 $\simp(\rup \backslash s)= \simp \left[ ((r_1 \cdot r_2 )\backslash c_1) \backslash c_2 \right]= \simp\left[ \left(r_1 \backslash c_1\right) \cdot r_2 \big) \backslash c_2 \right]$\\ |
1287 $= \simp \left[ \oplus[\big( \left(r_1 \backslash c_1\right) \backslash c_2 \big) \cdot r_2 \; , \; \fuse \; (\bmkeps \;r_1\backslash c_1) \; r_2 \backslash c_2 ] \right]$, |
1291 $= \simp \left[ \sum[\big( \left(r_1 \backslash c_1\right) \backslash c_2 \big) \cdot r_2 \; , \; \fuse \; (\bmkeps \;r_1\backslash c_1) \; r_2 \backslash c_2 ] \right]$, |
1288 \end{center} |
1292 \end{center} |
1289 \noindent |
1293 \noindent |
1290 as before, we call the bitcode returned by |
1294 as before, we call the bitcode returned by |
1291 $\bmkeps(r_1\backslash c_1)$ as |
1295 $\bmkeps(r_1\backslash c_1)$ as |
1292 $\textit{bs}_2$. |
1296 $\textit{bs}_2$. |
1293 Also, $\simp(r_2 \backslash c_2)$ is |
1297 Also, $\simp(r_2 \backslash c_2)$ is |
1294 $_{bs}\oplus \textit{rs}_1$, |
1298 $_{bs}\sum \textit{rs}_1$, |
1295 and $( \left(r_1 \backslash c_1\right) \backslash c_2 \cdot r_2)$ |
1299 and $( \left(r_1 \backslash c_1\right) \backslash c_2 \cdot r_2)$ |
1296 simplifies to $\ZERO$, |
1300 simplifies to $\ZERO$, |
1297 so the above term can be expanded as |
1301 so the above term can be expanded as |
1298 \begin{center} |
1302 \begin{center} |
1299 \begin{tabular}{l} |
1303 \begin{tabular}{l} |
1300 $\textit{distinct}(\flatten[\ZERO\;, \; _{\textit{bs}_2++\textit{bs}}\oplus \textit{rs}_1] ) \; \textit{match} $ \\ |
1304 $\textit{distinct}(\flatten[\ZERO\;, \; _{\textit{bs}_2++\textit{bs}}\sum \textit{rs}_1] ) \; \textit{match} $ \\ |
1301 $\textit{case} \; [] \Rightarrow \ZERO$ \\ |
1305 $\textit{case} \; [] \Rightarrow \ZERO$ \\ |
1302 $\textit{case} \; a :: [] \Rightarrow \textit{\fuse \; \textit{bs} a}$ \\ |
1306 $\textit{case} \; a :: [] \Rightarrow \textit{\fuse \; \textit{bs} a}$ \\ |
1303 $\textit{case} \; as' \Rightarrow _{[]}\oplus as'$\\ |
1307 $\textit{case} \; as' \Rightarrow _{[]}\sum as'$\\ |
1304 \end{tabular} |
1308 \end{tabular} |
1305 \end{center} |
1309 \end{center} |
1306 \noindent |
1310 \noindent |
1307 Applying the definition of $\flatten$, we get |
1311 Applying the definition of $\flatten$, we get |
1308 \begin{center} |
1312 \begin{center} |
1309 $_{[]}\oplus (\textit{map} \; \fuse (\textit{bs}_2 ++ bs) rs_1)$ |
1313 $_{[]}\sum (\textit{map} \; \fuse (\textit{bs}_2 ++ bs) rs_1)$ |
1310 \end{center} |
1314 \end{center} |
1311 \noindent |
1315 \noindent |
1312 compared to the result |
1316 compared to the result |
1313 \begin{center} |
1317 \begin{center} |
1314 $ \; _{bs_2++bs}{\oplus \textit{rs}_1} $ |
1318 $ \; _{bs_2++bs}{\sum \textit{rs}_1} $ |
1315 \end{center} |
1319 \end{center} |
1316 \noindent |
1320 \noindent |
1317 Note how these two regular expressions only |
1321 Note how these two regular expressions only |
1318 differ in terms of the position of the bits |
1322 differ in terms of the position of the bits |
1319 $\textit{bs}_2++\textit{bs}$. They are the same otherwise. |
1323 $\textit{bs}_2++\textit{bs}$. They are the same otherwise. |
1323 there exists an outer alternative. |
1327 there exists an outer alternative. |
1324 Note how the absence of simplification |
1328 Note how the absence of simplification |
1325 caused $\simp(\rup \backslash s)$ to |
1329 caused $\simp(\rup \backslash s)$ to |
1326 generate the nested alternatives structure: |
1330 generate the nested alternatives structure: |
1327 \begin{center} |
1331 \begin{center} |
1328 $ \oplus[\ZERO \;, \; _{bs}\oplus \textit{rs} ]$ |
1332 $ \sum[\ZERO \;, \; _{bs}\sum \textit{rs} ]$ |
1329 \end{center} |
1333 \end{center} |
1330 and this will always trigger the $\flatten$ to |
1334 and this will always trigger the $\flatten$ to |
1331 spill out the inner alternative's bitcode $\textit{bs}$, |
1335 spill out the inner alternative's bitcode $\textit{bs}$, |
1332 whereas when |
1336 whereas when |
1333 simplification is done along the way, |
1337 simplification is done along the way, |
1338 How about we do not allow the function $\simp$ |
1342 How about we do not allow the function $\simp$ |
1339 to fuse out the bits when it is unnecessary? |
1343 to fuse out the bits when it is unnecessary? |
1340 Like, for the above regular expression, we might |
1344 Like, for the above regular expression, we might |
1341 just delete the outer layer of alternative |
1345 just delete the outer layer of alternative |
1342 \begin{center} |
1346 \begin{center} |
1343 \st{$ {\oplus[\ZERO \;,}$} $_{bs}\oplus \textit{rs}$ \st{$]$} |
1347 \st{$ {\sum[\ZERO \;,}$} $_{bs}\sum \textit{rs}$ \st{$]$} |
1344 \end{center} |
1348 \end{center} |
1345 and get $_{bs}\oplus \textit{rs}$ instead, without |
1349 and get $_{bs}\sum \textit{rs}$ instead, without |
1346 fusing the bits $\textit{bs}$ inside to every element |
1350 fusing the bits $\textit{bs}$ inside to every element |
1347 of $\textit{rs}$. |
1351 of $\textit{rs}$. |
1348 This idea can be realized by making the following |
1352 This idea can be realized by making the following |
1349 changes to the $\simp$-function: |
1353 changes to the $\simp$-function: |
1350 \begin{center} |
1354 \begin{center} |
1351 \begin{tabular}{@{}lcl@{}} |
1355 \begin{tabular}{@{}lcl@{}} |
1352 |
1356 |
1353 $\textit{simp}' \; (_{\textit{bs}}(a_1 \cdot a_2))$ & $\dn$ & $\textit{as} \; \simp \; \textit{was} \; \textit{before} $ \\ |
1357 $\textit{simp}' \; (_{\textit{bs}}(a_1 \cdot a_2))$ & $\dn$ & $\textit{as} \; \simp \; \textit{was} \; \textit{before} $ \\ |
1354 |
1358 |
1355 $\textit{simp}' \; (_{bs}\oplus as)$ & $\dn$ & \st{$\textit{distinct}( \textit{flatten} ( \textit{map simp as})) \; \textit{match} $} \\ |
1359 $\textit{simp}' \; (_{bs}\sum as)$ & $\dn$ & \st{$\textit{distinct}( \textit{flatten} ( \textit{map simp as})) \; \textit{match} $} \\ |
1356 &&\st{$\quad\textit{case} \; [] \Rightarrow \ZERO$} \\ |
1360 &&\st{$\quad\textit{case} \; [] \Rightarrow \ZERO$} \\ |
1357 &&\st{$\quad\textit{case} \; a :: [] \Rightarrow \textit{fuse bs a}$} \\ |
1361 &&\st{$\quad\textit{case} \; a :: [] \Rightarrow \textit{fuse bs a}$} \\ |
1358 &&\st{$\quad\textit{case} \; as' \Rightarrow \textit{ALTS}\;bs\;as'$}\\ |
1362 &&\st{$\quad\textit{case} \; as' \Rightarrow \textit{ALTS}\;bs\;as'$}\\ |
1359 &&$\textit{if}(\textit{hollowAlternatives}( \textit{map \; simp \; as}))$\\ |
1363 &&$\textit{if}(\textit{hollowAlternatives}( \textit{map \; simp \; as}))$\\ |
1360 &&$\textit{then} \; \fuse \; \textit{bs}\; \textit{extractAlt}(\textit{map} \; \simp \; \textit{as} )$\\ |
1364 &&$\textit{then} \; \fuse \; \textit{bs}\; \textit{extractAlt}(\textit{map} \; \simp \; \textit{as} )$\\ |
1361 &&$\textit{else} \; \simp(_{bs} \oplus \textit{as})$\\ |
1365 &&$\textit{else} \; \simp(_{bs} \sum \textit{as})$\\ |
1362 |
1366 |
1363 |
1367 |
1364 $\textit{simp}' \; a$ & $\dn$ & $\textit{a} \qquad \textit{otherwise}$ |
1368 $\textit{simp}' \; a$ & $\dn$ & $\textit{a} \qquad \textit{otherwise}$ |
1365 \end{tabular} |
1369 \end{tabular} |
1366 \end{center} |
1370 \end{center} |
1367 |
1371 |
1368 \noindent |
1372 \noindent |
1369 given the definition of $\textit{hollowAlternatives}$ and $\textit{extractAlt}$ : |
1373 given the definition of $\textit{hollowAlternatives}$ and $\textit{extractAlt}$ : |
1370 \begin{center} |
1374 \begin{center} |
1371 $\textit{hollowAlternatives}( \textit{rs}) \dn |
1375 $\textit{hollowAlternatives}( \textit{rs}) \dn |
1372 \exists r = (_{\textit{bs}_1}\oplus \textit{rs}_1) \in \textit{rs}. \forall r' \in \textit{rs}, \; |
1376 \exists r = (_{\textit{bs}_1}\sum \textit{rs}_1) \in \textit{rs}. \forall r' \in \textit{rs}, \; |
1373 \textit{either} \; r' = \ZERO \; \textit{or} \; r' = r $ |
1377 \textit{either} \; r' = \ZERO \; \textit{or} \; r' = r $ |
1374 $\textit{extractAlt}( \textit{rs}) \dn \textit{if}\big( |
1378 $\textit{extractAlt}( \textit{rs}) \dn \textit{if}\big( |
1375 \exists r = (_{\textit{bs}_1}\oplus \textit{rs}_1) \in \textit{rs}. \forall r' \in \textit{rs}, \; |
1379 \exists r = (_{\textit{bs}_1}\sum \textit{rs}_1) \in \textit{rs}. \forall r' \in \textit{rs}, \; |
1376 \textit{either} \; r' = \ZERO \; \textit{or} \; r' = r \big)\; \textit{then} \; \textit{return} \; r$ |
1380 \textit{either} \; r' = \ZERO \; \textit{or} \; r' = r \big)\; \textit{then} \; \textit{return} \; r$ |
1377 \end{center} |
1381 \end{center} |
1378 \noindent |
1382 \noindent |
1379 Basically, $\textit{hollowAlternatives}$ captures the feature of |
1383 Basically, $\textit{hollowAlternatives}$ captures the feature of |
1380 a list of regular expression of the shape |
1384 a list of regular expression of the shape |
1381 \begin{center} |
1385 \begin{center} |
1382 $ \oplus[\ZERO \;, \; _{bs}\oplus \textit{rs} ]$ |
1386 $ \sum[\ZERO \;, \; _{bs}\sum \textit{rs} ]$ |
1383 \end{center} |
1387 \end{center} |
1384 and this means we can simply elevate the |
1388 and this means we can simply elevate the |
1385 inner regular expression $_{bs}\oplus \textit{rs}$ |
1389 inner regular expression $_{bs}\sum \textit{rs}$ |
1386 to the outmost |
1390 to the outmost |
1387 and throw away the useless $\ZERO$s and |
1391 and throw away the useless $\ZERO$s and |
1388 the outer $\oplus$ wrapper. |
1392 the outer $\sum$ wrapper. |
1389 Using this new definition of simp, |
1393 Using this new definition of simp, |
1390 under the example where $r$ is the regular expression |
1394 under the example where $r$ is the regular expression |
1391 $(a+b)(a+a*)$ and $s$ is the string $aa$ |
1395 $(a+b)(a+a*)$ and $s$ is the string $aa$ |
1392 the problem of $\rup\backslash_{simp} \, s \neq \simp(\rup\backslash s)$ |
1396 the problem of $\rup\backslash_{simp} \, s \neq \simp(\rup\backslash s)$ |
1393 is resolved. |
1397 is resolved. |
1458 $\textit{sub}(\ONE) \dn \{\ONE\}$\\ |
1462 $\textit{sub}(\ONE) \dn \{\ONE\}$\\ |
1459 $\textit{sub}(r_1 \cdot r_2) \dn \textit{sub}(r_1) \cup \textit{sub}(r_2) \cup \{r_1 \cdot r_2\}$\\ |
1463 $\textit{sub}(r_1 \cdot r_2) \dn \textit{sub}(r_1) \cup \textit{sub}(r_2) \cup \{r_1 \cdot r_2\}$\\ |
1460 $\textit{sub}(r_1 + r_2) \dn \textit{sub}(r_1) \cup \textit{sub}(r_2) \cup \{r_1+r_2\}$\\ |
1464 $\textit{sub}(r_1 + r_2) \dn \textit{sub}(r_1) \cup \textit{sub}(r_2) \cup \{r_1+r_2\}$\\ |
1461 \end{center} |
1465 \end{center} |
1462 $\textit{good}(r) \dn r \neq \ZERO \land \\ |
1466 $\textit{good}(r) \dn r \neq \ZERO \land \\ |
1463 \forall r' \in \textit{sub}(r), \textit{if} \; r' = _{bs_1}\oplus(rs_1), \; |
1467 \forall r' \in \textit{sub}(r), \textit{if} \; r' = _{bs_1}\sum(rs_1), \; |
1464 \textit{then} \nexists r'' \in \textit{rs}_1 \; s.t.\; |
1468 \textit{then} \nexists r'' \in \textit{rs}_1 \; s.t.\; |
1465 r'' = _{bs_2}\oplus \textit{rs}_2 $ |
1469 r'' = _{bs_2}\sum \textit{rs}_2 $ |
1466 |
1470 |
1467 The properties are mainly the ones below: |
1471 The properties are mainly the ones below: |
1468 \begin{itemize} |
1472 \begin{itemize} |
1469 \item |
1473 \item |
1470 \begin{center} |
1474 \begin{center} |
1505 $_{bs}{r_1 \cdot r_2}$ & |
1509 $_{bs}{r_1 \cdot r_2}$ & |
1506 $\gg$ & |
1510 $\gg$ & |
1507 $\textit{bs} @ \textit{bs}_1 @ \textit{bs}_2$\\ |
1511 $\textit{bs} @ \textit{bs}_1 @ \textit{bs}_2$\\ |
1508 |
1512 |
1509 $\textit{if} \; r \gg \textit{bs}_1$ & & $\textit{then}$ & |
1513 $\textit{if} \; r \gg \textit{bs}_1$ & & $\textit{then}$ & |
1510 $_{bs}{\oplus(r :: \textit{rs}})$ & |
1514 $_{bs}{\sum(r :: \textit{rs}})$ & |
1511 $\gg$ & |
1515 $\gg$ & |
1512 $\textit{bs} @ \textit{bs}_1 $\\ |
1516 $\textit{bs} @ \textit{bs}_1 $\\ |
1513 |
1517 |
1514 $\textit{if} \; _{bs}(\oplus \textit{rs}) \gg \textit{bs} @ \textit{bs}_1$ & & $\textit{then}$ & |
1518 $\textit{if} \; _{bs}(\sum \textit{rs}) \gg \textit{bs} @ \textit{bs}_1$ & & $\textit{then}$ & |
1515 $_{bs}{\oplus(r :: \textit{rs}})$ & |
1519 $_{bs}{\sum(r :: \textit{rs}})$ & |
1516 $\gg$ & |
1520 $\gg$ & |
1517 $\textit{bs} @ \textit{bs}_1 $\\ |
1521 $\textit{bs} @ \textit{bs}_1 $\\ |
1518 |
1522 |
1519 $\textit{if} \; r \gg \textit{bs}_1\; \textit{and}$ & $_{bs}r^* \gg \textit{bs} @ \textit{bs}_2$ & $\textit{then}$ & |
1523 $\textit{if} \; r \gg \textit{bs}_1\; \textit{and}$ & $_{bs}r^* \gg \textit{bs} @ \textit{bs}_2$ & $\textit{then}$ & |
1520 $_{bs}r^* $ & |
1524 $_{bs}r^* $ & |