# HG changeset patch # User Chengsong # Date 1579733438 0 # Node ID a85c0f0fcf4444e16d3e50a9e40b412488e53efb # Parent 79f347cb8b4d2c00a7b14b81bf0d509aafcc0771 f diff -r 79f347cb8b4d -r a85c0f0fcf44 Spiral.scala --- a/Spiral.scala Mon Jan 20 15:51:06 2020 +0000 +++ b/Spiral.scala Wed Jan 22 22:50:38 2020 +0000 @@ -857,8 +857,8 @@ //find_re() //tellmewhy() //correctness_proof_convenient_path() - //tellmewhy() - have_fun() + tellmewhy() + //have_fun() //string_der_test() //comp(rd_string_gen(3,6).toList, random_struct_gen(7)) //newxp1() diff -r 79f347cb8b4d -r a85c0f0fcf44 etnms/etnms.tex --- a/etnms/etnms.tex Mon Jan 20 15:51:06 2020 +0000 +++ b/etnms/etnms.tex Wed Jan 22 22:50:38 2020 +0000 @@ -4,6 +4,7 @@ \usepackage{tikz-cd} %\usepackage{algorithm} \usepackage{amsmath} +\usepackage{xcolor} \usepackage[noend]{algpseudocode} \usepackage{enumitem} \usepackage{nccmath} @@ -926,8 +927,8 @@ we use the helper function retrieve described by Sulzmann and Lu: \begin{center} \begin{tabular}{@{}l@{\hspace{2mm}}c@{\hspace{2mm}}l@{}} - $\textit{retrieve}\,(\textit{ONE}\,bs)\,\Empty$ & $\dn$ & $bs$\\ - $\textit{retrieve}\,(\textit{CHAR}\,bs\,c)\,(\Char\,d)$ & $\dn$ & $bs$\\ + $\textit{retrieve}\,(_{bs}\ONE)\,\Empty$ & $\dn$ & $bs$\\ + $\textit{retrieve}\,(_{bs}{\bf c})\,(\Char\,d)$ & $\dn$ & $bs$\\ $\textit{retrieve}\,(\textit{ALTS}\,bs\,a::as)\,(\Left\,v)$ & $\dn$ & $bs \,@\, \textit{retrieve}\,a\,v$\\ $\textit{retrieve}\,(\textit{ALTS}\,bs\,a::as)\,(\Right\,v)$ & $\dn$ & @@ -1214,12 +1215,72 @@ $\rup\backslash_{simp} \, s$ and $\simp(\rup\backslash s)$: \[ -D = (c_1c_2, \{r_1\cdot r_2 \mid \text{$ c_1\in L(r_1), r_1 \; not \; \nullable, c_2 \in L(r_2), -r_2 \backslash c_2$ is of shape alternative and $c_1c_2 \notin L(r_1)$}\}) +D = (c_1c_2, \{r_1\cdot r_2 \mid \text{$\simp(r_2) = r_2, \simp(r_1 \backslash c_1) = \ONE, r_1 \; not \; \nullable, c_2 \in L(r_2), +\simp(r_2 \backslash c_2)$ is of shape alternative and $c_1c_2 \notin L(r_1)$}\}) \] -and we can use this to construct a set of examples based -on this type of behaviour of two operations: -$r_1$ +The culprit is the different order in which simplification is applied. +For $\rup \backslash_{simp} s$, +\begin{center} +\begin{tabular}{lcl} +$(r_1\cdot r_2)\backslash_{simp} \, [c_1c_2]$ & $= \simp\left[ \big(\simp\left[ \left( r_1\cdot r_2 \right) \backslash c_1\right] \big)\backslash c_2\right]$\\ + & $= \simp\left[ \big(\simp \left[ \left(r_1 \backslash c_1\right) \cdot r_2 \right] \big) \backslash c_2 \right]$\\ + & $= \simp \left[ (\fuse \; \bmkeps(r_1\backslash c_1) \; \simp(r_2) ) \backslash c_2 \right]$, +\end{tabular} +\end{center} +\noindent +from the definition of $D$ we know $r_1 \backslash c_1$ is nullable, therefore +$\bmkeps(r_1\backslash c_1)$ returns a bitcode, we shall call it + $bs$. Also, $\simp(r_2 \backslash c_2)$ is a certain alternative, + we shall call it $_{bs_1}\oplus rs$, and $( \left(r_1 \backslash c_1\right) \backslash c_2 \cdot r_2)$ + simplifies to $\ZERO$, + so the above term can be rewritten as +\begin{center} +\begin{tabular}{lcl} +$\textit{distinct}(\flatten[\ZERO\;, \; _{\textit{bs++bs1}}\oplus rs] ) \; \textit{match} $ \\ + $\textit{case} \; [] \Rightarrow \ZERO$ \\ + $\textit{case} \; a :: [] \Rightarrow \textit{fuse bs a}$ \\ + $\textit{case} \; as' \Rightarrow \textit{ALTS}\;bs\;as'$\\ +\end{tabular} +\end{center} +this, in turn, can be rewritten as $map (\fuse \textit{bs}++\textit{bs1}) rs$ based on +the properties of the function $\simp$(more on this later). +simplification is done along the way, disallowing the structure of nested alternatives, +and this can actually be proven: simp(r) does not contain +nested alternatives. +\[ +\simp \left[(r_1\cdot r_2) \backslash [c_1c_2] \right]= \simp \left[ ((r_1 \cdot r_2 )\backslash c_1) \backslash c_2 \right] +\] +For $\simp(\rup \backslash s)$, +\begin{center} +\begin{tabular}{lcl} +$\simp\big[(r_1\cdot r_2)\backslash \, [c_1c_2]\big]$ & $= \simp\left[ \big( \left( r_1\cdot r_2 \right) \backslash c_1 \big)\backslash c_2\right]$\\ + & $= \simp\left[ \left(r_1 \backslash c_1\right) \cdot r_2 \big) \backslash c_2 \right]$\\ + & $= \simp \left[ \oplus[\big( \left(r_1 \backslash c_1\right) \backslash c_2 \big) \cdot r_2 \; , \; \fuse \; \bmkeps(r_1\backslash c_1) \; r_2 \backslash c_2 ] \right]$, +\end{tabular} +\end{center} +\noindent +as before, we call the bitcode returned by $\bmkeps(r_1\backslash c_1)$ + $bs$. Also, $\simp(r_2 \backslash c_2)$ is a certain alternative, + we shall call it $_{bs_1}\oplus rs$, and $( \left(r_1 \backslash c_1\right) \backslash c_2 \cdot r_2)$ + simplifies to $\ZERO$, + so the above term can be rewritten as +\begin{center} +\begin{tabular}{lcl} +$\textit{distinct}(\flatten[\ZERO\;, \; _{\textit{bs}++\textit{bs1}}\oplus rs] ) \; \textit{match} $ \\ + $\textit{case} \; [] \Rightarrow \ZERO$ \\ + $\textit{case} \; a :: [] \Rightarrow \textit{fuse bs a}$ \\ + $\textit{case} \; as' \Rightarrow \textit{ALTS}\;bs\;as'$\\ +\end{tabular} +\end{center} + +this, in turn, can be rewritten as $map (\fuse\; \textit{bs}++\textit{bs1}) rs$ based on +the properties of the function $\simp$(more on this later). + +simplification is done along the way, disallowing the structure of nested alternatives, +and this can actually be proven: simp(r) does not contain +nested alternatives. + +%CONSTRUCTION SITE HERE that is to say, despite the bits are being moved around on the regular expression (difference in bits), the structure of the (unannotated)regular expression after one simplification is exactly the same after the @@ -1227,7 +1288,7 @@ regardless of whether we did simplification along the way. One way would be to give a function that calls - %CONSTRUCTION SITE + fuse is the culprit: it causes the order in which alternatives are opened up to be remembered and finally the difference appear in $\simp(\rup \backslash s)$ and $\rup \backslash{simp} \,s$. @@ -1251,8 +1312,6 @@ i parallelly tried another method of using retrieve\\ - -%HERE CONSTRUCTION SITE The vsimp function, defined as follows tries to simplify the value in lockstep with regular expression:\\