corr_pr_sketch.tex
changeset 9 2c02d27ec0a3
parent 8 e67c0ea1ca73
child 10 2b95bcd2ac73
--- a/corr_pr_sketch.tex	Thu Mar 21 13:26:07 2019 +0000
+++ b/corr_pr_sketch.tex	Fri Mar 22 12:53:56 2019 +0000
@@ -3,6 +3,7 @@
 \usepackage[english]{babel}
 \usepackage{listings}
  \usepackage{amsthm}
+ \usepackage{hyperref}
  \usepackage[margin=0.5in]{geometry}
  
 \theoremstyle{theorem}
@@ -11,6 +12,8 @@
 \theoremstyle{lemma}
 \newtheorem{lemma}{Lemma}
 
+\newcommand{\lemmaautorefname}{Lemma}
+
 \theoremstyle{definition}
  \newtheorem{definition}{Definition}
 \begin{document}
@@ -111,12 +114,15 @@
 \item{dB}\\
 means distinctBy
 \item{Co}\\
-The last matching clause of the function bsimp, namely
-      dist\_res match {
+The last matching clause of the function bsimp, with a slight modification to suit later reasoning.
+\begin{verbatim}
+def Co(bs1, rs): ARexp = {
+      rs match {
         case Nil => AZERO
         case s :: Nil => fuse(bs1, s)
         case rs => AALTS(bs1, rs)  
       }
+\end{verbatim}
 \end{itemize}
 \end{definition}
 
@@ -162,60 +168,80 @@
 
 \end{definition}
 
-\begin{definition}{mkepsBC invariant manipulation of bits and notation}\\
-ALTS(bs, ALTS(bs1, rs1), ALTS(bs2, rs2)) $\sim_{m\epsilon}$ ALTS(bs, rs1.map(fuse(bs1, \_)) ++ rs2.map(fuse(bs2, \_)) ). \\
-We also use $bs2>>rs2 $ as a shorthand notation for rs2.map(fuse(bs2,\_)).
 
-\end{definition}
 
 \begin{definition}{distinctBy operation expressed in a different way--how it transforms the list}\\
 Given two lists rs1 and rs2, we define the operation $--$:\\
 $rs1 -- rs2 := [r \in rs1 |  r \notin rs2]$
-Note that the order is preserved as in the original list.
+Note that the order each term appears in $rs_1 -- rs_2$ is preserved as in the original list.
 \end{definition}
 
 
 \section{Main Result}
-\begin{lemma}{simplification function does not simplify an already simplified regex}\\
+
+\begin{lemma}{simplification function does not simplify an already simplified regex}\label{lma1}\\
 bsimp(r) == bsimp(bsimp(r)) holds for any annotated regular expression r.
 \end{lemma}
 
-\begin{lemma}{simp and mkeps}\\
+\begin{lemma}{simp and mkeps}\label{lma2}\\
 When r is nullable, we have that
 mkeps(bsimp(r)) == mkeps(r)
 \end{lemma}
 
-\begin{lemma}{mkeps equivalence  w.r.t some syntactically different regular expressions(1 ALTS)}\\
-When one of the 2 regular expressions $s(r_1)$ and $s(r_2)$ is ALTS(bs1, rs1), we have that $ds(ALTS(bs, r1, r2)) \sim_{m\epsilon} d(ALTS(bs, sr_1, sr_2))$
+
+%\begin{theorem}See~\cref{lma1}.\end{theorem}
+%\begin{lemma}\label{lma1}\lipsum[2]\end{lemma}
+
+\begin{lemma}{mkeps equivalence  w.r.t some syntactically different regular expressions(1 ALTS)}\label{lma3}\\
+When one of the 2 regular expressions $s(r_1)$ and $s(r_2)$ is of the form ALTS(bs1, rs1), we have that $ds(ALTS(bs, r1, r2)) \sim_{m\epsilon} d(ALTS(bs, sr_1, sr_2))$
 \end{lemma}
 \begin{proof}
-By opening up one of the alts and show no additional changes are made.
+By opening up one of the alts and show no additional changes are made.\\
+Details: $ds(ALTS(bs, r1, r2)) = d Co( bs, dB(flats(sr1, sr2)) )$
 \end{proof}
 
-\begin{lemma}{mkepsBC equivalence w.r.t syntactically different regular expressions(2 ALTS)}\\
+
+\begin{lemma}{mkepsBC invariant manipulation of bits and notation}\label{lma7}\\
+ALTS(bs, ALTS(bs1, rs1), ALTS(bs2, rs2)) $\sim_{m\epsilon}$ ALTS(bs, rs1.map(fuse(bs1, \_)) ++ rs2.map(fuse(bs2, \_)) ). \\
+We also use $bs2>>rs2 $ as a shorthand notation for rs2.map(fuse(bs2,\_)).
+\end{lemma}
+
+\begin{lemma}{mkepsBC equivalence w.r.t syntactically different regular expressions(2 ALTS)}\label{lma4}\\
 $sr_1 = ALTS(bs1, rs1)$ and $sr_2 = ALTS(bs2, rs2)$ we have  $  d(sr_1 +sr_2 ) \sim_{m\epsilon} d(ALTS(bs, bs1>>rs1 ++ bs2>>rs2))$ 
 \end{lemma}
 \begin{proof}
 We are just fusing bits inside here, there is no other structural change.
 \end{proof}
 
-\begin{lemma}{mkepsBC equivalence w.r.t syntactically different regular expressions(2 ALTS+ some deletion)}\\
-$d Co(ALTS(bs, dB(bs1>>rs1 ++ bs2>>rs2))) \sim_{m\epsilon} d Co(ALTS(bs, dB(bs1>>rs1 ++ ((bs2>>rs2)--rs1)        ))) $ 
+\begin{lemma}{What does dB do to two already simplified ALTS}\label{lma5}\\
+$d Co(ALTS(bs, dB(bs1>>rs1 ++ bs2>>rs2))) = d Co(ALTS(bs, bs1>>rs1 ++ ((bs2>>rs2)--rs1)        )) $ 
 \end{lemma}
 \begin{proof}
-The removed parts have already appeared before in $rs_1$, so if any of them is truly nullable and is chosen as the mkeps path, it will have been traversed through in its previous counterpart.\\
-(We probably need to switch the position of lemma5 and lemma6)
+We prove that $ dB(bs1>>rs1 ++ bs2>>rs2) = bs1>>rs1 ++ ((bs2>>rs2)--rs1)$.
+
+
 \end{proof}
 
-\begin{lemma}{after opening two previously simplified alts up into terms, length must exceed 2}\\
- $d Co(ALTS(bs, rs        )) \sim_{m\epsilon} d(ALTS(bs, rs))$ if $rs$ is a list of length greater than or equal to 2.
+\begin{lemma}{after opening two previously simplified alts up into terms, length must exceed 2}\label{lma6}\\
+If sr1, sr2 are of the form ALTS(bs1, rs1), ALTS(bs2, rs2) respectively, then we have that 
+$Co(bs, (bs1>>rs1) ++ (bs2>>rs2)--rs1) = ALTS(bs, bs1>>rs1 ++ (bs2>>rs2)--rs1)$
 \end{lemma}
 \begin{proof}
-As suggested by the title of this lemma
+ $Co(bs, rs) \sim_{m\epsilon} ALTS(bs, rs)$ if $rs$ is a list of length greater than or equal to 2.
+ As suggested by the title of this lemma, ALTS(bs1, rs1) is a result of simplification, which means that rs1 must be composed of at least 2 distinct regular terms. This alone says that $bs1>>rs1 ++ (bs2>>rs2)--rs1$ is a list of length greater than or equal to 2, as the second operand of the concatenation operator $(bs2>>rs2) -- rs1$ can only contribute a non-negative value to the overall length of the list 
+ $bs1>>rs1 ++ (bs2>>rs2)--rs1$.
 \end{proof}
 
 
-
+\begin{lemma}{mkepsBC equivalence w.r.t syntactically different regular expressions(2 ALTS+ some deletion after derivatives)}\label{lma8}\\
+$d ALTS(bs, bs1>>rs1 ++ bs2>>rs2) \sim_{m\epsilon} d ALTS(bs, bs1>>rs1 ++ ((bs2>>rs2)--rs1)        ) $ 
+\end{lemma}
+\begin{proof}
+Let's call $bs1>>rs1$ $rs1'$ and  $bs2>>rs2$ $rs2'$. Then we need to prove $d ALTS(bs, rs1' ++rs2') \sim_{m\epsilon} d ALTS(bs, rs1'++(rs2' -- rs1') )   $.\\
+We might as well omit the prime in each rs for simplicty of notation and prove $d ALTS(bs, rs1++rs2) \sim_{m\epsilon} d ALTS(bs, rs1++(rs2 -- rs1) )   $.\\
+We know that the result of derivative is nullable, so there must exist an r in rs1++rs2 s.t. r is nullable.\\
+If $r \in rs1$, then equivalence holds. If $r \in rs2 \wedge  r \notin rs1$, equivalence holds as well. This completes the proof.
+\end{proof}
 
 
 
@@ -236,13 +262,19 @@
 \begin{itemize}
 \item
 
+
 When we have an annotated regular expression ar and a string s that matches ar, by the correctness of the algorithm ders, we have that ders(ar, s) is nullable, and that mkepsBC will extract the desired bits for decoding the correct value v for the matching, and v is a POSIX value. Now we prove that mkepsBC(ders\_simp(ar, s)) yields the same bitsequence.
 \\
 We first open up the ders\_simp function into nested alternating sequences of ders and simp.
 Assume that s = $c_1...c_n$($n \geq 1$ ) where each of the $c_i$ are characters. 
 Then $ders\_simp(ar, s)$ = $s(d_{c_n}(...s(d_{c_1}(r))...))$ = $sdsd......sdr$. If we can prove that
-$sdr \sim_{m\epsilon} dsr$ holds for any regular expression and any character, then we are done. This is because then we can push ders operation inside and move simp operation outside and have that $sdsd...sdr \sim_{m\epsilon} ssddsdsd...sdr \sim_{m\epsilon} ... \sim_{m\epsilon} s....sd....dr$ and using lemma1 we have that $s...sd....dr = sd...dr$. By lemma2, we have $RHS \sim_{m\epsilon} d...dr$.\\
-Now we proceed to prove that $sdr \sim_{m\epsilon} dsr$. This can be reduced to proving $dr \sim_{m\epsilon} dsr$ as we know that $dr \sim_{m\epsilon} sdr$ by lemma2.\\
+$sdr \sim_{m\epsilon} dsr$ holds for any regular expression and any character, then we are done. This is because then we can push ders operation inside and move simp operation outside and have that $sdsd...sdr \sim_{m\epsilon} ssddsdsd...sdr \sim_{m\epsilon} ... \sim_{m\epsilon} s....sd....dr$.
+ Using \autoref{lma1} we have that $s...sd....dr = sd...dr$. 
+By \autoref{lma2}, we have $RHS \sim_{m\epsilon} d...dr$.\\
+Notice that we don't actually need \autoref{lma1} here. That is because by \autoref{lma2}, we can have that $s...sd....dr \sim_{m\epsilon} sd...dr$. The equality above can be replaced by mkepsBC
+ equivalence without affecting the validity of the whole proof since all we want is mkepsBC equivalence, not equality.
+
+Now we proceed to prove that $sdr \sim_{m\epsilon} dsr$. This can be reduced to proving $dr \sim_{m\epsilon} dsr$ as we know that $dr \sim_{m\epsilon} sdr$ by \autoref{lma2}.\\
 
 we use an induction proof. Base cases are omitted. Here are the 3 inductive cases.
 \begin{itemize}
@@ -250,11 +282,13 @@
 \item{$r_1+r_2$}
 $r_1+r_2$\\
 The most difficult case is when $sr1$ and $sr2$ are both ALTS, so that they will be opened up in the flats function and some terms in sr2 might be deleted. Or otherwise we can use the argument that $d(r_1+r_2) = dr_1 + dr_2  \sim_{m\epsilon} dsr_1 + dsr_2 \sim_{m\epsilon} ds(r_1+r_2)$,
- the last equivalence being established by lemma3.
+ the last equivalence being established by \autoref{lma3}.
 When $s(r_1), s(r_2)$ are both ALTS, we have to be more careful for the last equivalence step, namelly, $dsr_1 + dsr_2 \sim_{m\epsilon} ds(r_1+r_2)$. \\
-We have that $LHS = dsr_1 + dsr_2 = d(sr_1 +sr_2)$. Since $sr_1 = ALTS(bs1, rs1)$ and $sr_2 = ALTS(bs2, rs2)$ we have  $  d(sr_1 +sr_2 ) \sim_{m\epsilon} d(ALTS(bs, bs1>>rs1 ++ bs2>>rs2))$ by lemma4.
-On the other hand, $RHS = ds(ALTS(bs, r1, r2)) \sim_{m\epsilon}d Co(ALTS(bs, dB(flats(s(r1), s(r2))))) == d Co(ALTS(bs, dB(bs1>>rs1 ++ bs2>>rs2)))$ by definition of bsimp and flats.\\  $d Co(ALTS(bs, dB(bs1>>rs1 ++ bs2>>rs2))) \sim_{m\epsilon} d Co(ALTS(bs, (bs1>>rs1 ++ ((bs2>>rs2)--rs1)        ))) $ by lemma5.\\ $d Co(ALTS(bs, (bs1>>rs1 ++ ((bs2>>rs2)--rs1)        ))) \sim_{m\epsilon} d(ALTS(bs, bs1>>rs1 ++ (bs2>>rs2)--rs1))$ by lemma6. \\
- Using lemma5 again, we have  $d(ALTS(bs, bs1>>rs1 ++ (bs2>>rs2)--rs1)) \sim_{m\epsilon} d(ALTS(bs, bs1>>rs1 ++ bs2>>rs2))$.\\
+We have that $LHS = dsr_1 + dsr_2 = d(sr_1 +sr_2)$. Since $sr_1 = ALTS(bs1, rs1)$ and $sr_2 = ALTS(bs2, rs2)$ we have  $  d(sr_1 +sr_2 ) \sim_{m\epsilon} d(ALTS(bs, bs1>>rs1 ++ bs2>>rs2))$ by \autoref{lma4}.
+On the other hand, $RHS = ds(ALTS(bs, r1, r2)) = d Co(bs, dB(flats(s(r1), s(r2)) ) ) = d Co(bs, dB(bs1>>rs1 ++ bs2>>rs2))$ by definition of bsimp and flats.\\  
+$d Co(bs, dB(bs1>>rs1 ++ bs2>>rs2)) = d Co(bs, (bs1>>rs1 ++ ((bs2>>rs2)--rs1))) $ by \autoref{lma5}.\\ $d Co(bs, (bs1>>rs1 ++ ((bs2>>rs2)--rs1)        )) = d(ALTS(bs, bs1>>rs1 ++ (bs2>>rs2)--rs1))$ by \autoref{lma6}. \\
+ Using \autoref{lma8}, we have  $d(ALTS(bs, bs1>>rs1 ++ (bs2>>rs2)--rs1)) \sim_{m\epsilon} d(ALTS(bs, bs1>>rs1 ++ bs2>>rs2)) \sim_{m\epsilon}  RHS$.\\
+ %only the last part we don't have an equality here. We use mkeps equivalence instead because these two regexes are indeed different syntactically. However, they have even more in common than just mkeps equal. We might later augment this proof so that this equivalence relation is changed to something like regular expression equivalence.
  This completes the proof.
 \item{$r*$}\\
 s(r*) = s(r).