--- a/Spiral.scala Fri Mar 22 12:53:56 2019 +0000
+++ b/Spiral.scala Sat Mar 23 11:53:09 2019 +0000
@@ -433,25 +433,28 @@
}
//simplified regex size 291, so called pd_simp size 70 (did not do simplification to terms in the PD set)
-
+ def pushbits(r: ARexp): ARexp = r match {
+ case AALTS(bs, rs) => AALTS(Nil, rs.map(fuse(bs, _)))
+ case r => r
+ }
def correctness_proof_convenient_path(){
for(i <- 1 to 1)
{
- val s = "abaa"//rd_string_gen(alphabet_size, 3)
- val r = ASTAR(List(),AALTS(List(),List(ASTAR(List(Z),ACHAR(List(),'a')), ASEQ(List(S),ACHAR(List(),'a'),ACHAR(List(),'b')))))//internalise(balanced_struct_gen(3))//SEQ(ALTS(List(STAR("a"),ALTS(List("a","c")))),SEQ(ALTS(List("c","a")),ALTS(List("c","b")))) //random_struct_gen(7)
+ val s = "baa"//rd_string_gen(alphabet_size, 3)//"abaa"//rd_string_gen(alphabet_size, 3)
+ val r = ASTAR(List(),ASEQ(List(),AALTS(List(),List(ACHAR(List(Z),'c'), ACHAR(List(S),'b'))),ASEQ(List(),ASTAR(List(),ACHAR(List(),'a')),AALTS(List(),List(ACHAR(List(Z),'a'), ACHAR(List(S),'a'))))))//internalise(random_struct_gen(4))//ASTAR(List(),AALTS(List(),List(ASTAR(List(Z),ACHAR(List(),'a')), ASEQ(List(S),ACHAR(List(),'a'),ACHAR(List(),'b')))))//internalise(balanced_struct_gen(3))//SEQ(ALTS(List(STAR("a"),ALTS(List("a","c")))),SEQ(ALTS(List("c","a")),ALTS(List("c","b")))) //random_struct_gen(7)
for(j <- 0 to s.length - 1){
val ss = s.slice(0, j+ 1)
val nangao = ders_simp(r, ss.toList)
val easy = bsimp(bders(ss.toList, r))
- if(true){
+ if(!(nangao == easy || pushbits(nangao) == (easy))){
println(j)
- if(j == 3) println("not equal")
+ println("not equal")
println("string")
println(ss)
println("original regex")
- println(r)
+ println(annotated_tree(r))
println("regex after ders simp")
- println(nangao)
+ println(annotated_tree(nangao))
println("regex after ders")
println(annotated_tree(bders(ss.toList, r)))//flats' fuse when opening up AALTS causes the difference
println("regex after ders and then a single simp")
Binary file corr_pr_sketch.pdf has changed
--- a/corr_pr_sketch.tex Fri Mar 22 12:53:56 2019 +0000
+++ b/corr_pr_sketch.tex Sat Mar 23 11:53:09 2019 +0000
@@ -5,7 +5,8 @@
\usepackage{amsthm}
\usepackage{hyperref}
\usepackage[margin=0.5in]{geometry}
-
+\usepackage{pmboxdraw}
+
\theoremstyle{theorem}
\newtheorem{theorem}{Theorem}
@@ -243,6 +244,13 @@
If $r \in rs1$, then equivalence holds. If $r \in rs2 \wedge r \notin rs1$, equivalence holds as well. This completes the proof.
\end{proof}
+\begin{lemma}{nullability relation between a regex and its simplified version}\label{lma9}\\
+$r\ nullable \iff sr\ nullable $
+\end{lemma}
+
+\begin{lemma}{concatenation + simp invariance of mkepsBC}\label{lma10}\\
+$mkepsBC r1 \cdot sr2 = mkepsBC r1 \cdot r2$ if both r1 and r2 are nullable.
+\end{lemma}
\begin{theorem}{Correctness Result}
@@ -279,8 +287,7 @@
we use an induction proof. Base cases are omitted. Here are the 3 inductive cases.
\begin{itemize}
-\item{$r_1+r_2$}
-$r_1+r_2$\\
+\item{$r_1+r_2$}\\
The most difficult case is when $sr1$ and $sr2$ are both ALTS, so that they will be opened up in the flats function and some terms in sr2 might be deleted. Or otherwise we can use the argument that $d(r_1+r_2) = dr_1 + dr_2 \sim_{m\epsilon} dsr_1 + dsr_2 \sim_{m\epsilon} ds(r_1+r_2)$,
the last equivalence being established by \autoref{lma3}.
When $s(r_1), s(r_2)$ are both ALTS, we have to be more careful for the last equivalence step, namelly, $dsr_1 + dsr_2 \sim_{m\epsilon} ds(r_1+r_2)$. \\
@@ -291,16 +298,70 @@
%only the last part we don't have an equality here. We use mkeps equivalence instead because these two regexes are indeed different syntactically. However, they have even more in common than just mkeps equal. We might later augment this proof so that this equivalence relation is changed to something like regular expression equivalence.
This completes the proof.
\item{$r*$}\\
-s(r*) = s(r).
-\item{$r1.r2$}\\
-using previous.
+$s(r*) = r*$. Our goal is trivially achieved.
+\item{$r1 \cdot r2$}\\
+When r1 is nullable, $ds r1r2 = dsr1 \cdot sr2 + dsr2 \sim_{m\epsilon} dr1 \cdot sr2 + dr2 = dr1 \cdot r2 + dr2 $. The last step uses \autoref{lma10}.
+When r1 is not nullable, $ds r1r2 = dsr1 \cdot sr2 \sim_{m\epsilon} dr1 \cdot sr2 \sim_{m\epsilon} dr1 \cdot r2 $
\end{itemize}
\item
Proof of second part of the theorem: use a similar structure of argument as in the first part.
+
+\item
+This proof has a major flaw: it assumes all dr is nullable along the path of deriving r by s. But it could be the case that $s\in L(r)$ but $ \exists s' \in Pref(s) \ s.t.\ ders(s', r)$ is not nullable (or equivalently, $s'\notin L(r)$). One remedy for this is to replace the mkepsBC equivalence relation into some other transitive relation that entails mkepsBC equivalence.
\end{itemize}
\end{proof}
+\begin{theorem}{
+This is a very strong claim that has yet to be more carefully examined and proved. However, experiments suggest a very good hope for this.}\\
+Define pushbits as the following:\\
+$pushbits(r) = if( r == ALTS(bs, rs) )\ then\ ALTS(Nil, rs.map(fuse(bs,\_))) \ else\ r$.\\
+Then we have \mbox{\boldmath$pushbits(ders\_simp(ar, s) ) == simp(ders(ar,s)) \ or\ ders\_simp(ar, s) == simp(ders(ar, s))$}.\\
+Unfortunately this does not hold. A counterexample is\\
+\begin{verbatim}
+r = ASTAR(List(),ASEQ(List(),AALTS(List(),List(ACHAR(List(Z),c), ACHAR(List(S),b))),ASEQ(List(),ASTAR(List(),ACHAR(List(),a)),AALTS(List(),List(ACHAR(List(Z),a), ACHAR(List(S),a))))))
+regex after ders simp
+
+SEQ
+ └-ALT List(S, S, C(b))
+ | └-SEQ
+ | | └-STA List(S, C(a), S, C(a))
+ | | | └-a
+ | | └-a List(Z)
+ | └-ONE List(S, C(a), Z, Z, C(a))
+ └-STA
+ └-SEQ
+ └-ALT
+ | └-c List(Z)
+ | └-b List(S)
+ └-SEQ
+ └-STA
+ | └-a
+ └-ALT
+ └-a List(Z)
+ └-a List(S)
+regex after ders and then a single simp
+SEQ
+ └-ALT List(S)
+ | └-SEQ List(S, C(b))
+ | | └-STA List(S, C(a), S, C(a))
+ | | | └-a
+ | | └-a List(Z)
+ | └-ONE List(S, C(b), S, C(a), Z, Z, C(a))
+ └-STA
+ └-SEQ
+ └-ALT
+ | └-c List(Z)
+ | └-b List(S)
+ └-SEQ
+ └-STA
+ | └-a
+ └-ALT
+ └-a List(Z)
+ └-a List(S)
+\end{verbatim}
+\end{theorem}
+
\end{document}
%The second part might still need some more development.