corr_pr_sketch.tex
author Chengsong
Sat, 23 Mar 2019 11:53:09 +0000
changeset 10 2b95bcd2ac73
parent 9 2c02d27ec0a3
child 11 9c1ca6d6e190
permissions -rw-r--r--
exp and proof
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
8
Chengsong
parents:
diff changeset
     1
\documentclass{article}
Chengsong
parents:
diff changeset
     2
\usepackage[utf8]{inputenc}
Chengsong
parents:
diff changeset
     3
\usepackage[english]{babel}
Chengsong
parents:
diff changeset
     4
\usepackage{listings}
Chengsong
parents:
diff changeset
     5
 \usepackage{amsthm}
9
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
     6
 \usepackage{hyperref}
8
Chengsong
parents:
diff changeset
     7
 \usepackage[margin=0.5in]{geometry}
10
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
     8
\usepackage{pmboxdraw}
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
     9
8
Chengsong
parents:
diff changeset
    10
\theoremstyle{theorem}
Chengsong
parents:
diff changeset
    11
\newtheorem{theorem}{Theorem}
Chengsong
parents:
diff changeset
    12
Chengsong
parents:
diff changeset
    13
\theoremstyle{lemma}
Chengsong
parents:
diff changeset
    14
\newtheorem{lemma}{Lemma}
Chengsong
parents:
diff changeset
    15
9
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
    16
\newcommand{\lemmaautorefname}{Lemma}
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
    17
8
Chengsong
parents:
diff changeset
    18
\theoremstyle{definition}
Chengsong
parents:
diff changeset
    19
 \newtheorem{definition}{Definition}
Chengsong
parents:
diff changeset
    20
\begin{document}
Chengsong
parents:
diff changeset
    21
This is a sketch proof for the correctness of the algorithm ders\_simp.
Chengsong
parents:
diff changeset
    22
\section{Function Definitions}
Chengsong
parents:
diff changeset
    23
Chengsong
parents:
diff changeset
    24
\begin{definition}{Bits}
Chengsong
parents:
diff changeset
    25
\begin{verbatim}
Chengsong
parents:
diff changeset
    26
abstract class Bit
Chengsong
parents:
diff changeset
    27
case object Z extends Bit
Chengsong
parents:
diff changeset
    28
case object S extends Bit
Chengsong
parents:
diff changeset
    29
case class C(c: Char) extends Bit
Chengsong
parents:
diff changeset
    30
Chengsong
parents:
diff changeset
    31
type Bits = List[Bit]
Chengsong
parents:
diff changeset
    32
\end{verbatim}
Chengsong
parents:
diff changeset
    33
\end{definition}
Chengsong
parents:
diff changeset
    34
Chengsong
parents:
diff changeset
    35
\begin{definition}{Annotated Regular Expressions}
Chengsong
parents:
diff changeset
    36
\begin{verbatim}
Chengsong
parents:
diff changeset
    37
abstract class ARexp 
Chengsong
parents:
diff changeset
    38
case object AZERO extends ARexp
Chengsong
parents:
diff changeset
    39
case class AONE(bs: Bits) extends ARexp
Chengsong
parents:
diff changeset
    40
case class ACHAR(bs: Bits, f: Char) extends ARexp
Chengsong
parents:
diff changeset
    41
case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp 
Chengsong
parents:
diff changeset
    42
case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp 
Chengsong
parents:
diff changeset
    43
case class ASTAR(bs: Bits, r: ARexp) extends ARexp 
Chengsong
parents:
diff changeset
    44
\end{verbatim}
Chengsong
parents:
diff changeset
    45
\end{definition}
Chengsong
parents:
diff changeset
    46
Chengsong
parents:
diff changeset
    47
\begin{definition}{bnullable}
Chengsong
parents:
diff changeset
    48
\begin{verbatim}
Chengsong
parents:
diff changeset
    49
  def bnullable (r: ARexp) : Boolean = r match {
Chengsong
parents:
diff changeset
    50
    case AZERO => false
Chengsong
parents:
diff changeset
    51
    case AONE(_) => true
Chengsong
parents:
diff changeset
    52
    case ACHAR(_,_) => false
Chengsong
parents:
diff changeset
    53
    case AALTS(_, rs) => rs.exists(bnullable)
Chengsong
parents:
diff changeset
    54
    case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
Chengsong
parents:
diff changeset
    55
    case ASTAR(_, _) => true
Chengsong
parents:
diff changeset
    56
  }
Chengsong
parents:
diff changeset
    57
\end{verbatim}
Chengsong
parents:
diff changeset
    58
\end{definition}
Chengsong
parents:
diff changeset
    59
Chengsong
parents:
diff changeset
    60
\begin{definition}{ders\_simp}
Chengsong
parents:
diff changeset
    61
\begin{verbatim}
Chengsong
parents:
diff changeset
    62
def ders_simp(r: ARexp, s: List[Char]): ARexp = {
Chengsong
parents:
diff changeset
    63
 s match {
Chengsong
parents:
diff changeset
    64
   case Nil => r 
Chengsong
parents:
diff changeset
    65
   case c::cs => ders_simp(bsimp(bder(c, r)), cs)
Chengsong
parents:
diff changeset
    66
 }
Chengsong
parents:
diff changeset
    67
}\end{verbatim}
Chengsong
parents:
diff changeset
    68
\end{definition}
Chengsong
parents:
diff changeset
    69
Chengsong
parents:
diff changeset
    70
\begin{definition}{bder}
Chengsong
parents:
diff changeset
    71
\begin{verbatim}
Chengsong
parents:
diff changeset
    72
def bder(c: Char, r: ARexp) : ARexp = r match {
Chengsong
parents:
diff changeset
    73
 case AZERO => AZERO
Chengsong
parents:
diff changeset
    74
 case AONE(_) => AZERO
Chengsong
parents:
diff changeset
    75
 case ACHAR(bs, f) => if (c == f) AONE(bs:::List(C(c))) else AZERO
Chengsong
parents:
diff changeset
    76
 case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
Chengsong
parents:
diff changeset
    77
 case ASEQ(bs, r1, r2) => {
Chengsong
parents:
diff changeset
    78
  if (bnullable(r1)) AALT(bs, ASEQ(Nil, bder(c, r1), r2), fuse(mkepsBC(r1), bder(c, r2)))
Chengsong
parents:
diff changeset
    79
  else ASEQ(bs, bder(c, r1), r2)
Chengsong
parents:
diff changeset
    80
  }
Chengsong
parents:
diff changeset
    81
 case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
Chengsong
parents:
diff changeset
    82
}
Chengsong
parents:
diff changeset
    83
\end{verbatim}
Chengsong
parents:
diff changeset
    84
\end{definition}
Chengsong
parents:
diff changeset
    85
Chengsong
parents:
diff changeset
    86
\begin{definition}{bsimp}
Chengsong
parents:
diff changeset
    87
\begin{verbatim}
Chengsong
parents:
diff changeset
    88
  def bsimp(r: ARexp): ARexp = r match {
Chengsong
parents:
diff changeset
    89
    case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
Chengsong
parents:
diff changeset
    90
        case (AZERO, _) => AZERO
Chengsong
parents:
diff changeset
    91
        case (_, AZERO) => AZERO
Chengsong
parents:
diff changeset
    92
        case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
Chengsong
parents:
diff changeset
    93
        case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
Chengsong
parents:
diff changeset
    94
    }
Chengsong
parents:
diff changeset
    95
    case AALTS(bs1, rs) => {
Chengsong
parents:
diff changeset
    96
      val rs_simp = rs.map(bsimp)
Chengsong
parents:
diff changeset
    97
      val flat_res = flats(rs_simp)
Chengsong
parents:
diff changeset
    98
      val dist_res = distinctBy(flat_res, erase)
Chengsong
parents:
diff changeset
    99
      dist_res match {
Chengsong
parents:
diff changeset
   100
        case Nil => AZERO
Chengsong
parents:
diff changeset
   101
        case s :: Nil => fuse(bs1, s)
Chengsong
parents:
diff changeset
   102
        case rs => AALTS(bs1, rs)  
Chengsong
parents:
diff changeset
   103
      }
Chengsong
parents:
diff changeset
   104
    }
Chengsong
parents:
diff changeset
   105
    //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
Chengsong
parents:
diff changeset
   106
    case r => r
Chengsong
parents:
diff changeset
   107
  }
Chengsong
parents:
diff changeset
   108
\end{verbatim}
Chengsong
parents:
diff changeset
   109
\end{definition}
Chengsong
parents:
diff changeset
   110
Chengsong
parents:
diff changeset
   111
\begin{definition}{sub-parts of bsimp}
Chengsong
parents:
diff changeset
   112
\begin{itemize}
Chengsong
parents:
diff changeset
   113
\item{flats}\\
Chengsong
parents:
diff changeset
   114
flattens the list.
Chengsong
parents:
diff changeset
   115
\item{dB}\\
Chengsong
parents:
diff changeset
   116
means distinctBy
Chengsong
parents:
diff changeset
   117
\item{Co}\\
9
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   118
The last matching clause of the function bsimp, with a slight modification to suit later reasoning.
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   119
\begin{verbatim}
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   120
def Co(bs1, rs): ARexp = {
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   121
      rs match {
8
Chengsong
parents:
diff changeset
   122
        case Nil => AZERO
Chengsong
parents:
diff changeset
   123
        case s :: Nil => fuse(bs1, s)
Chengsong
parents:
diff changeset
   124
        case rs => AALTS(bs1, rs)  
Chengsong
parents:
diff changeset
   125
      }
9
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   126
\end{verbatim}
8
Chengsong
parents:
diff changeset
   127
\end{itemize}
Chengsong
parents:
diff changeset
   128
\end{definition}
Chengsong
parents:
diff changeset
   129
Chengsong
parents:
diff changeset
   130
\begin{definition}{fuse}
Chengsong
parents:
diff changeset
   131
\begin{verbatim}
Chengsong
parents:
diff changeset
   132
  def fuse(bs: Bits, r: ARexp) : ARexp = r match {
Chengsong
parents:
diff changeset
   133
    case AZERO => AZERO
Chengsong
parents:
diff changeset
   134
    case AONE(cs) => AONE(bs ++ cs)
Chengsong
parents:
diff changeset
   135
    case ACHAR(cs, f) => ACHAR(bs ++ cs, f)
Chengsong
parents:
diff changeset
   136
    case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
Chengsong
parents:
diff changeset
   137
    case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
Chengsong
parents:
diff changeset
   138
    case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
Chengsong
parents:
diff changeset
   139
  }
Chengsong
parents:
diff changeset
   140
\end{verbatim}
Chengsong
parents:
diff changeset
   141
\end{definition}
Chengsong
parents:
diff changeset
   142
Chengsong
parents:
diff changeset
   143
Chengsong
parents:
diff changeset
   144
\begin{definition}{mkepsBC}
Chengsong
parents:
diff changeset
   145
\begin{verbatim}
Chengsong
parents:
diff changeset
   146
  def mkepsBC(r: ARexp) : Bits = r match {
Chengsong
parents:
diff changeset
   147
    case AONE(bs) => bs
Chengsong
parents:
diff changeset
   148
    case AALTS(bs, rs) => {
Chengsong
parents:
diff changeset
   149
      val n = rs.indexWhere(bnullable)
Chengsong
parents:
diff changeset
   150
      bs ++ mkepsBC(rs(n))
Chengsong
parents:
diff changeset
   151
    }
Chengsong
parents:
diff changeset
   152
    case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2)
Chengsong
parents:
diff changeset
   153
    case ASTAR(bs, r) => bs ++ List(Z)
Chengsong
parents:
diff changeset
   154
  }
Chengsong
parents:
diff changeset
   155
\end{verbatim}
Chengsong
parents:
diff changeset
   156
\end{definition}
Chengsong
parents:
diff changeset
   157
Chengsong
parents:
diff changeset
   158
\begin{definition}{mkepsBC equicalence}
Chengsong
parents:
diff changeset
   159
\\
Chengsong
parents:
diff changeset
   160
Given 2 nullable annotated regular expressions r1, r2, if mkepsBC(r1) == mkepsBC(r2)
Chengsong
parents:
diff changeset
   161
then r1 and r2 are mkepsBC equivalent, denoted as r1 $\sim_{m\epsilon}$ r2
Chengsong
parents:
diff changeset
   162
\end{definition}
Chengsong
parents:
diff changeset
   163
Chengsong
parents:
diff changeset
   164
\begin{definition}{shorthand notation for ders}
Chengsong
parents:
diff changeset
   165
\\For the sake of reducing verbosity, we sometimes use the shorthand notation
Chengsong
parents:
diff changeset
   166
$d_{c}(r)$ for the function application bder(c, r) and $s(r)$(s here stands for simplification) for the function application bsimp(r) .
Chengsong
parents:
diff changeset
   167
\\We omit the subscript when it is clear from context what that character is and write $d(r)$ instead of $d_{c}(r)$. 
Chengsong
parents:
diff changeset
   168
\\And we omit the parentheses when no confusion can be caused. For example ders\_simp(c, r) can be written as $s(d_{c}(r))$ or even $s d r$ as we know the derivative operation is w.r.t the character c. Here the s and d are more like operators that take an annotated regular expression as an input and return an annotated regular expression as an output
Chengsong
parents:
diff changeset
   169
Chengsong
parents:
diff changeset
   170
\end{definition}
Chengsong
parents:
diff changeset
   171
Chengsong
parents:
diff changeset
   172
Chengsong
parents:
diff changeset
   173
Chengsong
parents:
diff changeset
   174
\begin{definition}{distinctBy operation expressed in a different way--how it transforms the list}\\
Chengsong
parents:
diff changeset
   175
Given two lists rs1 and rs2, we define the operation $--$:\\
Chengsong
parents:
diff changeset
   176
$rs1 -- rs2 := [r \in rs1 |  r \notin rs2]$
9
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   177
Note that the order each term appears in $rs_1 -- rs_2$ is preserved as in the original list.
8
Chengsong
parents:
diff changeset
   178
\end{definition}
Chengsong
parents:
diff changeset
   179
Chengsong
parents:
diff changeset
   180
Chengsong
parents:
diff changeset
   181
\section{Main Result}
9
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   182
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   183
\begin{lemma}{simplification function does not simplify an already simplified regex}\label{lma1}\\
8
Chengsong
parents:
diff changeset
   184
bsimp(r) == bsimp(bsimp(r)) holds for any annotated regular expression r.
Chengsong
parents:
diff changeset
   185
\end{lemma}
Chengsong
parents:
diff changeset
   186
9
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   187
\begin{lemma}{simp and mkeps}\label{lma2}\\
8
Chengsong
parents:
diff changeset
   188
When r is nullable, we have that
Chengsong
parents:
diff changeset
   189
mkeps(bsimp(r)) == mkeps(r)
Chengsong
parents:
diff changeset
   190
\end{lemma}
Chengsong
parents:
diff changeset
   191
9
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   192
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   193
%\begin{theorem}See~\cref{lma1}.\end{theorem}
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   194
%\begin{lemma}\label{lma1}\lipsum[2]\end{lemma}
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   195
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   196
\begin{lemma}{mkeps equivalence  w.r.t some syntactically different regular expressions(1 ALTS)}\label{lma3}\\
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   197
When one of the 2 regular expressions $s(r_1)$ and $s(r_2)$ is of the form ALTS(bs1, rs1), we have that $ds(ALTS(bs, r1, r2)) \sim_{m\epsilon} d(ALTS(bs, sr_1, sr_2))$
8
Chengsong
parents:
diff changeset
   198
\end{lemma}
Chengsong
parents:
diff changeset
   199
\begin{proof}
9
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   200
By opening up one of the alts and show no additional changes are made.\\
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   201
Details: $ds(ALTS(bs, r1, r2)) = d Co( bs, dB(flats(sr1, sr2)) )$
8
Chengsong
parents:
diff changeset
   202
\end{proof}
Chengsong
parents:
diff changeset
   203
9
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   204
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   205
\begin{lemma}{mkepsBC invariant manipulation of bits and notation}\label{lma7}\\
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   206
ALTS(bs, ALTS(bs1, rs1), ALTS(bs2, rs2)) $\sim_{m\epsilon}$ ALTS(bs, rs1.map(fuse(bs1, \_)) ++ rs2.map(fuse(bs2, \_)) ). \\
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   207
We also use $bs2>>rs2 $ as a shorthand notation for rs2.map(fuse(bs2,\_)).
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   208
\end{lemma}
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   209
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   210
\begin{lemma}{mkepsBC equivalence w.r.t syntactically different regular expressions(2 ALTS)}\label{lma4}\\
8
Chengsong
parents:
diff changeset
   211
$sr_1 = ALTS(bs1, rs1)$ and $sr_2 = ALTS(bs2, rs2)$ we have  $  d(sr_1 +sr_2 ) \sim_{m\epsilon} d(ALTS(bs, bs1>>rs1 ++ bs2>>rs2))$ 
Chengsong
parents:
diff changeset
   212
\end{lemma}
Chengsong
parents:
diff changeset
   213
\begin{proof}
Chengsong
parents:
diff changeset
   214
We are just fusing bits inside here, there is no other structural change.
Chengsong
parents:
diff changeset
   215
\end{proof}
Chengsong
parents:
diff changeset
   216
9
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   217
\begin{lemma}{What does dB do to two already simplified ALTS}\label{lma5}\\
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   218
$d Co(ALTS(bs, dB(bs1>>rs1 ++ bs2>>rs2))) = d Co(ALTS(bs, bs1>>rs1 ++ ((bs2>>rs2)--rs1)        )) $ 
8
Chengsong
parents:
diff changeset
   219
\end{lemma}
Chengsong
parents:
diff changeset
   220
\begin{proof}
9
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   221
We prove that $ dB(bs1>>rs1 ++ bs2>>rs2) = bs1>>rs1 ++ ((bs2>>rs2)--rs1)$.
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   222
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   223
8
Chengsong
parents:
diff changeset
   224
\end{proof}
Chengsong
parents:
diff changeset
   225
9
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   226
\begin{lemma}{after opening two previously simplified alts up into terms, length must exceed 2}\label{lma6}\\
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   227
If sr1, sr2 are of the form ALTS(bs1, rs1), ALTS(bs2, rs2) respectively, then we have that 
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   228
$Co(bs, (bs1>>rs1) ++ (bs2>>rs2)--rs1) = ALTS(bs, bs1>>rs1 ++ (bs2>>rs2)--rs1)$
8
Chengsong
parents:
diff changeset
   229
\end{lemma}
Chengsong
parents:
diff changeset
   230
\begin{proof}
9
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   231
 $Co(bs, rs) \sim_{m\epsilon} ALTS(bs, rs)$ if $rs$ is a list of length greater than or equal to 2.
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   232
 As suggested by the title of this lemma, ALTS(bs1, rs1) is a result of simplification, which means that rs1 must be composed of at least 2 distinct regular terms. This alone says that $bs1>>rs1 ++ (bs2>>rs2)--rs1$ is a list of length greater than or equal to 2, as the second operand of the concatenation operator $(bs2>>rs2) -- rs1$ can only contribute a non-negative value to the overall length of the list 
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   233
 $bs1>>rs1 ++ (bs2>>rs2)--rs1$.
8
Chengsong
parents:
diff changeset
   234
\end{proof}
Chengsong
parents:
diff changeset
   235
Chengsong
parents:
diff changeset
   236
9
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   237
\begin{lemma}{mkepsBC equivalence w.r.t syntactically different regular expressions(2 ALTS+ some deletion after derivatives)}\label{lma8}\\
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   238
$d ALTS(bs, bs1>>rs1 ++ bs2>>rs2) \sim_{m\epsilon} d ALTS(bs, bs1>>rs1 ++ ((bs2>>rs2)--rs1)        ) $ 
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   239
\end{lemma}
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   240
\begin{proof}
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   241
Let's call $bs1>>rs1$ $rs1'$ and  $bs2>>rs2$ $rs2'$. Then we need to prove $d ALTS(bs, rs1' ++rs2') \sim_{m\epsilon} d ALTS(bs, rs1'++(rs2' -- rs1') )   $.\\
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   242
We might as well omit the prime in each rs for simplicty of notation and prove $d ALTS(bs, rs1++rs2) \sim_{m\epsilon} d ALTS(bs, rs1++(rs2 -- rs1) )   $.\\
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   243
We know that the result of derivative is nullable, so there must exist an r in rs1++rs2 s.t. r is nullable.\\
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   244
If $r \in rs1$, then equivalence holds. If $r \in rs2 \wedge  r \notin rs1$, equivalence holds as well. This completes the proof.
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   245
\end{proof}
8
Chengsong
parents:
diff changeset
   246
10
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   247
\begin{lemma}{nullability relation between a regex and its simplified version}\label{lma9}\\
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   248
$r\ nullable \iff sr\ nullable $ 
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   249
\end{lemma}
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   250
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   251
\begin{lemma}{concatenation + simp invariance of mkepsBC}\label{lma10}\\
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   252
$mkepsBC r1 \cdot sr2 = mkepsBC r1 \cdot r2$ if both r1 and r2 are nullable.
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   253
\end{lemma}
8
Chengsong
parents:
diff changeset
   254
Chengsong
parents:
diff changeset
   255
Chengsong
parents:
diff changeset
   256
\begin{theorem}{Correctness Result}
Chengsong
parents:
diff changeset
   257
Chengsong
parents:
diff changeset
   258
\begin{itemize}
Chengsong
parents:
diff changeset
   259
Chengsong
parents:
diff changeset
   260
\item{}
Chengsong
parents:
diff changeset
   261
When s is a string in the language L(ar), \\
Chengsong
parents:
diff changeset
   262
ders\_simp(ar, s)  $\sim_{m\epsilon}$ ders(ar, s), \\
Chengsong
parents:
diff changeset
   263
\item{}
Chengsong
parents:
diff changeset
   264
when s is not a string of the language L(ar)
Chengsong
parents:
diff changeset
   265
ders\_simp(ar, s) is not nullable
Chengsong
parents:
diff changeset
   266
\end{itemize}
Chengsong
parents:
diff changeset
   267
\end{theorem}
Chengsong
parents:
diff changeset
   268
Chengsong
parents:
diff changeset
   269
\begin{proof}{Split into 2 parts.}
Chengsong
parents:
diff changeset
   270
\begin{itemize}
Chengsong
parents:
diff changeset
   271
\item
Chengsong
parents:
diff changeset
   272
9
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   273
8
Chengsong
parents:
diff changeset
   274
When we have an annotated regular expression ar and a string s that matches ar, by the correctness of the algorithm ders, we have that ders(ar, s) is nullable, and that mkepsBC will extract the desired bits for decoding the correct value v for the matching, and v is a POSIX value. Now we prove that mkepsBC(ders\_simp(ar, s)) yields the same bitsequence.
Chengsong
parents:
diff changeset
   275
\\
Chengsong
parents:
diff changeset
   276
We first open up the ders\_simp function into nested alternating sequences of ders and simp.
Chengsong
parents:
diff changeset
   277
Assume that s = $c_1...c_n$($n \geq 1$ ) where each of the $c_i$ are characters. 
Chengsong
parents:
diff changeset
   278
Then $ders\_simp(ar, s)$ = $s(d_{c_n}(...s(d_{c_1}(r))...))$ = $sdsd......sdr$. If we can prove that
9
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   279
$sdr \sim_{m\epsilon} dsr$ holds for any regular expression and any character, then we are done. This is because then we can push ders operation inside and move simp operation outside and have that $sdsd...sdr \sim_{m\epsilon} ssddsdsd...sdr \sim_{m\epsilon} ... \sim_{m\epsilon} s....sd....dr$.
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   280
 Using \autoref{lma1} we have that $s...sd....dr = sd...dr$. 
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   281
By \autoref{lma2}, we have $RHS \sim_{m\epsilon} d...dr$.\\
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   282
Notice that we don't actually need \autoref{lma1} here. That is because by \autoref{lma2}, we can have that $s...sd....dr \sim_{m\epsilon} sd...dr$. The equality above can be replaced by mkepsBC
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   283
 equivalence without affecting the validity of the whole proof since all we want is mkepsBC equivalence, not equality.
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   284
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   285
Now we proceed to prove that $sdr \sim_{m\epsilon} dsr$. This can be reduced to proving $dr \sim_{m\epsilon} dsr$ as we know that $dr \sim_{m\epsilon} sdr$ by \autoref{lma2}.\\
8
Chengsong
parents:
diff changeset
   286
Chengsong
parents:
diff changeset
   287
we use an induction proof. Base cases are omitted. Here are the 3 inductive cases.
Chengsong
parents:
diff changeset
   288
\begin{itemize}
Chengsong
parents:
diff changeset
   289
10
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   290
\item{$r_1+r_2$}\\
8
Chengsong
parents:
diff changeset
   291
The most difficult case is when $sr1$ and $sr2$ are both ALTS, so that they will be opened up in the flats function and some terms in sr2 might be deleted. Or otherwise we can use the argument that $d(r_1+r_2) = dr_1 + dr_2  \sim_{m\epsilon} dsr_1 + dsr_2 \sim_{m\epsilon} ds(r_1+r_2)$,
9
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   292
 the last equivalence being established by \autoref{lma3}.
8
Chengsong
parents:
diff changeset
   293
When $s(r_1), s(r_2)$ are both ALTS, we have to be more careful for the last equivalence step, namelly, $dsr_1 + dsr_2 \sim_{m\epsilon} ds(r_1+r_2)$. \\
9
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   294
We have that $LHS = dsr_1 + dsr_2 = d(sr_1 +sr_2)$. Since $sr_1 = ALTS(bs1, rs1)$ and $sr_2 = ALTS(bs2, rs2)$ we have  $  d(sr_1 +sr_2 ) \sim_{m\epsilon} d(ALTS(bs, bs1>>rs1 ++ bs2>>rs2))$ by \autoref{lma4}.
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   295
On the other hand, $RHS = ds(ALTS(bs, r1, r2)) = d Co(bs, dB(flats(s(r1), s(r2)) ) ) = d Co(bs, dB(bs1>>rs1 ++ bs2>>rs2))$ by definition of bsimp and flats.\\  
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   296
$d Co(bs, dB(bs1>>rs1 ++ bs2>>rs2)) = d Co(bs, (bs1>>rs1 ++ ((bs2>>rs2)--rs1))) $ by \autoref{lma5}.\\ $d Co(bs, (bs1>>rs1 ++ ((bs2>>rs2)--rs1)        )) = d(ALTS(bs, bs1>>rs1 ++ (bs2>>rs2)--rs1))$ by \autoref{lma6}. \\
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   297
 Using \autoref{lma8}, we have  $d(ALTS(bs, bs1>>rs1 ++ (bs2>>rs2)--rs1)) \sim_{m\epsilon} d(ALTS(bs, bs1>>rs1 ++ bs2>>rs2)) \sim_{m\epsilon}  RHS$.\\
2c02d27ec0a3 augmented version of proof sketch
Chengsong
parents: 8
diff changeset
   298
 %only the last part we don't have an equality here. We use mkeps equivalence instead because these two regexes are indeed different syntactically. However, they have even more in common than just mkeps equal. We might later augment this proof so that this equivalence relation is changed to something like regular expression equivalence.
8
Chengsong
parents:
diff changeset
   299
 This completes the proof.
Chengsong
parents:
diff changeset
   300
\item{$r*$}\\
10
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   301
$s(r*) = r*$. Our goal is trivially achieved.
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   302
\item{$r1 \cdot r2$}\\
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   303
When r1 is nullable, $ds r1r2 = dsr1 \cdot sr2 + dsr2  \sim_{m\epsilon}  dr1 \cdot sr2 + dr2 = dr1 \cdot r2 + dr2 $. The last step uses \autoref{lma10}. 
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   304
When r1 is not nullable, $ds r1r2 = dsr1 \cdot sr2  \sim_{m\epsilon}  dr1 \cdot sr2  \sim_{m\epsilon}  dr1 \cdot r2 $
8
Chengsong
parents:
diff changeset
   305
Chengsong
parents:
diff changeset
   306
\end{itemize}
Chengsong
parents:
diff changeset
   307
\item
Chengsong
parents:
diff changeset
   308
Proof of second part of the theorem: use a similar structure of argument as in the first part.
10
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   309
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   310
\item
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   311
This proof has a major flaw: it assumes all dr is nullable along the path of deriving r by s. But it could be the case that $s\in L(r)$ but $ \exists s' \in Pref(s) \ s.t.\ ders(s', r)$ is not nullable (or equivalently, $s'\notin L(r)$). One remedy for this is to replace the mkepsBC equivalence relation into some other transitive relation that entails mkepsBC equivalence.
8
Chengsong
parents:
diff changeset
   312
\end{itemize}
Chengsong
parents:
diff changeset
   313
\end{proof}
Chengsong
parents:
diff changeset
   314
10
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   315
\begin{theorem}{
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   316
This is a very strong claim that has yet to be more carefully examined and proved. However, experiments suggest a very good hope for this.}\\
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   317
Define pushbits as the following:\\
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   318
$pushbits(r) = if( r == ALTS(bs, rs) )\ then\ ALTS(Nil, rs.map(fuse(bs,\_)))  \ else\ r$.\\
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   319
Then we have \mbox{\boldmath$pushbits(ders\_simp(ar, s) ) == simp(ders(ar,s)) \ or\ ders\_simp(ar, s) == simp(ders(ar, s))$}.\\
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   320
Unfortunately this does not hold. A counterexample is\\
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   321
\begin{verbatim}
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   322
r = ASTAR(List(),ASEQ(List(),AALTS(List(),List(ACHAR(List(Z),c), ACHAR(List(S),b))),ASEQ(List(),ASTAR(List(),ACHAR(List(),a)),AALTS(List(),List(ACHAR(List(Z),a), ACHAR(List(S),a))))))
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   323
regex after ders simp
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   324
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   325
SEQ
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   326
 └-ALT List(S, S, C(b))
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   327
 |  └-SEQ
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   328
 |  |  └-STA List(S, C(a), S, C(a))
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   329
 |  |  |  └-a
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   330
 |  |  └-a List(Z)
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   331
 |  └-ONE List(S, C(a), Z, Z, C(a))
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   332
 └-STA
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   333
    └-SEQ
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   334
       └-ALT
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   335
       |  └-c List(Z)
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   336
       |  └-b List(S)
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   337
       └-SEQ
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   338
          └-STA
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   339
          |  └-a
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   340
          └-ALT
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   341
             └-a List(Z)
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   342
             └-a List(S)
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   343
regex after ders and then a single simp
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   344
SEQ
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   345
 └-ALT List(S)
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   346
 |  └-SEQ List(S, C(b))
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   347
 |  |  └-STA List(S, C(a), S, C(a))
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   348
 |  |  |  └-a
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   349
 |  |  └-a List(Z)
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   350
 |  └-ONE List(S, C(b), S, C(a), Z, Z, C(a))
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   351
 └-STA
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   352
    └-SEQ
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   353
       └-ALT
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   354
       |  └-c List(Z)
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   355
       |  └-b List(S)
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   356
       └-SEQ
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   357
          └-STA
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   358
          |  └-a
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   359
          └-ALT
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   360
             └-a List(Z)
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   361
             └-a List(S)
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   362
\end{verbatim}
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   363
\end{theorem}
2b95bcd2ac73 exp and proof
Chengsong
parents: 9
diff changeset
   364
8
Chengsong
parents:
diff changeset
   365
\end{document}
Chengsong
parents:
diff changeset
   366
Chengsong
parents:
diff changeset
   367
%The second part might still need some more development.
Chengsong
parents:
diff changeset
   368
%When s is not in the set L(ar), we have that s = s1@s2 s.t. s1 $\in$ L(ar), and any longer string that is a prefix of s does not belong to L(ar).\\
Chengsong
parents:
diff changeset
   369
%By first part of proof, we have ders(ar, s1) $\sim_{m\epsilon}$ ders\_simp(ar, s1)
Chengsong
parents:
diff changeset
   370
%.....somehow show that by correctness, der(c, ders\_simp(ar, s1)) must be not nullable. But this will need that L(ders(ar, s1)) == L(ders\_simp(ar, s1)). By part 1 of proof we only have that for any string s1c s.t. s1c $\in$ L(ar) (which is equivalent to s $\in$ L(ders(ar, s1))), s is also in L(ders\_simp(ar, s1)). That is an inclusion, not an equality. c not in L(ders(ar, s1)) does not imply c not in L(ders\_simp(ar, s1))
Chengsong
parents:
diff changeset
   371
%So this path stuck here.