corr_pr_sketch.tex
changeset 17 3241b1e71633
parent 11 9c1ca6d6e190
child 73 569280c1f56c
--- a/corr_pr_sketch.tex	Wed May 08 22:09:59 2019 +0100
+++ b/corr_pr_sketch.tex	Tue Jun 25 18:56:52 2019 +0100
@@ -18,71 +18,102 @@
 \theoremstyle{definition}
  \newtheorem{definition}{Definition}
 \begin{document}
-This is a sketch proof for the correctness of the algorithm ders\_simp.
-\section{Function Definitions}
+This is an outline of the structure of the paper with a little bit of flesh in it.
+\section{The Flow of thought process}
 
-\begin{definition}{Bits}
-\begin{verbatim}
-abstract class Bit
-case object Z extends Bit
-case object S extends Bit
-case class C(c: Char) extends Bit
 
-type Bits = List[Bit]
-\end{verbatim}
-\end{definition}
-
-\begin{definition}{Annotated Regular Expressions}
+\begin{definition}{Regular Expressions and values}
 \begin{verbatim}
-abstract class ARexp 
-case object AZERO extends ARexp
-case class AONE(bs: Bits) extends ARexp
-case class ACHAR(bs: Bits, f: Char) extends ARexp
-case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp 
-case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp 
-case class ASTAR(bs: Bits, r: ARexp) extends ARexp 
+TODO
 \end{verbatim}
 \end{definition}
 
-\begin{definition}{bnullable}
+Value is.a parse tree for the regular expression matching the string.
+
+
+
+\begin{definition}{nullable}
 \begin{verbatim}
-  def bnullable (r: ARexp) : Boolean = r match {
-    case AZERO => false
-    case AONE(_) => true
-    case ACHAR(_,_) => false
-    case AALTS(_, rs) => rs.exists(bnullable)
-    case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
-    case ASTAR(_, _) => true
-  }
+TODO
+\end{verbatim}
+\end{definition}
+The idea behind nullable: whether it contains the empty string.
+
+\begin{definition}{derivatives}
+TODO
+
+\end{definition}
+
+This definition can be used for matching algorithm.
+
+\begin{definition}{matcher}
+TODO
+\end{definition}
+
+\begin{definition}{POSIX values}
+TODO
+\end{definition}
+
+\begin{definition}{POSIX lexer algorithm}
+TODO
+\end{definition}
+
+\begin{definition}{POSIX lexer algorithm with simplification}
+TODO
+\end{definition}
+This simplification algorithm is rather complicated as it entangles derivative, simplification and value reconstruction. We need to split the regular expression structure of the information for lexing so that simplifcation only changes the regex but does not destroy the information for reconstructing the resulting value.\\
+
+Introduce regex with auxiliary information:
+
+\begin{definition}{annotated regular expression}
+TODO
+\end{definition}
+
+\begin{definition}{encoding}
+TODO
+\end{definition}
+Encoding translates values into bit codes with some information loss.
+
+\begin{definition}{decoding}
+TODO
+\end{definition}
+Decoding translates bitcodes back into values with the help of regex to recover the structure.\\
+
+During different phases of lexing, we sometimes already know what the value would look like if we match the branch of regex with the string(e.g. a STAR with 1 more iteration, a left/right value), so we can partially encode the value at diffferent phases of the algorithm for later decoding.
+\\
+Examples of such partial encoding:
+
+\begin{definition}{internalise}
+TODO
+\end{definition}
+When doing internalise on ALT:\\
+Whichever branch is chosen, we know exactly the shape of the value, and therefore can get the bit code for such a value: Left corresponds to Z and Right to S
+
+\begin{definition}{bmkeps}
+TODO
+\end{definition}
+bmkeps on the STAR case:\\
+We know there could be no more iteration for a star if we want to get a POSIX value for an empty string, so the value must be Stars [], corresponding to an S in the bit code.
+
+\begin{definition}{bder}
+TODO
+\end{definition}
+SEQ case with the first regex nullable:\\
+bmkeps will extract the value for how a1 mathces the empty string and encode it into a bit sequence.
+
+\begin{definition}{blexer}
+\begin{verbatim}
+TODO
 \end{verbatim}
 \end{definition}
 
-\begin{definition}{ders\_simp}
-\begin{verbatim}
-def ders_simp(r: ARexp, s: List[Char]): ARexp = {
- s match {
-   case Nil => r 
-   case c::cs => ders_simp(bsimp(bder(c, r)), cs)
- }
-}\end{verbatim}
-\end{definition}
+adding simplifcation.\\
+
+size of simplified regex: smaller than Antimirov's pder.\\
 
-\begin{definition}{bder}
-\begin{verbatim}
-def bder(c: Char, r: ARexp) : ARexp = r match {
- case AZERO => AZERO
- case AONE(_) => AZERO
- case ACHAR(bs, f) => if (c == f) AONE(bs:::List(C(c))) else AZERO
- case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
- case ASEQ(bs, r1, r2) => {
-  if (bnullable(r1)) AALT(bs, ASEQ(Nil, bder(c, r1), r2), fuse(mkepsBC(r1), bder(c, r2)))
-  else ASEQ(bs, bder(c, r1), r2)
-  }
- case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
-}
-\end{verbatim}
-\end{definition}
+
 
+The rest of the document is the residual from a previous doc and may be deleted.\\
 \begin{definition}{bsimp}
 \begin{verbatim}
   def bsimp(r: ARexp): ARexp = r match {