corr_pr_sketch.tex
changeset 17 3241b1e71633
parent 11 9c1ca6d6e190
child 73 569280c1f56c
equal deleted inserted replaced
16:c51178fa85fe 17:3241b1e71633
    16 \newcommand{\lemmaautorefname}{Lemma}
    16 \newcommand{\lemmaautorefname}{Lemma}
    17 
    17 
    18 \theoremstyle{definition}
    18 \theoremstyle{definition}
    19  \newtheorem{definition}{Definition}
    19  \newtheorem{definition}{Definition}
    20 \begin{document}
    20 \begin{document}
    21 This is a sketch proof for the correctness of the algorithm ders\_simp.
    21 This is an outline of the structure of the paper with a little bit of flesh in it.
    22 \section{Function Definitions}
    22 \section{The Flow of thought process}
    23 
    23 
    24 \begin{definition}{Bits}
    24 
    25 \begin{verbatim}
    25 \begin{definition}{Regular Expressions and values}
    26 abstract class Bit
    26 \begin{verbatim}
    27 case object Z extends Bit
    27 TODO
    28 case object S extends Bit
    28 \end{verbatim}
    29 case class C(c: Char) extends Bit
    29 \end{definition}
    30 
    30 
    31 type Bits = List[Bit]
    31 Value is.a parse tree for the regular expression matching the string.
    32 \end{verbatim}
    32 
    33 \end{definition}
    33 
    34 
    34 
    35 \begin{definition}{Annotated Regular Expressions}
    35 \begin{definition}{nullable}
    36 \begin{verbatim}
    36 \begin{verbatim}
    37 abstract class ARexp 
    37 TODO
    38 case object AZERO extends ARexp
    38 \end{verbatim}
    39 case class AONE(bs: Bits) extends ARexp
    39 \end{definition}
    40 case class ACHAR(bs: Bits, f: Char) extends ARexp
    40 The idea behind nullable: whether it contains the empty string.
    41 case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp 
    41 
    42 case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp 
    42 \begin{definition}{derivatives}
    43 case class ASTAR(bs: Bits, r: ARexp) extends ARexp 
    43 TODO
    44 \end{verbatim}
    44 
    45 \end{definition}
    45 \end{definition}
    46 
    46 
    47 \begin{definition}{bnullable}
    47 This definition can be used for matching algorithm.
    48 \begin{verbatim}
    48 
    49   def bnullable (r: ARexp) : Boolean = r match {
    49 \begin{definition}{matcher}
    50     case AZERO => false
    50 TODO
    51     case AONE(_) => true
    51 \end{definition}
    52     case ACHAR(_,_) => false
    52 
    53     case AALTS(_, rs) => rs.exists(bnullable)
    53 \begin{definition}{POSIX values}
    54     case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
    54 TODO
    55     case ASTAR(_, _) => true
    55 \end{definition}
    56   }
    56 
    57 \end{verbatim}
    57 \begin{definition}{POSIX lexer algorithm}
    58 \end{definition}
    58 TODO
    59 
    59 \end{definition}
    60 \begin{definition}{ders\_simp}
    60 
    61 \begin{verbatim}
    61 \begin{definition}{POSIX lexer algorithm with simplification}
    62 def ders_simp(r: ARexp, s: List[Char]): ARexp = {
    62 TODO
    63  s match {
    63 \end{definition}
    64    case Nil => r 
    64 This simplification algorithm is rather complicated as it entangles derivative, simplification and value reconstruction. We need to split the regular expression structure of the information for lexing so that simplifcation only changes the regex but does not destroy the information for reconstructing the resulting value.\\
    65    case c::cs => ders_simp(bsimp(bder(c, r)), cs)
    65 
    66  }
    66 Introduce regex with auxiliary information:
    67 }\end{verbatim}
    67 
    68 \end{definition}
    68 \begin{definition}{annotated regular expression}
       
    69 TODO
       
    70 \end{definition}
       
    71 
       
    72 \begin{definition}{encoding}
       
    73 TODO
       
    74 \end{definition}
       
    75 Encoding translates values into bit codes with some information loss.
       
    76 
       
    77 \begin{definition}{decoding}
       
    78 TODO
       
    79 \end{definition}
       
    80 Decoding translates bitcodes back into values with the help of regex to recover the structure.\\
       
    81 
       
    82 During different phases of lexing, we sometimes already know what the value would look like if we match the branch of regex with the string(e.g. a STAR with 1 more iteration, a left/right value), so we can partially encode the value at diffferent phases of the algorithm for later decoding.
       
    83 \\
       
    84 Examples of such partial encoding:
       
    85 
       
    86 \begin{definition}{internalise}
       
    87 TODO
       
    88 \end{definition}
       
    89 When doing internalise on ALT:\\
       
    90 Whichever branch is chosen, we know exactly the shape of the value, and therefore can get the bit code for such a value: Left corresponds to Z and Right to S
       
    91 
       
    92 \begin{definition}{bmkeps}
       
    93 TODO
       
    94 \end{definition}
       
    95 bmkeps on the STAR case:\\
       
    96 We know there could be no more iteration for a star if we want to get a POSIX value for an empty string, so the value must be Stars [], corresponding to an S in the bit code.
    69 
    97 
    70 \begin{definition}{bder}
    98 \begin{definition}{bder}
    71 \begin{verbatim}
    99 TODO
    72 def bder(c: Char, r: ARexp) : ARexp = r match {
   100 \end{definition}
    73  case AZERO => AZERO
   101 SEQ case with the first regex nullable:\\
    74  case AONE(_) => AZERO
   102 bmkeps will extract the value for how a1 mathces the empty string and encode it into a bit sequence.
    75  case ACHAR(bs, f) => if (c == f) AONE(bs:::List(C(c))) else AZERO
   103 
    76  case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
   104 \begin{definition}{blexer}
    77  case ASEQ(bs, r1, r2) => {
   105 \begin{verbatim}
    78   if (bnullable(r1)) AALT(bs, ASEQ(Nil, bder(c, r1), r2), fuse(mkepsBC(r1), bder(c, r2)))
   106 TODO
    79   else ASEQ(bs, bder(c, r1), r2)
   107 \end{verbatim}
    80   }
   108 \end{definition}
    81  case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
   109 
    82 }
   110 adding simplifcation.\\
    83 \end{verbatim}
   111 
    84 \end{definition}
   112 size of simplified regex: smaller than Antimirov's pder.\\
    85 
   113 
       
   114 
       
   115 
       
   116 The rest of the document is the residual from a previous doc and may be deleted.\\
    86 \begin{definition}{bsimp}
   117 \begin{definition}{bsimp}
    87 \begin{verbatim}
   118 \begin{verbatim}
    88   def bsimp(r: ARexp): ARexp = r match {
   119   def bsimp(r: ARexp): ARexp = r match {
    89     case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
   120     case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
    90         case (AZERO, _) => AZERO
   121         case (AZERO, _) => AZERO