progs/scala/re-bit.scala
changeset 286 804fbb227568
parent 204 cd9e40280784
child 294 c1de75d20aa4
equal deleted inserted replaced
285:acc027964d10 286:804fbb227568
     7 case object ONE extends Rexp
     7 case object ONE extends Rexp
     8 case class CHAR(c: Char) extends Rexp
     8 case class CHAR(c: Char) extends Rexp
     9 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
     9 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
    10 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
    10 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
    11 case class STAR(r: Rexp) extends Rexp 
    11 case class STAR(r: Rexp) extends Rexp 
    12 case class RECD(x: String, r: Rexp) extends Rexp
    12 
    13 
    13 
    14 abstract class ARexp 
    14 abstract class ARexp 
    15 case object AZERO extends ARexp
    15 case object AZERO extends ARexp
    16 case class AONE(bs: List[Boolean]) extends ARexp
    16 case class AONE(bs: List[Boolean]) extends ARexp
    17 case class ACHAR(bs: List[Boolean], c: Char) extends ARexp
    17 case class ACHAR(bs: List[Boolean], c: Char) extends ARexp
    24 case class Chr(c: Char) extends Val
    24 case class Chr(c: Char) extends Val
    25 case class Sequ(v1: Val, v2: Val) extends Val
    25 case class Sequ(v1: Val, v2: Val) extends Val
    26 case class Left(v: Val) extends Val
    26 case class Left(v: Val) extends Val
    27 case class Right(v: Val) extends Val
    27 case class Right(v: Val) extends Val
    28 case class Stars(vs: List[Val]) extends Val
    28 case class Stars(vs: List[Val]) extends Val
    29 case class Rec(x: String, v: Val) extends Val
    29 
    30    
    30    
    31 // some convenience for typing in regular expressions
    31 // some convenience for typing in regular expressions
    32 def charlist2rexp(s : List[Char]): Rexp = s match {
    32 def charlist2rexp(s : List[Char]): Rexp = s match {
    33   case Nil => ONE
    33   case Nil => ONE
    34   case c::Nil => CHAR(c)
    34   case c::Nil => CHAR(c)
    46   def | (r: Rexp) = ALT(s, r)
    46   def | (r: Rexp) = ALT(s, r)
    47   def | (r: String) = ALT(s, r)
    47   def | (r: String) = ALT(s, r)
    48   def % = STAR(s)
    48   def % = STAR(s)
    49   def ~ (r: Rexp) = SEQ(s, r)
    49   def ~ (r: Rexp) = SEQ(s, r)
    50   def ~ (r: String) = SEQ(s, r)
    50   def ~ (r: String) = SEQ(s, r)
    51   def $ (r: Rexp) = RECD(s, r)
    51 }
    52 }
    52 
       
    53 
       
    54 // nullable function: tests whether the regular 
       
    55 // expression can recognise the empty string
       
    56 def nullable (r: Rexp) : Boolean = r match {
       
    57   case ZERO => false
       
    58   case ONE => true
       
    59   case CHAR(_) => false
       
    60   case ALT(r1, r2) => nullable(r1) || nullable(r2)
       
    61   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
       
    62   case STAR(_) => true
       
    63 }
       
    64 
       
    65 // derivative of a regular expression w.r.t. a character
       
    66 def der (c: Char, r: Rexp) : Rexp = r match {
       
    67   case ZERO => ZERO
       
    68   case ONE => ZERO
       
    69   case CHAR(d) => if (c == d) ONE else ZERO
       
    70   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
       
    71   case SEQ(r1, r2) => 
       
    72     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
       
    73     else SEQ(der(c, r1), r2)
       
    74   case STAR(r) => SEQ(der(c, r), STAR(r))
       
    75 }
       
    76 
       
    77 // derivative w.r.t. a string (iterates der)
       
    78 def ders (s: List[Char], r: Rexp) : Rexp = s match {
       
    79   case Nil => r
       
    80   case c::s => ders(s, der(c, r))
       
    81 }
       
    82 
       
    83 // mkeps and injection part
       
    84 def mkeps(r: Rexp) : Val = r match {
       
    85   case ONE => Empty
       
    86   case ALT(r1, r2) => 
       
    87     if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
       
    88   case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
       
    89   case STAR(r) => Stars(Nil)
       
    90 }
       
    91 
       
    92 
       
    93 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
       
    94   case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
       
    95   case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
       
    96   case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
       
    97   case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
       
    98   case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
       
    99   case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
       
   100   case (CHAR(d), Empty) => Chr(c) 
       
   101 }
       
   102 
       
   103 // main lexing function (produces a value)
       
   104 // - no simplification
       
   105 def lex(r: Rexp, s: List[Char]) : Val = s match {
       
   106   case Nil => if (nullable(r)) mkeps(r) 
       
   107               else throw new Exception("Not matched")
       
   108   case c::cs => inj(r, c, lex(der(c, r), cs))
       
   109 }
       
   110 
       
   111 def lexing(r: Rexp, s: String) : Val = lex(r, s.toList)
       
   112 
       
   113 
       
   114 
       
   115 // Bitcoded + Annotation
       
   116 //=======================
    53 
   117 
    54 // translation into ARexps
   118 // translation into ARexps
    55 def fuse(bs: List[Boolean], r: ARexp) : ARexp = r match {
   119 def fuse(bs: List[Boolean], r: ARexp) : ARexp = r match {
    56   case AZERO => AZERO
   120   case AZERO => AZERO
    57   case AONE(cs) => AONE(bs ++ cs)
   121   case AONE(cs) => AONE(bs ++ cs)
    66   case ONE => AONE(Nil)
   130   case ONE => AONE(Nil)
    67   case CHAR(c) => ACHAR(Nil, c)
   131   case CHAR(c) => ACHAR(Nil, c)
    68   case ALT(r1, r2) => AALT(Nil, fuse(List(false), internalise(r1)), fuse(List(true), internalise(r2)))
   132   case ALT(r1, r2) => AALT(Nil, fuse(List(false), internalise(r1)), fuse(List(true), internalise(r2)))
    69   case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
   133   case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
    70   case STAR(r) => ASTAR(Nil, internalise(r))
   134   case STAR(r) => ASTAR(Nil, internalise(r))
    71   case RECD(x, r) => internalise(r)
       
    72 }
   135 }
    73 
   136 
    74 internalise(("a" | "ab") ~ ("b" | ""))
   137 internalise(("a" | "ab") ~ ("b" | ""))
       
   138 
       
   139 def retrieve(r: ARexp, v: Val) : List[Boolean] = (r, v) match {
       
   140   case (AONE(bs), Empty) => bs
       
   141   case (ACHAR(bs, c), Chr(d)) => bs
       
   142   case (AALT(bs, r1, r2), Left(v)) => bs ++ retrieve(r1, v)
       
   143   case (AALT(bs, r1, r2), Right(v)) => bs ++ retrieve(r2, v)
       
   144   case (ASEQ(bs, r1, r2), Sequ(v1, v2)) => 
       
   145     bs ++ retrieve(r1, v1) ++ retrieve(r2, v2)
       
   146   case (ASTAR(bs, r), Stars(Nil)) => bs ++ List(true)
       
   147   case (ASTAR(bs, r), Stars(v :: vs)) => 
       
   148      bs ++ List(false) ++ retrieve(r, v) ++ retrieve(ASTAR(Nil, r), Stars(vs))
       
   149 }
    75 
   150 
    76 
   151 
    77 def decode_aux(r: Rexp, bs: List[Boolean]) : (Val, List[Boolean]) = (r, bs) match {
   152 def decode_aux(r: Rexp, bs: List[Boolean]) : (Val, List[Boolean]) = (r, bs) match {
    78   case (ONE, bs) => (Empty, bs)
   153   case (ONE, bs) => (Empty, bs)
    79   case (CHAR(c), bs) => (Chr(c), bs)
   154   case (CHAR(c), bs) => (Chr(c), bs)
    94     val (v, bs1) = decode_aux(r1, bs)
   169     val (v, bs1) = decode_aux(r1, bs)
    95     val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1)
   170     val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1)
    96     (Stars(v::vs), bs2)
   171     (Stars(v::vs), bs2)
    97   }
   172   }
    98   case (STAR(_), true::bs) => (Stars(Nil), bs)
   173   case (STAR(_), true::bs) => (Stars(Nil), bs)
    99   case (RECD(x, r1), bs) => {
       
   100     val (v, bs1) = decode_aux(r1, bs)
       
   101     (Rec(x, v), bs1)
       
   102   }
       
   103 }
   174 }
   104 
   175 
   105 def decode(r: Rexp, bs: List[Boolean]) = decode_aux(r, bs) match {
   176 def decode(r: Rexp, bs: List[Boolean]) = decode_aux(r, bs) match {
   106   case (v, Nil) => v
   177   case (v, Nil) => v
   107   case _ => throw new Exception("Not decodable")
   178   case _ => throw new Exception("Not decodable")
   108 }
   179 }
   109 
   180 
       
   181 def encode(v: Val) : List[Boolean] = v match {
       
   182   case Empty => Nil
       
   183   case Chr(c) => Nil
       
   184   case Left(v) => false :: encode(v)
       
   185   case Right(v) => true :: encode(v)
       
   186   case Sequ(v1, v2) => encode(v1) ::: encode(v2)
       
   187   case Stars(Nil) => List(true)
       
   188   case Stars(v::vs) => false :: encode(v) ::: encode(Stars(vs))
       
   189 }
       
   190 
       
   191 
   110 // nullable function: tests whether the aregular 
   192 // nullable function: tests whether the aregular 
   111 // expression can recognise the empty string
   193 // expression can recognise the empty string
   112 def nullable (r: ARexp) : Boolean = r match {
   194 def anullable (r: ARexp) : Boolean = r match {
   113   case AZERO => false
   195   case AZERO => false
   114   case AONE(_) => true
   196   case AONE(_) => true
   115   case ACHAR(_,_) => false
   197   case ACHAR(_,_) => false
   116   case AALT(_, r1, r2) => nullable(r1) || nullable(r2)
   198   case AALT(_, r1, r2) => anullable(r1) || anullable(r2)
   117   case ASEQ(_, r1, r2) => nullable(r1) && nullable(r2)
   199   case ASEQ(_, r1, r2) => anullable(r1) && anullable(r2)
   118   case ASTAR(_, _) => true
   200   case ASTAR(_, _) => true
   119 }
   201 }
   120 
   202 
   121 def mkepsBC(r: ARexp) : List[Boolean] = r match {
   203 def mkepsBC(r: ARexp) : List[Boolean] = r match {
   122   case AONE(bs) => bs
   204   case AONE(bs) => bs
   123   case AALT(bs, r1, r2) => 
   205   case AALT(bs, r1, r2) => 
   124     if (nullable(r1)) bs ++ mkepsBC(r1) else bs ++ mkepsBC(r2)
   206     if (anullable(r1)) bs ++ mkepsBC(r1) else bs ++ mkepsBC(r2)
   125   case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2)
   207   case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2)
   126   case ASTAR(bs, r) => bs ++ List(true)
   208   case ASTAR(bs, r) => bs ++ List(true)
   127 }
   209 }
   128 
   210 
   129 // derivative of a regular expression w.r.t. a character
   211 // derivative of a regular expression w.r.t. a character
   130 def der (c: Char, r: ARexp) : ARexp = r match {
   212 def ader(c: Char, r: ARexp) : ARexp = r match {
   131   case AZERO => AZERO
   213   case AZERO => AZERO
   132   case AONE(_) => AZERO
   214   case AONE(_) => AZERO
   133   case ACHAR(bs, d) => if (c == d) AONE(bs) else AZERO
   215   case ACHAR(bs, d) => if (c == d) AONE(bs) else AZERO
   134   case AALT(bs, r1, r2) => AALT(bs, der(c, r1), der(c, r2))
   216   case AALT(bs, r1, r2) => AALT(bs, ader(c, r1), ader(c, r2))
   135   case ASEQ(bs, r1, r2) => 
   217   case ASEQ(bs, r1, r2) => 
   136     if (nullable(r1)) AALT(bs, ASEQ(Nil, der(c, r1), r2), fuse(mkepsBC(r1), der(c, r2)))
   218     if (anullable(r1)) AALT(bs, ASEQ(Nil, ader(c, r1), r2), fuse(mkepsBC(r1), ader(c, r2)))
   137     else ASEQ(bs, der(c, r1), r2)
   219     else ASEQ(bs, ader(c, r1), r2)
   138   case ASTAR(bs, r) => ASEQ(bs, fuse(List(false), der(c, r)), ASTAR(Nil, r))
   220   case ASTAR(bs, r) => ASEQ(bs, fuse(List(false), ader(c, r)), ASTAR(Nil, r))
   139 }
   221 }
   140 
   222 
   141 // derivative w.r.t. a string (iterates der)
   223 // derivative w.r.t. a string (iterates der)
   142 @tailrec
   224 @tailrec
   143 def ders (s: List[Char], r: ARexp) : ARexp = s match {
   225 def aders (s: List[Char], r: ARexp) : ARexp = s match {
   144   case Nil => r
   226   case Nil => r
   145   case c::s => ders(s, der(c, r))
   227   case c::s => aders(s, ader(c, r))
   146 }
   228 }
   147 
   229 
   148 // main unsimplified lexing function (produces a value)
   230 // main unsimplified lexing function (produces a value)
   149 def lex(r: ARexp, s: List[Char]) : List[Boolean] = s match {
   231 def alex(r: ARexp, s: List[Char]) : List[Boolean] = s match {
   150   case Nil => if (nullable(r)) mkepsBC(r) else throw new Exception("Not matched")
   232   case Nil => if (anullable(r)) mkepsBC(r) else throw new Exception("Not matched")
   151   case c::cs => lex(der(c, r), cs)
   233   case c::cs => alex(ader(c, r), cs)
   152 }
   234 }
   153 
   235 
   154 def pre_lexing(r: Rexp, s: String) = lex(internalise(r), s.toList)
   236 def pre_alexing(r: ARexp, s: String)  : List[Boolean] = alex(r, s.toList)
   155 def lexing(r: Rexp, s: String) : Val = decode(r, lex(internalise(r), s.toList))
   237 def alexing(r: Rexp, s: String) : Val = decode(r, pre_alexing(internalise(r), s))
   156 
   238 
   157 
   239 
   158 
   240 def asimp(r: ARexp): ARexp = r match {
   159 def simp(r: ARexp): ARexp = r match {
   241   case ASEQ(bs1, r1, r2) => (asimp(r1), asimp(r2)) match {
   160   case ASEQ(bs1, r1, r2) => (simp(r1), simp(r2)) match {
       
   161       case (AZERO, _) => AZERO
   242       case (AZERO, _) => AZERO
   162       case (_, AZERO) => AZERO
   243       case (_, AZERO) => AZERO
   163       case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
   244       case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
   164       case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
   245       case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
   165   }
   246   }
   166   case AALT(bs1, r1, r2) => (simp(r1), simp(r2)) match {
   247   case AALT(bs1, r1, r2) => (asimp(r1), asimp(r2)) match {
   167       case (AZERO, r2s) => fuse(bs1, r2s)
   248       case (AZERO, r2s) => fuse(bs1, r2s)
   168       case (r1s, AZERO) => fuse(bs1, r1s)
   249       case (r1s, AZERO) => fuse(bs1, r1s)
   169       case (r1s, r2s) => AALT(bs1, r1s, r2s)
   250       case (r1s, r2s) => AALT(bs1, r1s, r2s)
   170   }
   251   }
   171   case r => r
   252   case r => r
   172 }
   253 }
   173 
   254 
   174 def lex_simp(r: ARexp, s: List[Char]) : List[Boolean] = s match {
   255 def alex_simp(r: ARexp, s: List[Char]) : List[Boolean] = s match {
   175   case Nil => if (nullable(r)) mkepsBC(r) else throw new Exception("Not matched")
   256   case Nil => if (anullable(r)) mkepsBC(r) 
   176   case c::cs => lex(simp(der(c, r)), cs)
   257               else throw new Exception("Not matched")
   177 }
   258   case c::cs => alex(asimp(ader(c, r)), cs)
   178 
   259 }
   179 def lexing_simp(r: Rexp, s: String) : Val = decode(r, lex_simp(internalise(r), s.toList))
   260 
       
   261 def alexing_simp(r: Rexp, s: String) : Val = 
       
   262   decode(r, alex_simp(internalise(r), s.toList))
   180 
   263 
   181 
   264 
   182 
   265 
   183 // extracts a string from value
   266 // extracts a string from value
   184 def flatten(v: Val) : String = v match {
   267 def flatten(v: Val) : String = v match {
   186   case Chr(c) => c.toString
   269   case Chr(c) => c.toString
   187   case Left(v) => flatten(v)
   270   case Left(v) => flatten(v)
   188   case Right(v) => flatten(v)
   271   case Right(v) => flatten(v)
   189   case Sequ(v1, v2) => flatten(v1) + flatten(v2)
   272   case Sequ(v1, v2) => flatten(v1) + flatten(v2)
   190   case Stars(vs) => vs.map(flatten).mkString
   273   case Stars(vs) => vs.map(flatten).mkString
   191   case Rec(_, v) => flatten(v)
       
   192 }
   274 }
   193 
   275 
   194 // extracts an environment from a value
   276 // extracts an environment from a value
   195 def env(v: Val) : List[(String, String)] = v match {
   277 def env(v: Val) : List[(String, String)] = v match {
   196   case Empty => Nil
   278   case Empty => Nil
   197   case Chr(c) => Nil
   279   case Chr(c) => Nil
   198   case Left(v) => env(v)
   280   case Left(v) => env(v)
   199   case Right(v) => env(v)
   281   case Right(v) => env(v)
   200   case Sequ(v1, v2) => env(v1) ::: env(v2)
   282   case Sequ(v1, v2) => env(v1) ::: env(v2)
   201   case Stars(vs) => vs.flatMap(env)
   283   case Stars(vs) => vs.flatMap(env)
   202   case Rec(x, v) => (x, flatten(v))::env(v)
       
   203 }
   284 }
   204 
   285 
   205 // Some Tests
   286 // Some Tests
   206 //============
   287 //============
   207 
   288 
   212   (end - start)/(i * 1.0e9)
   293   (end - start)/(i * 1.0e9)
   213 }
   294 }
   214 
   295 
   215 
   296 
   216 val rf = ("a" | "ab") ~ ("ab" | "")
   297 val rf = ("a" | "ab") ~ ("ab" | "")
   217 println(pre_lexing(rf, "ab"))
   298 println(pre_alexing(internalise(rf), "ab"))
   218 println(lexing(rf, "ab"))
   299 println(alexing(rf, "ab"))
   219 println(lexing_simp(rf, "ab"))
   300 println(alexing_simp(rf, "ab"))
   220 
   301 
   221 val r0 = ("a" | "ab") ~ ("b" | "")
   302 val r0 = ("a" | "ab") ~ ("b" | "")
   222 println(pre_lexing(r0, "ab"))
   303 println(pre_alexing(internalise(r0), "ab"))
   223 println(lexing(r0, "ab"))
   304 println(alexing(r0, "ab"))
   224 println(lexing_simp(r0, "ab"))
   305 println(alexing_simp(r0, "ab"))
   225 
   306 
   226 val r1 = ("a" | "ab") ~ ("bcd" | "cd")
   307 val r1 = ("a" | "ab") ~ ("bcd" | "cd")
   227 println(lexing(r1, "abcd"))
   308 println(alexing(r1, "abcd"))
   228 println(lexing_simp(r1, "abcd"))
   309 println(alexing_simp(r1, "abcd"))
   229 
   310 
   230 println(lexing((("" | "a") ~ ("ab" | "b")), "ab"))
   311 println(alexing((("" | "a") ~ ("ab" | "b")), "ab"))
   231 println(lexing_simp((("" | "a") ~ ("ab" | "b")), "ab"))
   312 println(alexing_simp((("" | "a") ~ ("ab" | "b")), "ab"))
   232 
   313 
   233 println(lexing((("" | "a") ~ ("b" | "ab")), "ab"))
   314 println(alexing((("" | "a") ~ ("b" | "ab")), "ab"))
   234 println(lexing_simp((("" | "a") ~ ("b" | "ab")), "ab"))
   315 println(alexing_simp((("" | "a") ~ ("b" | "ab")), "ab"))
   235 
   316 
   236 println(lexing((("" | "a") ~ ("c" | "ab")), "ab"))
   317 println(alexing((("" | "a") ~ ("c" | "ab")), "ab"))
   237 println(lexing_simp((("" | "a") ~ ("c" | "ab")), "ab"))
   318 println(alexing_simp((("" | "a") ~ ("c" | "ab")), "ab"))
   238 
       
   239 
       
   240 // Two Simple Tests for the While Language
       
   241 //========================================
       
   242 
       
   243 // Lexing Rules 
       
   244 
       
   245 def PLUS(r: Rexp) = r ~ r.%
       
   246 val SYM = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
       
   247 val DIGIT = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
       
   248 val ID = SYM ~ (SYM | DIGIT).% 
       
   249 val NUM = PLUS(DIGIT)
       
   250 val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" | "true" | "false"
       
   251 val SEMI: Rexp = ";"
       
   252 val OP: Rexp = ":=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/"
       
   253 val WHITESPACE = PLUS(" " | "\n" | "\t")
       
   254 val RPAREN: Rexp = ")"
       
   255 val LPAREN: Rexp = "("
       
   256 val BEGIN: Rexp = "{"
       
   257 val END: Rexp = "}"
       
   258 val STRING: Rexp = "\"" ~ SYM.% ~ "\""
       
   259 
       
   260 val WHILE_REGS = (("k" $ KEYWORD) | 
       
   261                   ("i" $ ID) | 
       
   262                   ("o" $ OP) | 
       
   263                   ("n" $ NUM) | 
       
   264                   ("s" $ SEMI) | 
       
   265                   ("str" $ STRING) |
       
   266                   ("p" $ (LPAREN | RPAREN)) | 
       
   267                   ("b" $ (BEGIN | END)) | 
       
   268                   ("w" $ WHITESPACE)).%
       
   269 
       
   270 println("prog0 test")
       
   271 
       
   272 val prog0 = """read n"""
       
   273 println(env(lexing(WHILE_REGS, prog0)))
       
   274 println(env(lexing_simp(WHILE_REGS, prog0)))
       
   275 
       
   276 println("prog1 test")
       
   277 
       
   278 val prog1 = """read  n; write (n)"""
       
   279 println(env(lexing(WHILE_REGS, prog1)))
       
   280 println(env(lexing_simp(WHILE_REGS, prog1)))
       
   281 
   319 
   282 
   320 
   283 // Sulzmann's tests
   321 // Sulzmann's tests
   284 //==================
   322 //==================
   285 
   323 
   286 val sulzmann = ("a" | "b" | "ab").%
   324 val sulzmann = ("a" | "b" | "ab").%
   287 
   325 
   288 println(lexing(sulzmann, "a" * 10))
   326 println(alexing(sulzmann, "a" * 10))
   289 println(lexing_simp(sulzmann, "a" * 10))
   327 println(alexing_simp(sulzmann, "a" * 10))
   290 
   328 
   291 for (i <- 1 to 6501 by 500) {
   329 for (i <- 1 to 4001 by 500) {
   292   println(i + ": " + "%.5f".format(time_needed(1, lexing_simp(sulzmann, "a" * i))))
   330   println(i + ": " + "%.5f".format(time_needed(1, alexing_simp(sulzmann, "a" * i))))
   293 }
   331 }
   294 
   332 
   295 for (i <- 1 to 16 by 5) {
   333 for (i <- 1 to 16 by 5) {
   296   println(i + ": " + "%.5f".format(time_needed(1, lexing_simp(sulzmann, "ab" * i))))
   334   println(i + ": " + "%.5f".format(time_needed(1, alexing_simp(sulzmann, "ab" * i))))
   297 }
   335 }
       
   336 
       
   337 
       
   338 
       
   339 
       
   340 // some automatic testing
       
   341 
       
   342 def clear() = {
       
   343   print("")
       
   344   //print("\33[H\33[2J")
       
   345 }
       
   346 
       
   347 // enumerates regular expressions until a certain depth
       
   348 def enum(n: Int, s: String) : Stream[Rexp] = n match {
       
   349   case 0 => ZERO #:: ONE #:: s.toStream.map(CHAR)
       
   350   case n => {  
       
   351     val rs = enum(n - 1, s)
       
   352     rs #:::
       
   353     (for (r1 <- rs; r2 <- rs) yield ALT(r1, r2)) #:::
       
   354     (for (r1 <- rs; r2 <- rs) yield SEQ(r1, r2)) #:::
       
   355     (for (r1 <- rs) yield STAR(r1))
       
   356   }
       
   357 }
       
   358 
       
   359 
       
   360 //enum(2, "ab").size
       
   361 //enum(3, "ab").size
       
   362 //enum(3, "abc").size
       
   363 //enum(4, "ab").size
       
   364 
       
   365 import scala.util.Try
       
   366 
       
   367 def test_mkeps(r: Rexp) = {
       
   368   val res1 = Try(Some(mkeps(r))).getOrElse(None)
       
   369   val res2 = Try(Some(decode(r, mkepsBC(internalise(r))))).getOrElse(None) 
       
   370   if (res1 != res2) println(s"Mkeps disagrees on ${r}")
       
   371   if (res1 != res2) Some(r) else (None)
       
   372 }
       
   373 
       
   374 println("Testing mkeps")
       
   375 enum(2, "ab").map(test_mkeps).toSet
       
   376 //enum(3, "ab").map(test_mkeps).toSet
       
   377 //enum(3, "abc").map(test_mkeps).toSet
       
   378 
       
   379 
       
   380 //enumerates strings of length n over alphabet cs
       
   381 def strs(n: Int, cs: String) : Set[String] = {
       
   382   if (n == 0) Set("")
       
   383   else {
       
   384     val ss = strs(n - 1, cs)
       
   385     ss ++
       
   386     (for (s <- ss; c <- cs.toList) yield c + s)
       
   387   }
       
   388 }
       
   389 
       
   390 //tests lexing and lexingB
       
   391 def tests_inj(ss: Set[String])(r: Rexp) = {
       
   392   clear()
       
   393   println(s"Testing ${r}")
       
   394   for (s <- ss.par) yield {
       
   395     val res1 = Try(Some(alexing(r, s))).getOrElse(None)
       
   396     val res2 = Try(Some(alexing_simp(r, s))).getOrElse(None)
       
   397     if (res1 != res2) println(s"Disagree on ${r} and ${s}")
       
   398     if (res1 != res2) println(s"   ${res1} !=  ${res2}")
       
   399     if (res1 != res2) Some((r, s)) else None
       
   400   }
       
   401 }
       
   402 
       
   403 //println("Testing lexing 1")
       
   404 //enum(2, "ab").map(tests_inj(strs(2, "ab"))).toSet
       
   405 //println("Testing lexing 2")
       
   406 //enum(2, "ab").map(tests_inj(strs(3, "abc"))).toSet
       
   407 //println("Testing lexing 3")
       
   408 //enum(3, "ab").map(tests_inj(strs(3, "abc"))).toSet
       
   409 
       
   410 
       
   411 def tests_alexer(ss: Set[String])(r: Rexp) = {
       
   412   clear()
       
   413   println(s"Testing ${r}")
       
   414   for (s <- ss.par) yield {
       
   415     val d = der('b', r)
       
   416     val ad = ader('b', internalise(r))
       
   417     val res1 = Try(Some(encode(inj(r, 'a', alexing(d, s))))).getOrElse(None)
       
   418     val res2 = Try(Some(pre_alexing(ad, s))).getOrElse(None)
       
   419     if (res1 != res2) println(s"Disagree on ${r} and 'a'::${s}")
       
   420     if (res1 != res2) println(s"   ${res1} !=  ${res2}")
       
   421     if (res1 != res2) Some((r, s)) else None
       
   422   }
       
   423 }
       
   424 
       
   425 println("Testing alexing 1")
       
   426 println(enum(2, "ab").map(tests_alexer(strs(2, "ab"))).toSet)
       
   427 
       
   428 
       
   429 def values(r: Rexp) : Set[Val] = r match {
       
   430   case ZERO => Set()
       
   431   case ONE => Set(Empty)
       
   432   case CHAR(c) => Set(Chr(c))
       
   433   case ALT(r1, r2) => (for (v1 <- values(r1)) yield Left(v1)) ++ 
       
   434                       (for (v2 <- values(r2)) yield Right(v2))
       
   435   case SEQ(r1, r2) => for (v1 <- values(r1); v2 <- values(r2)) yield Sequ(v1, v2)
       
   436   case STAR(r) => (Set(Stars(Nil)) ++ 
       
   437                   (for (v <- values(r)) yield Stars(List(v)))) 
       
   438     // to do more would cause the set to be infinite
       
   439 }
       
   440 
       
   441 def tests_ader(c: Char)(r: Rexp) = {
       
   442   val d = der(c, r)
       
   443   val vals = values(d)
       
   444   for (v <- vals) {
       
   445     println(s"Testing ${r} and ${v}")
       
   446     val res1 = retrieve(ader(c, internalise(r)), v)
       
   447     val res2 = encode(inj(r, c, decode(d, retrieve(internalise(der(c, r)), v))))
       
   448     if (res1 != res2) println(s"Disagree on ${r}, ${v} and der = ${d}")
       
   449     if (res1 != res2) println(s"   ${res1} !=  ${res2}")
       
   450     if (res1 != res2) Some((r, v)) else None
       
   451   }
       
   452 }
       
   453 
       
   454 println("Testing ader/der")
       
   455 println(enum(2, "ab").map(tests_ader('a')).toSet)
       
   456 
       
   457 val er = SEQ(ONE,CHAR('a')) 
       
   458 val ev = Right(Empty) 
       
   459 val ed = ALT(SEQ(ZERO,CHAR('a')),ONE)
       
   460 
       
   461 retrieve(internalise(ed), ev) // => [true]
       
   462 internalise(er)
       
   463 ader('a', internalise(er))
       
   464 retrieve(ader('a', internalise(er)), ev) // => []
       
   465 decode(ed, List(true)) // gives the value for derivative
       
   466 decode(er, List())     // gives the value for original value
       
   467 
       
   468 
       
   469 val dr = STAR(CHAR('a'))
       
   470 val dr_der = SEQ(ONE,STAR(CHAR('a'))) // derivative of dr
       
   471 val dr_val = Sequ(Empty,Stars(List())) // value of dr_def
       
   472 
       
   473 
       
   474 val res1 = retrieve(internalise(der('a', dr)), dr_val) // => [true]
       
   475 val res2 = retrieve(ader('a', internalise(dr)), dr_val) // => [false, true]
       
   476 decode(dr_der, res1) // gives the value for derivative
       
   477 decode(dr, res2)     // gives the value for original value
       
   478 
       
   479 encode(inj(dr, 'a', decode(dr_der, res1)))
       
   480