progs/token2.scala
changeset 385 7f8516ff408d
parent 368 a9911966c0df
child 388 66f66f1710ed
equal deleted inserted replaced
384:4629448c1bd9 385:7f8516ff408d
    11 case class STAR(r: Rexp) extends Rexp 
    11 case class STAR(r: Rexp) extends Rexp 
    12 case class RECD(x: String, r: Rexp) extends Rexp
    12 case class RECD(x: String, r: Rexp) extends Rexp
    13 case class CRANGE(cs: String) extends Rexp
    13 case class CRANGE(cs: String) extends Rexp
    14 case class PLUS(r: Rexp) extends Rexp
    14 case class PLUS(r: Rexp) extends Rexp
    15 case class OPT(r: Rexp) extends Rexp
    15 case class OPT(r: Rexp) extends Rexp
       
    16 case class NTIMES(r: Rexp, n: Int) extends Rexp
    16 
    17 
    17 abstract class Val
    18 abstract class Val
    18 case object Empty extends Val
    19 case object Empty extends Val
    19 case class Chr(c: Char) extends Val
    20 case class Chr(c: Char) extends Val
    20 case class Seq(v1: Val, v2: Val) extends Val
    21 case class Seq(v1: Val, v2: Val) extends Val
    57   case STAR(_) => true
    58   case STAR(_) => true
    58   case RECD(_, r) => nullable(r)
    59   case RECD(_, r) => nullable(r)
    59   case CRANGE(_) => false
    60   case CRANGE(_) => false
    60   case PLUS(r) => nullable(r)
    61   case PLUS(r) => nullable(r)
    61   case OPT(_) => true
    62   case OPT(_) => true
       
    63   case NTIMES(r, n) => if (n == 0) true else nullable(r)
    62 }
    64 }
    63 
    65 
    64 // derivative of a regular expression w.r.t. a character
    66 // derivative of a regular expression w.r.t. a character
    65 def der (c: Char, r: Rexp) : Rexp = r match {
    67 def der (c: Char, r: Rexp) : Rexp = r match {
    66   case NULL => NULL
    68   case NULL => NULL
    73   case STAR(r) => SEQ(der(c, r), STAR(r))
    75   case STAR(r) => SEQ(der(c, r), STAR(r))
    74   case RECD(_, r1) => der(c, r1)
    76   case RECD(_, r1) => der(c, r1)
    75   case CRANGE(cs) => if (cs.contains(c)) EMPTY else NULL
    77   case CRANGE(cs) => if (cs.contains(c)) EMPTY else NULL
    76   case PLUS(r) => SEQ(der(c, r), STAR(r))
    78   case PLUS(r) => SEQ(der(c, r), STAR(r))
    77   case OPT(r) => ALT(der(c, r), NULL)
    79   case OPT(r) => ALT(der(c, r), NULL)
       
    80   case NTIMES(r, n) => if (n == 0) NULL else der(c, SEQ(r, NTIMES(r, n - 1)))
    78 }
    81 }
    79 
    82 
    80 // derivative w.r.t. a string (iterates der)
    83 // derivative w.r.t. a string (iterates der)
    81 def ders (s: List[Char], r: Rexp) : Rexp = s match {
    84 def ders (s: List[Char], r: Rexp) : Rexp = s match {
    82   case Nil => r
    85   case Nil => r
   113   case SEQ(r1, r2) => Seq(mkeps(r1), mkeps(r2))
   116   case SEQ(r1, r2) => Seq(mkeps(r1), mkeps(r2))
   114   case STAR(r) => Stars(Nil)
   117   case STAR(r) => Stars(Nil)
   115   case RECD(x, r) => Rec(x, mkeps(r))
   118   case RECD(x, r) => Rec(x, mkeps(r))
   116   case PLUS(r) => Stars(List(mkeps(r)))
   119   case PLUS(r) => Stars(List(mkeps(r)))
   117   case OPT(_) => Right(Empty)
   120   case OPT(_) => Right(Empty)
       
   121   case NTIMES(r, n) => if (n == 0) Stars(Nil) else Stars(Nil.padTo(n, mkeps(r)))
       
   122   case _ => { println ("r : " + r.toString); 
       
   123               throw new Exception("mkeps error")}
   118 }
   124 }
   119 
   125 
   120 
   126 
   121 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
   127 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
   122   case (STAR(r), Seq(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
   128   case (STAR(r), Seq(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
   128   case (CHAR(_), Empty) => Chr(c) 
   134   case (CHAR(_), Empty) => Chr(c) 
   129   case (CRANGE(_), Empty) => Chr(c) 
   135   case (CRANGE(_), Empty) => Chr(c) 
   130   case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
   136   case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
   131   case (PLUS(r), Seq(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
   137   case (PLUS(r), Seq(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
   132   case (OPT(r), Left(v1)) => Left(inj(r, c, v1))
   138   case (OPT(r), Left(v1)) => Left(inj(r, c, v1))
       
   139   case (NTIMES(r, n), Seq(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
       
   140   case (NTIMES(r, n), Left(Seq(v1, Stars(vs)))) => Stars(inj(r, c, v1)::vs)
       
   141   case (NTIMES(r, n), Right(Stars(v::vs))) => Stars(mkeps(r)::inj(r, c, v)::vs)
       
   142   case _ => { println ("r : " + r.toString + "  v: " + v.toString); 
       
   143               throw new Exception("inj error")}
   133 }
   144 }
   134 
   145 
   135 // main lexing function (produces a value)
   146 // main lexing function (produces a value)
   136 def lex(r: Rexp, s: List[Char]) : Val = s match {
   147 def lex(r: Rexp, s: List[Char]) : Val = s match {
   137   case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
   148   case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
   238 
   249 
   239 // filters out all white spaces
   250 // filters out all white spaces
   240 def tokenise(r: Rexp, s: String) = 
   251 def tokenise(r: Rexp, s: String) = 
   241   env(lexing_simp(r, s)).filterNot { (s) => s._1 == "w"}.mkString("\n")
   252   env(lexing_simp(r, s)).filterNot { (s) => s._1 == "w"}.mkString("\n")
   242 
   253 
   243 
       
   244 //   Testing
   254 //   Testing
   245 //============
   255 //============
   246 
   256 
   247 def time[T](code: => T) = {
   257 def time[T](code: => T) = {
   248   val start = System.nanoTime()
   258   val start = System.nanoTime()
   249   val result = code
   259   val result = code
   250   val end = System.nanoTime()
   260   val end = System.nanoTime()
   251   println((end - start)/1.0e9)
   261   println((end - start)/1.0e9)
   252   result
   262   result
   253 }
   263 }
       
   264 
       
   265 println(lexing_simp(OPT("1"), "1"))
       
   266 println(lexing_simp(OPT("1"), ""))
       
   267 println(ders("111".toList, NTIMES("1",3)))
       
   268 println(lexing_simp(NTIMES("1",3), "111"))
   254 
   269 
   255 val r1 = ("a" | "ab") ~ ("bcd" | "c")
   270 val r1 = ("a" | "ab") ~ ("bcd" | "c")
   256 println(lexing(r1, "abcd"))
   271 println(lexing(r1, "abcd"))
   257 
   272 
   258 val r2 = ("" | "a") ~ ("ab" | "b")
   273 val r2 = ("" | "a") ~ ("ab" | "b")