progs/scala/re-basic.scala
changeset 157 1fe44fb6d0a4
parent 156 6a43ea9305ba
child 166 cab1ae6f339a
equal deleted inserted replaced
156:6a43ea9305ba 157:1fe44fb6d0a4
       
     1 /* lexer without simplification */
       
     2 
       
     3 import scala.language.implicitConversions    
       
     4 import scala.language.reflectiveCalls
       
     5 import scala.annotation.tailrec   
       
     6 
       
     7 abstract class Rexp 
       
     8 case object ZERO extends Rexp
       
     9 case object ONE extends Rexp
       
    10 case class CHAR(c: Char) extends Rexp
       
    11 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
       
    12 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
       
    13 case class STAR(r: Rexp) extends Rexp 
       
    14 case class RECD(x: String, r: Rexp) extends Rexp
       
    15 
       
    16 abstract class Val
       
    17 case object Empty extends Val
       
    18 case class Chr(c: Char) extends Val
       
    19 case class Sequ(v1: Val, v2: Val) extends Val
       
    20 case class Left(v: Val) extends Val
       
    21 case class Right(v: Val) extends Val
       
    22 case class Stars(vs: List[Val]) extends Val
       
    23 case class Rec(x: String, v: Val) extends Val
       
    24    
       
    25 // some convenience for typing in regular expressions
       
    26 def charlist2rexp(s : List[Char]): Rexp = s match {
       
    27   case Nil => ONE
       
    28   case c::Nil => CHAR(c)
       
    29   case c::s => SEQ(CHAR(c), charlist2rexp(s))
       
    30 }
       
    31 implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
       
    32 
       
    33 implicit def RexpOps(r: Rexp) = new {
       
    34   def | (s: Rexp) = ALT(r, s)
       
    35   def % = STAR(r)
       
    36   def ~ (s: Rexp) = SEQ(r, s)
       
    37 }
       
    38 
       
    39 implicit def stringOps(s: String) = new {
       
    40   def | (r: Rexp) = ALT(s, r)
       
    41   def | (r: String) = ALT(s, r)
       
    42   def % = STAR(s)
       
    43   def ~ (r: Rexp) = SEQ(s, r)
       
    44   def ~ (r: String) = SEQ(s, r)
       
    45   def $ (r: Rexp) = RECD(s, r)
       
    46 }
       
    47 
       
    48 // nullable function: tests whether the regular 
       
    49 // expression can recognise the empty string
       
    50 def nullable (r: Rexp) : Boolean = r match {
       
    51   case ZERO => false
       
    52   case ONE => true
       
    53   case CHAR(_) => false
       
    54   case ALT(r1, r2) => nullable(r1) || nullable(r2)
       
    55   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
       
    56   case STAR(_) => true
       
    57   case RECD(_, r1) => nullable(r1)
       
    58 }
       
    59 
       
    60 // derivative of a regular expression w.r.t. a character
       
    61 def der (c: Char, r: Rexp) : Rexp = r match {
       
    62   case ZERO => ZERO
       
    63   case ONE => ZERO
       
    64   case CHAR(d) => if (c == d) ONE else ZERO
       
    65   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
       
    66   case SEQ(r1, r2) => 
       
    67     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
       
    68     else SEQ(der(c, r1), r2)
       
    69   case STAR(r) => SEQ(der(c, r), STAR(r))
       
    70   case RECD(_, r1) => der(c, r1)
       
    71 }
       
    72 
       
    73 // derivative w.r.t. a string (iterates der)
       
    74 def ders (s: List[Char], r: Rexp) : Rexp = s match {
       
    75   case Nil => r
       
    76   case c::s => ders(s, der(c, r))
       
    77 }
       
    78 
       
    79 // extracts a string from value
       
    80 def flatten(v: Val) : String = v match {
       
    81   case Empty => ""
       
    82   case Chr(c) => c.toString
       
    83   case Left(v) => flatten(v)
       
    84   case Right(v) => flatten(v)
       
    85   case Sequ(v1, v2) => flatten(v1) + flatten(v2)
       
    86   case Stars(vs) => vs.map(flatten).mkString
       
    87   case Rec(_, v) => flatten(v)
       
    88 }
       
    89 
       
    90 // extracts an environment from a value
       
    91 def env(v: Val) : List[(String, String)] = v match {
       
    92   case Empty => Nil
       
    93   case Chr(c) => Nil
       
    94   case Left(v) => env(v)
       
    95   case Right(v) => env(v)
       
    96   case Sequ(v1, v2) => env(v1) ::: env(v2)
       
    97   case Stars(vs) => vs.flatMap(env)
       
    98   case Rec(x, v) => (x, flatten(v))::env(v)
       
    99 }
       
   100 
       
   101 // injection part
       
   102 def mkeps(r: Rexp) : Val = r match {
       
   103   case ONE => Empty
       
   104   case ALT(r1, r2) => 
       
   105     if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
       
   106   case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
       
   107   case STAR(r) => Stars(Nil)
       
   108   case RECD(x, r) => Rec(x, mkeps(r))
       
   109 }
       
   110 
       
   111 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
       
   112   case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
       
   113   case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
       
   114   case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
       
   115   case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
       
   116   case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
       
   117   case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
       
   118   case (CHAR(d), Empty) => Chr(c) 
       
   119   case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
       
   120 }
       
   121 
       
   122 
       
   123 // main lexing function (produces a value)
       
   124 def lex(r: Rexp, s: List[Char]) : Val = s match {
       
   125   case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
       
   126   case c::cs => inj(r, c, lex(der(c, r), cs))
       
   127 }
       
   128 
       
   129 def lexing(r: Rexp, s: String) : Val = lex(r, s.toList)
       
   130 
       
   131 // Examples
       
   132 
       
   133 val K: Rexp = "a" | "b"
       
   134 val I: Rexp = "ab" | "ba"  
       
   135 
       
   136 println(lexing((K | I).%, "abab"))
       
   137 
       
   138 val K2: Rexp = ("key" $ "a" | "b")
       
   139 val I2: Rexp = ("id" $ ("ab" | "ba"))  
       
   140 
       
   141 println(lexing((K2 | I2).%, "abaa"))
       
   142 println(env(lexing((K2 | I2).%, "abaa")))
       
   143 
       
   144