progs/scala/tests.scala
changeset 195 c2d36c3cf8ad
parent 169 072a701bb153
child 196 5fa8344a5176
equal deleted inserted replaced
194:761793cce563 195:c2d36c3cf8ad
     8 case object ZERO extends Rexp 
     8 case object ZERO extends Rexp 
     9 case object ONE extends Rexp
     9 case object ONE extends Rexp
    10 case class CHAR(c: Char) extends Rexp {
    10 case class CHAR(c: Char) extends Rexp {
    11   override def toString = c.toString 
    11   override def toString = c.toString 
    12 }
    12 }
       
    13 case object ANYCHAR extends Rexp {
       
    14   override def toString = "." 
       
    15 }
    13 case class ALT(r1: Rexp, r2: Rexp) extends Rexp {
    16 case class ALT(r1: Rexp, r2: Rexp) extends Rexp {
    14   override def toString = "(" + r1.toString + "|" + r2.toString + ")" 
    17   override def toString = "(" + r1.toString + "|" + r2.toString + ")" 
    15 }
    18 }
    16 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp {
    19 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp {
    17   override def toString = "(" + r1.toString + r2.toString +")"
    20   override def toString = "(" + r1.toString + r2.toString +")"
    18 } 
    21 } 
    19 case class STAR(r: Rexp) extends Rexp 
    22 case class STAR(r: Rexp) extends Rexp 
    20 case class RECD(x: String, r: Rexp) extends Rexp
    23 case class RECD(x: String, r: Rexp) extends Rexp {
       
    24   override def toString = "[" + r.toString +"]"
       
    25 }
       
    26 
       
    27 
       
    28 abstract class Val
       
    29 case object Empty extends Val
       
    30 case class Chr(c: Char) extends Val
       
    31 case class Sequ(v1: Val, v2: Val) extends Val
       
    32 case class Left(v: Val) extends Val
       
    33 case class Right(v: Val) extends Val
       
    34 case class Stars(vs: List[Val]) extends Val
       
    35    
       
    36 // nullable function: tests whether the regular 
       
    37 // expression can recognise the empty string
       
    38 def nullable (r: Rexp) : Boolean = r match {
       
    39   case ZERO => false
       
    40   case ONE => true
       
    41   case CHAR(_) => false
       
    42   case ANYCHAR => false
       
    43   case ALT(r1, r2) => nullable(r1) || nullable(r2)
       
    44   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
       
    45   case STAR(_) => true
       
    46   case RECD(_, r1) => nullable(r1)
       
    47 }
       
    48 
       
    49 // derivative of a regular expression w.r.t. a character
       
    50 def der (c: Char, r: Rexp) : Rexp = r match {
       
    51   case ZERO => ZERO
       
    52   case ONE => ZERO
       
    53   case CHAR(d) => if (c == d) ONE else ZERO
       
    54   case ANYCHAR => ONE
       
    55   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
       
    56   case SEQ(r1, r2) => 
       
    57     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
       
    58     else SEQ(der(c, r1), r2)
       
    59   case STAR(r) => SEQ(der(c, r), STAR(r))
       
    60   case RECD(_, r1) => der(c, r1)
       
    61 }
       
    62 
       
    63 // derivative w.r.t. a string (iterates der)
       
    64 def ders (s: List[Char], r: Rexp) : Rexp = s match {
       
    65   case Nil => r
       
    66   case c::s => ders(s, der(c, r))
       
    67 }
       
    68 
       
    69 // extracts a string from value
       
    70 def flatten(v: Val) : String = v match {
       
    71   case Empty => ""
       
    72   case Chr(c) => c.toString
       
    73   case Left(v) => flatten(v)
       
    74   case Right(v) => flatten(v)
       
    75   case Sequ(v1, v2) => flatten(v1) + flatten(v2)
       
    76   case Stars(vs) => vs.map(flatten).mkString
       
    77 }
       
    78 
       
    79 // extracts an environment from a value
       
    80 def env(v: Val, r: Rexp) : List[(String, String)] = (v, r) match {
       
    81   case (Empty, ONE) => Nil
       
    82   case (Chr(c), CHAR(_)) => Nil
       
    83   case (Chr(c), ANYCHAR) => Nil
       
    84   case (Left(v), ALT(r1, r2)) => env(v, r1)
       
    85   case (Right(v), ALT(r1, r2)) => env(v, r2)
       
    86   case (Sequ(v1, v2), SEQ(r1, r2)) => env(v1, r1) ::: env(v2, r2)
       
    87   case (Stars(vs), STAR(r)) => vs.flatMap(env(_, r))
       
    88   case (v, RECD(x, r)) => (x, flatten(v))::env(v, r)
       
    89 }
       
    90 
       
    91 // extracts indices for the underlying strings
       
    92 def env2(v: Val, r: Rexp, n: Int) : (List[(Int, Int)], Int) = (v, r) match {
       
    93   case (Empty, ONE) => (Nil, n)
       
    94   case (Chr(c), CHAR(_)) => (Nil, n + 1)
       
    95   case (Chr(c), ANYCHAR) => (Nil, n + 1)
       
    96   case (Left(v), ALT(r1, r2)) => env2(v, r1, n)
       
    97   case (Right(v), ALT(r1, r2)) => env2(v, r2, n)
       
    98   case (Sequ(v1, v2), SEQ(r1, r2)) => {
       
    99    val (e1, n1) = env2(v1, r1, n) 
       
   100    val (e2, n2) = env2(v2, r2, n1)
       
   101    (e1 ::: e2, n2)
       
   102   }
       
   103   case (Stars(Nil), STAR(r)) => (Nil, n)
       
   104   case (Stars(v :: vs), STAR(r)) => {
       
   105    val (e1, n1) = env2(v, r, n) 
       
   106    val (e2, n2) = env2(Stars(vs), STAR(r), n1)
       
   107    (e1 ::: e2, n2)
       
   108   }
       
   109   case (v, RECD(x, r)) => {
       
   110     val (e1, n1) = env2(v, r, n)
       
   111     ((n, n + flatten(v).length) :: e1, n1)
       
   112   }
       
   113 }
       
   114 
       
   115 // injection part
       
   116 def mkeps(r: Rexp) : Val = r match {
       
   117   case ONE => Empty
       
   118   case ALT(r1, r2) => 
       
   119     if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
       
   120   case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
       
   121   case STAR(r) => Stars(Nil)
       
   122   case RECD(x, r) => mkeps(r)
       
   123 }
       
   124 
       
   125 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
       
   126   case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
       
   127   case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
       
   128   case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
       
   129   case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
       
   130   case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
       
   131   case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
       
   132   case (CHAR(d), Empty) => Chr(c) 
       
   133   case (ANYCHAR, Empty) => Chr(c) 
       
   134   case (RECD(x, r1), _) => inj(r1, c, v)
       
   135 }
       
   136 
       
   137 
       
   138 // main lexing function (produces a value)
       
   139 def lex(r: Rexp, s: List[Char]) : Val = s match {
       
   140   case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
       
   141   case c::cs => inj(r, c, lex(der(c, r), cs))
       
   142 }
       
   143 
       
   144 def lexing(r: Rexp, s: String) : Val = lex(r, s.toList)
       
   145 
       
   146 
       
   147 
       
   148 // Regular expression parser
    21 
   149 
    22 case class Parser(s: String) {
   150 case class Parser(s: String) {
    23   var i = 0
   151   var i = 0
    24   
   152   
    25   def peek() = s(i)
   153   def peek() = s(i)
    37     else t
   165     else t
    38   }
   166   }
    39 
   167 
    40   def Term() : Rexp = {
   168   def Term() : Rexp = {
    41     var f : Rexp = 
   169     var f : Rexp = 
    42       if (more() && peek() != ')' && peek() != '|') Factor() else ZERO;
   170       if (more() && peek() != ')' && peek() != '|') Factor() else ONE;
    43     while (more() && peek() != ')' && peek() != '|') {
   171     while (more() && peek() != ')' && peek() != '|') {
    44       f = SEQ(f, Factor()) ;
   172       f = SEQ(f, Factor()) ;
    45     }
   173     }
    46 
       
    47     f
   174     f
    48   }
   175   }
    49 
   176 
    50   def Factor() : Rexp = {
   177   def Factor() : Rexp = {
    51     var b = Base();
   178     var b = Base();
    52     while (more() && peek() == '*') {
   179     while (more() && peek() == '*') {
    53       eat('*') ;
   180       eat('*') ;
    54       b = STAR(b) ;
   181       b = STAR(b) ;
    55     }
   182     }
    56 
   183     while (more() && peek() == '?') {
       
   184       eat('?') ;
       
   185       b = ALT(b, ONE) ;
       
   186     }
       
   187     while (more() && peek() == '+') {
       
   188       eat('+') ;
       
   189       b = SEQ(b, STAR(b)) ;
       
   190     }
    57     b
   191     b
    58   }
   192   }
    59 
   193 
    60   def Base() : Rexp = {
   194   def Base() : Rexp = {
    61     peek() match {
   195     peek() match {
    62       case '(' => { eat('(') ; val r = Regex(); eat(')') ; r }
   196       case '(' => { eat('(') ; val r = Regex(); eat(')') ; RECD("",r) }
       
   197       case '.' => { eat('.'); ANYCHAR }
    63       case _ => CHAR(next())
   198       case _ => CHAR(next())
    64     }
   199     }
    65   }
   200   }
    66 }
   201 }
    67 
   202 
    68 println(Parser("a|(bc)*").Regex())
   203 println(Parser("a|(bc)*").Regex())
    69 
   204 
    70 
   205 
    71 def process(line: String) : String = {
   206 def process(line: String) : String = {
    72   val s = line.split("\\t+")(1)
   207   if (line.head == '#') "#" else
    73   s + ":   " + Parser(s).Regex().toString
   208     {
    74 }
   209       val line_split = line.split("\\t+")
    75 
   210       val reg_str = line_split(1)
    76 
   211       val reg = RECD("", Parser(reg_str).Regex())
    77 val filename = "../tests/forced-assoc.txt"
   212       val in_str = if (line_split(2) == "-") "" else line_split(2)
       
   213       val res_str = line_split(3)
       
   214       val our_val = lexing(reg, in_str)
       
   215       val our_result = env2(our_val, reg, 0)._1.mkString("") 
       
   216       if (our_result != res_str) 
       
   217         { 
       
   218           reg_str + ":   " + 
       
   219           reg.toString + ": " + 
       
   220           in_str + "   \n " + 
       
   221           our_result +  
       
   222           " => \n" + res_str + " ! " +
       
   223           our_val + ":" + reg + "\n" 
       
   224         }
       
   225       else "*"
       
   226     }
       
   227 }
       
   228 
       
   229 
       
   230 //val filename = "../tests/forced-assoc.txt"
       
   231 //val filename = "../tests/left-assoc.txt"
       
   232 //val filename = "../tests/right-assoc.txt"
       
   233 //val filename = "../tests/class.txt"
       
   234 //val filename = "../tests/basic3.txt"
       
   235 //val filename = "../tests/totest.txt"
       
   236 //val filename = "../tests/repetition2.txt"
       
   237 //val filename = "../tests/osx-bsd-critical.txt"
       
   238 val filename = "../tests/nullsub3.txt"
    78 val filelines : List[String] = Source.fromFile(filename).getLines.toList
   239 val filelines : List[String] = Source.fromFile(filename).getLines.toList
    79 
   240 
    80 
   241 
    81 filelines.foreach((s: String) => println(process(s)))
   242 filelines.foreach((s: String) => print(process(s)))
    82 
   243