progs/scala/tests.scala
author Christian Urban <christian dot urban at kcl dot ac dot uk>
Sat, 11 Jun 2016 13:28:45 +0100
changeset 196 5fa8344a5176
parent 195 c2d36c3cf8ad
child 197 a35041d5707c
permissions -rw-r--r--
added test processing
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
168
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     1
import scala.language.implicitConversions    
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     2
import scala.language.reflectiveCalls
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     3
import scala.annotation.tailrec   
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     4
import scala.io.Source
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     5
import scala.util.parsing.combinator._
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     6
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     7
// Abstract syntax of regular expressions.
abstract class Rexp

// Matches no string at all.
case object ZERO extends Rexp

// Matches only the empty string.
case object ONE extends Rexp

// Matches exactly the character c; prints as that character.
case class CHAR(c: Char) extends Rexp {
  override def toString: String = String.valueOf(c)
}

// Matches any single character; prints as a dot.
case object ANYCHAR extends Rexp {
  override def toString: String = "."
}

// Alternative r1 | r2; prints as "(r1|r2)".
case class ALT(r1: Rexp, r2: Rexp) extends Rexp {
  override def toString: String = s"($r1|$r2)"
}

// Sequence r1 followed by r2; prints as "(r1r2)".
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp {
  override def toString: String = s"($r1$r2)"
}

// Kleene star; keeps the default case-class printing, i.e. "STAR(r)".
case class STAR(r: Rexp) extends Rexp

// Record (named sub-expression); the name x is not part of the printed form.
case class RECD(x: String, r: Rexp) extends Rexp {
  override def toString: String = s"[$r]"
}
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
    26
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
    27
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
    28
// Values: they describe HOW a regular expression matched a string.
abstract class Val

// Value for the empty string.
case object Empty extends Val

// Value for a single matched character.
case class Chr(c: Char) extends Val

// Value for a sequence: v1 for the first part, v2 for the second.
case class Sequ(v1: Val, v2: Val) extends Val

// Value for an alternative where the left branch matched.
// (Shadows scala.util.Left inside this file.)
case class Left(v: Val) extends Val

// Value for an alternative where the right branch matched.
// (Shadows scala.util.Right inside this file.)
case class Right(v: Val) extends Val

// Value for a star: one value per iteration, in matching order.
case class Stars(vs: List[Val]) extends Val
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
    35
   
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
    36
// nullable function: tests whether the regular 
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
    37
// expression can recognise the empty string
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
    38
// Returns true exactly when r can match the empty string.
// Atoms are decided directly; compound expressions are decided
// from their components.
def nullable(r: Rexp): Boolean = r match {
  case ONE | STAR(_) => true
  case ZERO | CHAR(_) | ANYCHAR => false
  case ALT(left, right) => nullable(left) || nullable(right)
  case SEQ(first, second) => nullable(first) && nullable(second)
  // a record is transparent: it is nullable iff its body is
  case RECD(_, inner) => nullable(inner)
}
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
    48
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
    49
// derivative of a regular expression w.r.t. a character
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
    50
// Derivative of r with respect to the character c: a regular expression
// for what may follow after c has been consumed.
def der(c: Char, r: Rexp): Rexp = r match {
  case ZERO | ONE => ZERO
  case CHAR(d) => if (d == c) ONE else ZERO
  case ANYCHAR => ONE
  case ALT(left, right) => ALT(der(c, left), der(c, right))
  case SEQ(first, second) =>
    val consumedInFirst = SEQ(der(c, first), second)
    // if the first part can be skipped, c may also start the second part
    if (nullable(first)) ALT(consumedInFirst, der(c, second))
    else consumedInFirst
  case STAR(body) => SEQ(der(c, body), STAR(body))
  // the record wrapper is dropped; only the body is differentiated
  case RECD(_, inner) => der(c, inner)
}
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
    62
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
    63
// derivative w.r.t. a string (iterates der)
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
    64
// Derivative of r with respect to a whole string: applies der once per
// character of s, from left to right. For the empty list r is returned as is.
def ders(s: List[Char], r: Rexp): Rexp =
  s.foldLeft(r) { (current, ch) => der(ch, current) }
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
    68
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
    69
// extracts a string from value
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
    70
// Returns the string that the value v stands for, i.e. the concatenation
// of all characters stored in v, in order.
def flatten(v: Val): String = v match {
  case Empty => ""
  case Chr(ch) => String.valueOf(ch)
  case Sequ(first, second) => flatten(first) + flatten(second)
  case Stars(iterations) => iterations.map(flatten).mkString
  // Left/Right only tag which branch matched; they add no characters
  case Left(inner) => flatten(inner)
  case Right(inner) => flatten(inner)
}
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
    78
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
    79
// extracts an environment from a value
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
    80
// Collects, for every record in r, the pair (record name, matched text).
// v must be a value of the shape dictated by r; any other combination
// is not covered and results in a MatchError.
def env(v: Val, r: Rexp): List[(String, String)] = (v, r) match {
  // atoms contain no records
  case (Empty, ONE) | (Chr(_), CHAR(_)) | (Chr(_), ANYCHAR) => Nil
  case (Left(inner), ALT(left, _)) => env(inner, left)
  case (Right(inner), ALT(_, right)) => env(inner, right)
  case (Sequ(first, second), SEQ(rFirst, rSecond)) =>
    env(first, rFirst) ::: env(second, rSecond)
  case (Stars(iterations), STAR(body)) =>
    iterations.flatMap(iteration => env(iteration, body))
  // the record itself comes first, followed by the records nested in it
  case (value, RECD(name, inner)) =>
    (name, flatten(value)) :: env(value, inner)
}
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
    90
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
    91
// extracts indices for the underlying strings
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
    92
// Like env, but reports positions instead of text. n is the index in the
// underlying string at which the value v starts. The result is
//   - one (start, end) pair per record, where end = start + length of the
//     text matched by that record, and
//   - the index just after v.
// Combinations of v and r that do not fit together end in a MatchError.
def env2(v: Val, r: Rexp, n: Int): (List[(Int, Int)], Int) = (v, r) match {
  case (Empty, ONE) => (Nil, n)
  // a single character advances the position by one
  case (Chr(_), CHAR(_)) | (Chr(_), ANYCHAR) => (Nil, n + 1)
  case (Left(inner), ALT(left, _)) => env2(inner, left, n)
  case (Right(inner), ALT(_, right)) => env2(inner, right, n)
  case (Sequ(first, second), SEQ(rFirst, rSecond)) =>
    val (firstSpans, afterFirst) = env2(first, rFirst, n)
    val (secondSpans, afterSecond) = env2(second, rSecond, afterFirst)
    (firstSpans ::: secondSpans, afterSecond)
  case (Stars(iterations), STAR(body)) =>
    // thread the position through the iterations from left to right
    iterations.foldLeft((List.empty[(Int, Int)], n)) {
      case ((spans, pos), iteration) =>
        val (iterSpans, afterIter) = env2(iteration, body, pos)
        (spans ::: iterSpans, afterIter)
    }
  case (value, RECD(_, inner)) =>
    val (innerSpans, afterValue) = env2(value, inner, n)
    ((n, n + flatten(value).length) :: innerSpans, afterValue)
}
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
   114
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
   115
// injection part
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
   116
// Builds the value with which r matches the empty string.
// Only defined for the constructors listed below; ZERO, CHAR and ANYCHAR
// are not covered and result in a MatchError.
def mkeps(r: Rexp): Val = r match {
  case ONE => Empty
  // the left branch is preferred whenever it can match the empty string
  case ALT(left, _) if nullable(left) => Left(mkeps(left))
  case ALT(_, right) => Right(mkeps(right))
  case SEQ(first, second) => Sequ(mkeps(first), mkeps(second))
  // a star matches the empty string with zero iterations
  case STAR(_) => Stars(Nil)
  case RECD(_, inner) => mkeps(inner)
}
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
   124
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
   125
// Injection: given a value v for the derivative der(c, r), rebuilds a value
// for r itself by putting the character c back in. The shapes of v handled
// here are the ones that der can give rise to; anything else is a MatchError.
def inj(r: Rexp, c: Char, v: Val): Val = (r, v) match {
  // der(STAR(r)) = SEQ(der r, STAR(r)): c belongs to the first iteration
  case (STAR(body), Sequ(head, Stars(rest))) => Stars(inj(body, c, head) :: rest)
  // SEQ whose first part was not nullable: c belongs to the first part
  case (SEQ(first, _), Sequ(headVal, tailVal)) =>
    Sequ(inj(first, c, headVal), tailVal)
  // SEQ with nullable first part, c consumed by the first part
  case (SEQ(first, _), Left(Sequ(headVal, tailVal))) =>
    Sequ(inj(first, c, headVal), tailVal)
  // SEQ with nullable first part, first part skipped, c consumed by the second
  case (SEQ(first, second), Right(tailVal)) =>
    Sequ(mkeps(first), inj(second, c, tailVal))
  case (ALT(left, _), Left(inner)) => Left(inj(left, c, inner))
  case (ALT(_, right), Right(inner)) => Right(inj(right, c, inner))
  // atoms: the derivative matched the empty string, so the value is c itself
  case (CHAR(_), Empty) => Chr(c)
  case (ANYCHAR, Empty) => Chr(c)
  // records are transparent for injection
  case (RECD(_, inner), value) => inj(inner, c, value)
}
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
   136
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
   137
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
   138
// main lexing function (produces a value)
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
   139
// Matches the characters s against r and returns the value describing the
// match. Throws an Exception("Not matched") when the input is used up and
// the remaining regular expression cannot match the empty string.
def lex(r: Rexp, s: List[Char]): Val = s match {
  case first :: rest =>
    // lex the rest against the derivative, then put `first` back in
    val restValue = lex(der(first, r), rest)
    inj(r, first, restValue)
  case Nil =>
    if (!nullable(r)) throw new Exception("Not matched")
    else mkeps(r)
}
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
   143
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
   144
// String front end for lex: splits s into its characters and lexes them.
def lexing(r: Rexp, s: String): Val = {
  val chars: List[Char] = s.toList
  lex(r, chars)
}
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
   145
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
   146
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
   147
c2d36c3cf8ad run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 169
diff changeset
   148
// Regular expression parser
168
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   149
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   150
// Recursive-descent parser turning the string s into a Rexp.
//
// Grammar:
//   Regex  ::= Term [ '|' Regex ]
//   Term   ::= { Factor }                 (an empty term is ONE)
//   Factor ::= Base { '*' | '?' | '+' }
//   Base   ::= '(' Regex ')' | '.' | any other single character
//
// The parser is stateful: i is the index of the next unread character, so
// each Parser instance should be used for a single call of Regex().
// Regex() does not check that the whole input was consumed; parsing simply
// stops at an unmatched ')'.
case class Parser(s: String) {
  // index of the next unread character of s
  var i = 0

  // Next unread character, without consuming it. Callers must check more().
  def peek() = s(i)

  // Consumes the character c. Throws an Exception when the next character
  // is different or when the input is already used up (e.g. a missing ')').
  def eat(c: Char): Unit =
    if (!more()) throw new Exception("Expected " + c + " got end of input")
    else if (c == s(i)) i = i + 1
    else throw new Exception("Expected " + c + " got " + s(i))

  // Consumes and returns the next character. Callers must check more().
  def next() = { i = i + 1; s(i - 1) }

  // True while there is unread input left.
  def more() = s.length - i > 0

  // True when a term ends here: end of input, ')' or '|'.
  private def atTermEnd(): Boolean =
    !more() || peek() == ')' || peek() == '|'

  // True when the next character is one of the postfix operators.
  private def atPostfixOperator(): Boolean =
    more() && (peek() == '*' || peek() == '?' || peek() == '+')

  // Regex ::= Term [ '|' Regex ]  -- alternatives nest to the right.
  def Regex() : Rexp = {
    val t = Term()
    if (more() && peek() == '|') {
      eat('|')
      ALT(t, Regex())
    }
    else t
  }

  // Term ::= { Factor }  -- factors are combined into left-nested SEQs;
  // a term without any factor is ONE.
  def Term() : Rexp = {
    var f : Rexp = if (atTermEnd()) ONE else Factor()
    while (!atTermEnd()) {
      f = SEQ(f, Factor())
    }
    f
  }

  // Factor ::= Base { '*' | '?' | '+' }
  // All postfix operators are handled in ONE loop so that they may follow
  // each other in any order ("a?*", "a+?", ...). With a separate loop per
  // operator, an operator appearing "too late" was left unconsumed and then
  // parsed as a literal character.
  def Factor() : Rexp = {
    var b = Base()
    while (atPostfixOperator()) {
      b = next() match {
        case '*' => STAR(b)
        case '?' => ALT(b, ONE)       // zero or one
        case _   => SEQ(b, STAR(b))   // '+': one or more
      }
    }
    b
  }

  // Base ::= '(' Regex ')' | '.' | any other single character
  // A parenthesised group becomes a record with the empty name.
  def Base() : Rexp = {
    peek() match {
      case '(' => { eat('(') ; val r = Regex(); eat(')') ; RECD("",r) }
      case '.' => { eat('.'); ANYCHAR }
      case _ => CHAR(next())
    }
  }
}
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   202
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   203
// Smoke test of the parser; prints (a|STAR([(bc)]))
println(Parser("a|(bc)*").Regex().toString)
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   204
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   205
196
5fa8344a5176 added test processing
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 195
diff changeset
   206
/** Runs a single line of a test file and returns a short status string.
  *
  * The line is split on runs of tab characters. The second field is the regular
  * expression (as text), the third is the input string (a lone "-" stands for the
  * empty input) and the fourth is the expected result. The first field is not used
  * here.
  *
  * The regular expression is parsed with `Parser(...).Regex()`, wrapped in
  * `RECD("", ...)`, and run against the input with `lexing`. The first component
  * of `env2(value, reg, 0)` is joined into a string and compared with the
  * expected result.
  * NOTE(review): the exact meaning of `lexing`/`env2` is defined elsewhere in the
  * file -- confirm there.
  *
  * @param line one raw line of a test file
  * @return ""  for an empty line (previously this threw NoSuchElementException);
  *         "#" for a line starting with '#';
  *         "*" when the computed result equals the expected one;
  *         a multi-line report when they differ;
  *         a one-line diagnostic when the line has fewer than four tab-separated
  *         fields (previously this threw ArrayIndexOutOfBoundsException)
  */
def process_line(line: String) : String = {
  if (line.isEmpty) ""                 // `line.head` would throw on a blank line
  else if (line.head == '#') "#"
  else line.split("\\t+") match {
    // at least four fields; the first one and any extras are ignored
    case Array(_, reg_str, in_field, res_str, _*) =>
      val reg = RECD("", Parser(reg_str).Regex())
      val in_str = if (in_field == "-") "" else in_field
      val our_val = lexing(reg, in_str)
      val our_result = env2(our_val, reg, 0)._1.mkString("")
      if (our_result != res_str) {
        reg_str + ":   " +
        reg.toString + ": " +
        in_str + "   \n " +
        our_result +
        " => \n" + res_str + " ! " +
        our_val + ":" + reg + "\n"
      }
      else "*"
    // report a broken line instead of aborting the whole test run
    case _ =>
      "\nmalformed test line (expected at least 4 tab-separated fields): " + line + "\n"
  }
}
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   228
196
5fa8344a5176 added test processing
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 195
diff changeset
   229
/** Processes every line of one test file and prints the per-line status strings.
  *
  * Prints a "Processing <name>" header, reads the whole file into memory, then
  * prints the result of `process_line` for each line without adding separators.
  *
  * The underlying `Source` is closed once the lines have been read, so running
  * many test files does not leak file handles.
  *
  * @param name path of the test file to read
  */
def process_file(name : String) : Unit = {
  println("\nProcessing " + name)
  val source = Source.fromFile(name)
  // materialise the lines before closing: getLines() is a lazy iterator
  val filelines : List[String] =
    try source.getLines().toList
    finally source.close()
  filelines.foreach((s: String) => print(process_line(s)))
}
168
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   234
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   235
196
5fa8344a5176 added test processing
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 195
diff changeset
   236
// Test files handed to process_file, in the order they are run.
// The right-assoc entry is commented out and therefore skipped.
val files = List(
  "../tests/forced-assoc.txt",
  "../tests/left-assoc.txt",
  //"../tests/right-assoc.txt",
  "../tests/class.txt",
  "../tests/basic3.txt",
  "../tests/totest.txt",
  "../tests/repetition2.txt",
  "../tests/osx-bsd-critical.txt"
)

// Run every listed test file.
for (file <- files) process_file(file)
5fa8344a5176 added test processing
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 195
diff changeset
   246