progs/scala/tests.scala
author Christian Urban <christian dot urban at kcl dot ac dot uk>
Tue, 17 May 2016 03:47:33 +0100
changeset 180 42ffaca7c85e
parent 169 072a701bb153
child 195 c2d36c3cf8ad
permissions -rw-r--r--
isarfied the simplify theory
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
168
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     1
import scala.language.implicitConversions    
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     2
import scala.language.reflectiveCalls
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     3
import scala.annotation.tailrec   
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     4
import scala.io.Source
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     5
import scala.util.parsing.combinator._
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     6
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     7
/** Abstract syntax tree for the regular expressions handled in this file.
  *
  * The hierarchy is sealed so that the compiler can check pattern matches
  * over `Rexp` for exhaustiveness; all cases are declared right below.
  */
sealed abstract class Rexp

/** Constant node; it is what `Parser.Term()` yields for an empty term. */
case object ZERO extends Rexp

/** Constant node. NOTE(review): presumably the empty-string regex -- nothing
  * in this file constructs or interprets it, so confirm against its users. */
case object ONE extends Rexp

/** A single literal character; prints as that character. */
case class CHAR(c: Char) extends Rexp {
  override def toString: String = c.toString
}

/** Alternative of two regexes; prints as `(r1|r2)`. */
case class ALT(r1: Rexp, r2: Rexp) extends Rexp {
  override def toString: String = "(" + r1.toString + "|" + r2.toString + ")"
}

/** Sequence of two regexes; prints as `(r1r2)`. */
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp {
  override def toString: String = "(" + r1.toString + r2.toString + ")"
}

/** Node built by the parser for a postfix `*`; keeps the default case-class
  * `toString`, i.e. `STAR(...)`. */
case class STAR(r: Rexp) extends Rexp

/** A regex tagged with a name `x`. NOTE(review): not produced by the parser
  * in this file; purpose of the tag must be confirmed at its use sites. */
case class RECD(x: String, r: Rexp) extends Rexp
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    21
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    22
/** Hand-written recursive-descent parser turning a string into a `Rexp`.
  *
  * Grammar, as implemented by the methods below:
  * {{{
  *   Regex  ::= Term [ '|' Regex ]
  *   Term   ::= { Factor }            (an empty term yields ZERO)
  *   Factor ::= Base { '*' }
  *   Base   ::= '(' Regex ')'  |  any single character
  * }}}
  *
  * The parser is stateful: `i` is the index of the next unread character of
  * `s`, and every parsing method advances it. `Regex()` stops at the first
  * character it cannot continue with (for example an unmatched `)`), without
  * checking that the whole input was consumed.
  *
  * All read operations are bounds-checked: running off the end of the input
  * raises an `Exception` with an "Expected ... got end of input" message
  * rather than a raw `StringIndexOutOfBoundsException`.
  *
  * @param s the regular-expression text to parse
  */
case class Parser(s: String) {
  /** Index into `s` of the next character to be read. */
  var i = 0

  /** Aborts parsing because the input ended where `expected` was required. */
  private def unexpectedEnd(expected: String): Nothing =
    throw new Exception("Expected " + expected + " got end of input")

  /** True while the current position can start another factor of a term. */
  private def atFactorStart: Boolean =
    more() && peek() != ')' && peek() != '|'

  /** Returns the next character without consuming it.
    * Throws an `Exception` if the input is exhausted.
    */
  def peek() : Char =
    if (more()) s(i) else unexpectedEnd("a character")

  /** Consumes the next character, which must be `c`.
    * Throws an `Exception` if the next character differs or the input is
    * exhausted; `i` is left unchanged in both failure cases.
    */
  def eat(c: Char) : Unit =
    if (!more()) unexpectedEnd(c.toString)
    else if (c == s(i)) i = i + 1
    else throw new Exception("Expected " + c + " got " + s(i))

  /** Consumes and returns the next character.
    * Throws an `Exception` (without advancing) if the input is exhausted.
    */
  def next() : Char = {
    val c = peek()
    i = i + 1
    c
  }

  /** True if at least one unread character remains. */
  def more() : Boolean = i < s.length

  /** Parses `Term [ '|' Regex ]`; alternatives nest to the right. */
  def Regex() : Rexp = {
    val t = Term()
    if (more() && peek() == '|') {
      eat('|')
      ALT(t, Regex())
    }
    else t
  }

  /** Parses a possibly empty run of factors, combined left-to-right with SEQ.
    * Returns ZERO when no factor is present (end of input, `)` or `|`).
    */
  def Term() : Rexp = {
    var f : Rexp = if (atFactorStart) Factor() else ZERO
    // each Factor() consumes at least one character, so this loop terminates
    while (atFactorStart) {
      f = SEQ(f, Factor())
    }
    f
  }

  /** Parses a base followed by any number of `*`, each wrapping in STAR. */
  def Factor() : Rexp = {
    var b = Base()
    while (more() && peek() == '*') {
      eat('*')
      b = STAR(b)
    }
    b
  }

  /** Parses a parenthesised regex or a single literal character.
    * Throws an `Exception` at end of input or on a missing `)`.
    */
  def Base() : Rexp = {
    peek() match {
      case '(' => { eat('(') ; val r = Regex(); eat(')') ; r }
      case _ => CHAR(next())
    }
  }
}
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    67
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    68
// Smoke test: parse a fixed sample regex and print the resulting tree.
println(Parser("a|(bc)*").Regex().toString)
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    69
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    70
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    71
/** Parses the regex found in one line of the test file.
  *
  * The line is split on runs of tab characters and the second field is taken
  * as the regex text; a line with fewer than two fields makes the indexing
  * fail with an exception.
  *
  * @param line one tab-separated input line
  * @return the regex text, followed by ":   " and the printed parse tree
  */
def process(line: String) : String = {
  val fields = line.split("\\t+")
  val regexText = fields(1)
  val parsed = Parser(regexText).Regex()
  s"${regexText}:   ${parsed}"
}
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    75
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    76
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    77
// Test-case file, resolved relative to the working directory of the script.
val filename = "../tests/forced-assoc.txt"

// Read every line eagerly so that the underlying file handle can be closed
// before any processing starts (the source was previously never closed).
val filelines : List[String] = {
  val source = Source.fromFile(filename)
  try source.getLines().toList finally source.close()
}

// Parse the regex on each line and print it next to its parse tree.
filelines.foreach((s: String) => println(process(s)))
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    82