progs/scala/tests.scala
author Christian Urban <christian dot urban at kcl dot ac dot uk>
Fri, 06 May 2016 11:33:21 +0100
changeset 168 6b0a1976f89a
child 169 072a701bb153
permissions -rw-r--r--
added parser for regexes
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
168
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     1
import scala.language.implicitConversions    
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     2
import scala.language.reflectiveCalls
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     3
import scala.annotation.tailrec   
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     4
import scala.io.Source
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     5
import scala.util.parsing.combinator._
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     6
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     7
/** Root of the regular-expression syntax tree built by the parser in this file. */
abstract class Rexp

// Leaf nodes without payload. ZERO is what the parser yields for an empty
// term; ONE is not produced by any code in this file.
// NOTE(review): presumably ZERO = "matches nothing" and ONE = "matches the
// empty string", following the usual convention -- confirm.
case object ZERO extends Rexp
case object ONE extends Rexp

/** A single literal character; prints as that character. */
case class CHAR(c: Char) extends Rexp {
  override def toString: String = String.valueOf(c)
}

/** Alternative of two regexes; prints as "(r1|r2)". */
case class ALT(r1: Rexp, r2: Rexp) extends Rexp {
  override def toString: String = s"(${r1.toString}|${r2.toString})"
}

/** Sequence of two regexes; prints as "(r1r2)". */
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp {
  override def toString: String = s"(${r1.toString}${r2.toString})"
}

/** Star applied to `r`; keeps the default case-class rendering "STAR(r)". */
case class STAR(r: Rexp) extends Rexp

/** A regex `r` tagged with the string `x`; not constructed anywhere in this file. */
case class RECD(x: String, r: Rexp) extends Rexp
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    21
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    22
/** Hand-written recursive-descent parser that turns the string `s` into a Rexp.
  *
  * Grammar, as implemented by the methods below:
  * {{{
  *   Regex  ::= Term [ '|' Regex ]
  *   Term   ::= { Factor }            (an empty term yields ZERO)
  *   Factor ::= Base { '*' }
  *   Base   ::= '(' Regex ')' | any single character
  * }}}
  *
  * The parser is stateful: `i` is the index of the next unread character and
  * is never reset, so a second call to `Regex()` on the same instance
  * continues where the first one stopped.
  *
  * NOTE(review): `Regex()` does not verify that the whole input was consumed;
  * for an input such as "a)b" it returns after `a` and leaves the rest unread.
  */
case class Parser(s: String) {
  /** Index of the next unread character of `s`. */
  var i = 0

  /** Returns the next unread character without consuming it.
    * Callers must check `more()` first; at end of input this indexes past `s`.
    */
  def peek(): Char = s(i)

  /** Consumes the next character, which must be `c`.
    *
    * @throws Exception if the next character differs from `c`, or if the
    *                   input is already exhausted (e.g. an unclosed "(")
    */
  def eat(c: Char): Unit =
    // Guard the end of input explicitly so that a missing ')' is reported as
    // a parse error rather than as a raw StringIndexOutOfBoundsException.
    if (!more()) throw new Exception("Expected " + c + " got end of input")
    else if (c == s(i)) i = i + 1
    else throw new Exception("Expected " + c + " got " + s(i))

  /** Consumes and returns the next character. */
  def next(): Char = { i = i + 1; s(i - 1) }

  /** True while there is unread input left. */
  def more(): Boolean = s.length - i > 0

  /** Parses `Term [ '|' Regex ]`; alternatives nest to the right. */
  def Regex() : Rexp = {
    val t = Term()
    if (more() && peek() == '|') {
      eat('|')
      ALT(t, Regex())
    }
    else t
  }

  /** Parses a possibly empty run of factors, nesting SEQ nodes to the left.
    * A term ends at the end of input, at ')' or at '|'; an empty term is ZERO.
    */
  def Term() : Rexp = {
    def inTerm: Boolean = more() && peek() != ')' && peek() != '|'

    var f : Rexp = if (inTerm) Factor() else ZERO
    while (inTerm) {
      f = SEQ(f, Factor())
    }
    f
  }

  /** Parses a base followed by any number of '*', each wrapping the result in STAR. */
  def Factor() : Rexp = {
    var b = Base()
    while (more() && peek() == '*') {
      eat('*')
      b = STAR(b)
    }
    b
  }

  /** Parses a parenthesised regex or a single literal character.
    * Only reached from `Term` via `Factor`, which guarantees `more()` holds here.
    */
  def Base() : Rexp = {
    peek() match {
      case '(' => { eat('(') ; val r = Regex(); eat(')') ; r }
      case _ => CHAR(next())
    }
  }
}
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    68
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    69
// Smoke test: parse one sample regex and print the resulting tree.
val sampleTree = Parser("a|(bc)*").Regex()
println(sampleTree)
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    70
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    71
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    72
/** Parses the regex held in the second tab-separated field of `line`.
  *
  * Fields are separated by runs of one or more tab characters. The result is
  * the raw regex text, the separator ":   ", and the printed parse tree.
  * A line with fewer than two fields fails on the `fields(1)` lookup.
  */
def process(line: String) : String = {
  val fields = line.split("\\t+")
  val regexText = fields(1)
  val tree = Parser(regexText).Regex()
  regexText + ":   " + tree.toString
}
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    76
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    77
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    78
// Test-data file, resolved relative to the current working directory.
val filename = "../tests/forced-assoc.txt"

// Read all lines eagerly, then release the file handle; the Source was
// previously left open for the rest of the run.
val filelines : List[String] = {
  val source = Source.fromFile(filename)
  try source.getLines().toList
  finally source.close()
}

// Parse the regex on every line and print it next to its parse tree.
filelines.foreach(line => println(process(line)))
6b0a1976f89a added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    83