// A simple matcher for basic regular expressions
//
// Call the testcases with X = {1,2,3}
//
// amm re1.sc testX
//
// or
//
// amm re1.sc all
//
// regular expressions (as enum in Scala 3)
enum Rexp {
case ZERO // matches nothing
case ONE // matches an empty string
case CHAR(c: Char) // matches a character c
case ALT(r1: Rexp, r2: Rexp) // alternative
case SEQ(r1: Rexp, r2: Rexp) // sequence
case STAR(r: Rexp) // star
}
import Rexp._
/*
UPDATE: The videos and handouts still us the older syntax
with classes, which still works but is more verbose
abstract class Rexp
case object ZERO extends Rexp
case object ONE extends Rexp
case class CHAR(c: Char) extends Rexp
case class ALT(r1: Rexp, r2: Rexp) extends Rexp
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
case class STAR(r: Rexp) extends Rexp
*/
// nullable function: tests whether a regular
// expression can recognise the empty string
def nullable(r: Rexp) : Boolean = r match {
case ZERO => false
case ONE => true
case CHAR(_) => false
case ALT(r1, r2) => nullable(r1) || nullable(r2)
case SEQ(r1, r2) => nullable(r1) && nullable(r2)
case STAR(_) => true
}
// the derivative of a regular expression w.r.t. a character
def der(c: Char, r: Rexp) : Rexp = r match {
case ZERO => ZERO
case ONE => ZERO
case CHAR(d) => if (c == d) ONE else ZERO
case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
case SEQ(r1, r2) =>
if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
else SEQ(der(c, r1), r2)
case STAR(r1) => SEQ(der(c, r1), STAR(r1))
}
// the derivative w.r.t. a string (iterates der)
def ders(s: List[Char], r: Rexp) : Rexp = s match {
case Nil => r
case c::s => ders(s, der(c, r))
}
// the main matcher function
def matcher(r: Rexp, s: String) : Boolean =
nullable(ders(s.toList, r))
// some examples from the homework
val r = SEQ(CHAR('a'), CHAR('c'))
matcher(r, "ac")
val r1 = STAR(ALT(SEQ(CHAR('a'), CHAR('b')), CHAR('b')))
der('a', r)
der('b', r)
der('c', r)
val r2 = SEQ(SEQ(CHAR('x'), CHAR('y')), CHAR('z'))
der('x', r2)
der('y', der('x', r2))
der('z', der('y', der('x', r2)))
// Test Cases
//============
// the optional regular expression (one or zero times)
def OPT(r: Rexp) = ALT(r, ONE) // r + 1
// the n-times regular expression (explicitly expanded to SEQs)
def NTIMES(r: Rexp, n: Int) : Rexp = n match {
case 0 => ONE
case 1 => r
case n => SEQ(r, NTIMES(r, n - 1))
}
// the evil regular expression (a?){n} a{n}
def EVIL1(n: Int) =
SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
// the evil regular expression (a*)* b
val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
// for measuring time
def time_needed[T](i: Int, code: => T) = {
val start = System.nanoTime()
for (j <- 1 to i) code
val end = System.nanoTime()
(end - start) / (i * 1.0e9)
}
// test: (a?{n}) (a{n})
@main
def test1() = {
println("Test (a?{n}) (a{n})")
for (i <- 0 to 20 by 2) {
println(f"$i: ${time_needed(2, matcher(EVIL1(i), "a" * i))}%.5f")
}
}
// test: (a*)* b
@main
def test2() = {
println("Test (a*)* b")
for (i <- 0 to 20 by 2) {
println(f"$i: ${time_needed(2, matcher(EVIL2, "a" * i))}%.5f")
}
}
// the size of a regular expressions - for testing purposes
def size(r: Rexp) : Int = r match {
case ZERO => 1
case ONE => 1
case CHAR(_) => 1
case ALT(r1, r2) => 1 + size(r1) + size(r2)
case SEQ(r1, r2) => 1 + size(r1) + size(r2)
case STAR(r) => 1 + size(r)
}
// the expicit expansion in EVIL1(n) increases
// drastically its size - (a?){n} a{n}
size(EVIL1(1)) // 5
size(EVIL1(3)) // 17
size(EVIL1(5)) // 29
size(EVIL1(7)) // 41
size(EVIL1(20)) // 119
size(ders(("a" * 20).toList, EVIL1(20)))
// given a regular expression and building successive
// derivatives might result into bigger and bigger
// regular expressions...here is an example for this:
// (a + aa)*
val BIG = STAR(ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a'))))
size(ders("".toList, BIG)) // 13
size(ders("aa".toList, BIG)) // 51
size(ders("aaaa".toList, BIG)) // 112
size(ders("aaaaaa".toList, BIG)) // 191
size(ders("aaaaaaaa".toList, BIG)) // 288
size(ders("aaaaaaaaaa".toList, BIG)) // 403
size(ders("aaaaaaaaaaaa".toList, BIG)) // 536
size(ders(("a" * 30).toList, BIG)) // 31010539
@main
def test3() = {
println("Test (a + aa)*")
for (i <- 0 to 30 by 5) {
println(f"$i: ${time_needed(2, matcher(BIG, "a" * i))}%.5f")
}
}
// Some code for pretty printing regexes as trees
def implode(ss: Seq[String]) = ss.mkString("\n")
def explode(s: String) = s.split("\n").toList
def lst(s: String) : String = explode(s) match {
case hd :: tl => implode(" └" ++ hd :: tl.map(" " ++ _))
case Nil => ""
}
def mid(s: String) : String = explode(s) match {
case hd :: tl => implode(" ├" ++ hd :: tl.map(" │" ++ _))
case Nil => ""
}
def indent(ss: Seq[String]) : String = ss match {
case init :+ last => implode(init.map(mid) :+ lst(last))
case _ => ""
}
def pp(e: Rexp) : String = e match {
case ZERO => "0\n"
case ONE => "1\n"
case CHAR(c) => s"$c\n"
case ALT(r1, r2) => "ALT\n" ++ pps(r1, r2)
case SEQ(r1, r2) => "SEQ\n" ++ pps(r1, r2)
case STAR(r) => "STAR\n" ++ pps(r)
}
def pps(es: Rexp*) = indent(es.map(pp))
@main
def test4() = {
println(pp(r2))
println(pp(ders("x".toList, r2)))
println(pp(ders("xy".toList, r2)))
println(pp(ders("xyz".toList, r2)))
}
@main
def all() = { test1(); test2() ; test3() ; test4() }