| author | Christian Urban <urbanc@in.tum.de> | 
| Fri, 12 Oct 2018 10:16:54 +0100 | |
| changeset 576 | 414f1daf5728 | 
| parent 499 | b06c81c0b12f | 
| permissions | -rw-r--r-- | 
| 92 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
91diff
changeset | 1 | import scala.annotation.tailrec | 
| 490 | 2 | import scala.language.implicitConversions | 
| 92 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
91diff
changeset | 3 | |
| 91 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 4 | abstract class Rexp | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 5 | |
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 6 | case object NULL extends Rexp | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 7 | case object EMPTY extends Rexp | 
| 92 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
91diff
changeset | 8 | case object ALLCHAR extends Rexp | 
| 91 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 9 | case class CHAR(c: Char) extends Rexp | 
| 92 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
91diff
changeset | 10 | case class STR(s: String) extends Rexp | 
| 91 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 11 | case class ALT(r1: Rexp, r2: Rexp) extends Rexp | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 12 | case class SEQ(r1: Rexp, r2: Rexp) extends Rexp | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 13 | case class STAR(r: Rexp) extends Rexp | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 14 | case class NOT(r: Rexp) extends Rexp | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 15 | case class REP(r: Rexp, n: Int) extends Rexp | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 16 | |
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 17 | // some convenience for typing in regular expressions | 
| 92 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
91diff
changeset | 18 | implicit def string2rexp(s : String) : Rexp = STR(s) | 
| 91 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 19 | |
| 499 | 20 | implicit def RexpOps(r: Rexp) : Rexp = new {
 | 
| 91 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 21 | def | (s: Rexp) = ALT(r, s) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 22 | def % = STAR(r) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 23 | def %(n: Int) = REP(r, n) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 24 | def %%(n: Int) = SEQ(REP(r, n), STAR(r)) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 25 | def ? = ALT(EMPTY, r) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 26 | def unary_! = NOT(r) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 27 | def ~ (s: Rexp) = SEQ(r, s) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 28 | } | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 29 | |
| 499 | 30 | implicit def stringOps(s: String) : Rexp = new {
 | 
| 91 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 31 | def | (r: Rexp) = ALT(s, r) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 32 | def | (r: String) = ALT(s, r) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 33 | def % = STAR(s) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 34 | def %(n: Int) = REP(s, n) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 35 | def %%(n: Int) = SEQ(REP(s, n), STAR(s)) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 36 | def ? = ALT(EMPTY, s) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 37 | def unary_! = NOT(s) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 38 | def ~ (r: Rexp) = SEQ(s, r) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 39 | def ~ (r: String) = SEQ(s, r) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 40 | } | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 41 | |
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 42 | |
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 43 | // nullable function: tests whether the regular | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 44 | // expression can recognise the empty string | 
| 92 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
91diff
changeset | 45 | |
| 91 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 46 | def nullable (r: Rexp) : Boolean = r match {
 | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 47 | case NULL => false | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 48 | case EMPTY => true | 
| 92 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
91diff
changeset | 49 | case ALLCHAR => false | 
| 91 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 50 | case CHAR(_) => false | 
| 92 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
91diff
changeset | 51 | case STR(s) => s.isEmpty | 
| 91 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 52 | case ALT(r1, r2) => nullable(r1) || nullable(r2) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 53 | case SEQ(r1, r2) => nullable(r1) && nullable(r2) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 54 | case STAR(_) => true | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 55 | case NOT(r) => !(nullable(r)) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 56 | case REP(r, i) => if (i == 0) true else nullable(r) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 57 | } | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 58 | |
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 59 | // derivative of a regular expression w.r.t. a character | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 60 | def der (c: Char, r: Rexp) : Rexp = r match {
 | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 61 | case NULL => NULL | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 62 | case EMPTY => NULL | 
| 92 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
91diff
changeset | 63 | case ALLCHAR => EMPTY | 
| 91 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 64 | case CHAR(d) => if (c == d) EMPTY else NULL | 
| 92 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
91diff
changeset | 65 | case STR(s) => if (s.isEmpty || s.head != c) NULL else STR(s.tail) | 
| 91 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 66 | case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 67 | case SEQ(r1, r2) => | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 68 | if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 69 | else SEQ(der(c, r1), r2) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 70 | case STAR(r) => SEQ(der(c, r), STAR(r)) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 71 | case NOT(r) => NOT(der (c, r)) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 72 | case REP(r, i) => | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 73 | if (i == 0) NULL else SEQ(der(c, r), REP(r, i - 1)) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 74 | } | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 75 | |
| 490 | 76 | |
| 91 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 77 | // derivative w.r.t. a string (iterates der) | 
| 92 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
91diff
changeset | 78 | @tailrec | 
| 91 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 79 | def ders (s: List[Char], r: Rexp) : Rexp = s match {
 | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 80 | case Nil => r | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 81 | case c::s => ders(s, der(c, r)) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 82 | } | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 83 | |
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 84 | // main matcher function | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 85 | def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 86 | |
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 87 | //examples | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 88 | val digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 89 | val int = ("+" | "-").? ~ digit.%%(1)
 | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 90 | val real = ("+" | "-").? ~ digit.%%(1) ~ ("." ~ digit.%%(1)).? ~ (("e" | "E") ~ ("+" | "-").? ~ digit.%%(1)).?
 | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 91 | |
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 92 | val ints = List("0", "-4534", "+049", "99")
 | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 93 | val reals = List("0.9", "-12.8", "+91.0", "9e12", "+9.21E-12", "-512E+01")
 | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 94 | val errs = List("", "-", "+", "+-1", "-+2", "2-")
 | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 95 | |
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 96 | ints.map(s => matcher(int, s)) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 97 | reals.map(s => matcher(int, s)) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 98 | errs.map(s => matcher(int, s)) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 99 | |
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 100 | ints.map(s => matcher(real, s)) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 101 | reals.map(s => matcher(real, s)) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 102 | errs.map(s => matcher(real, s)) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 103 | |
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 104 | |
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 105 | |
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 106 | def RTEST(n: Int) = ("a".? %(n)) ~ ("a" %(n))
 | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 107 | |
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 108 | def time_needed[T](i: Int, code: => T) = {
 | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 109 | val start = System.nanoTime() | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 110 | for (j <- 1 to i) code | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 111 | val end = System.nanoTime() | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 112 | (end - start)/(i * 1.0e9) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 113 | } | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 114 | |
| 92 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
91diff
changeset | 115 | for (i <- 1 to 12000 by 500) {
 | 
| 91 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 116 | println(i + ": " + "%.5f".format(time_needed(1, matcher(RTEST(i), "a" * i)))) | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 117 | } | 
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 118 | |
| 
47f86885d481
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 119 |