| author | Christian Urban <christian.urban@kcl.ac.uk> | 
| Tue, 06 Oct 2020 13:32:00 +0100 | |
| changeset 776 | f3c3d0ee0f6b | 
| parent 742 | 155426396b5f | 
| permissions | -rw-r--r-- | 
| 623 | 1 | // A version which attempts to move whole strings, not | 
| 477 | 2 | // just characters, under derivatives whenever possible | 
| 3 | ||
| 422 | 4 | abstract class Rexp | 
| 407 
4b454a6d1814
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 5 | case object ZERO extends Rexp | 
| 
4b454a6d1814
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 6 | case object ONE extends Rexp | 
| 92 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 7 | case class CHAR(c: Char) extends Rexp | 
| 422 | 8 | case class ALT(r1: Rexp, r2: Rexp) extends Rexp | 
| 9 | case class SEQ(r1: Rexp, r2: Rexp) extends Rexp | |
| 10 | case class STAR(r: Rexp) extends Rexp | |
| 11 | case class NTIMES(r: Rexp, n: Int) extends Rexp | |
| 92 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 12 | |
| 623 | 13 | // the nullable function: tests whether the regular | 
| 92 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 14 | // expression can recognise the empty string | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 15 | def nullable (r: Rexp) : Boolean = r match {
 | 
| 407 
4b454a6d1814
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 16 | case ZERO => false | 
| 
4b454a6d1814
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 17 | case ONE => true | 
| 92 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 18 | case CHAR(_) => false | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 19 | case ALT(r1, r2) => nullable(r1) || nullable(r2) | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 20 | case SEQ(r1, r2) => nullable(r1) && nullable(r2) | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 21 | case STAR(_) => true | 
| 121 
43c116860e47
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
93diff
changeset | 22 | case NTIMES(r, i) => if (i == 0) true else nullable(r) | 
| 92 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 23 | } | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 24 | |
| 623 | 25 | // the derivative of a regular expression w.r.t. a character | 
| 92 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 26 | def der (c: Char, r: Rexp) : Rexp = r match {
 | 
| 407 
4b454a6d1814
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 27 | case ZERO => ZERO | 
| 
4b454a6d1814
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 28 | case ONE => ZERO | 
| 
4b454a6d1814
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 29 | case CHAR(d) => if (c == d) ONE else ZERO | 
| 92 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 30 | case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 31 | case SEQ(r1, r2) => | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 32 | if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 33 | else SEQ(der(c, r1), r2) | 
| 441 | 34 | case STAR(r1) => SEQ(der(c, r1), STAR(r1)) | 
| 455 
192f4c59633e
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
441diff
changeset | 35 | case NTIMES(r1, i) => | 
| 631 | 36 | if (i == 0) ZERO else SEQ(der(c, r1), NTIMES(r1, i - 1)) | 
| 92 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 37 | } | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 38 | |
| 422 | 39 | def simp(r: Rexp) : Rexp = r match {
 | 
| 40 |   case ALT(r1, r2) => (simp(r1), simp(r2)) match {
 | |
| 41 | case (ZERO, r2s) => r2s | |
| 42 | case (r1s, ZERO) => r1s | |
| 43 | case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s) | |
| 44 | } | |
| 45 |   case SEQ(r1, r2) =>  (simp(r1), simp(r2)) match {
 | |
| 46 | case (ZERO, _) => ZERO | |
| 47 | case (_, ZERO) => ZERO | |
| 48 | case (ONE, r2s) => r2s | |
| 49 | case (r1s, ONE) => r1s | |
| 50 | case (r1s, r2s) => SEQ(r1s, r2s) | |
| 51 | } | |
| 52 | case r => r | |
| 53 | } | |
| 54 | ||
| 623 | 55 | // an example | 
| 546 | 56 | val r = SEQ(SEQ(CHAR('x'), CHAR('y')), CHAR('z'))
 | 
| 57 | der('x', r)
 | |
| 58 | der('y', der('x', r))
 | |
| 59 | der('z', der('y', der('x', r)))
 | |
| 60 | simp(der('z', der('y', der('x', r))))
 | |
| 61 | ||
| 477 | 62 | // *new* | 
| 623 | 63 | // the derivative w.r.t. a string (iterates der) | 
| 414 | 64 | def ders2(s: List[Char], r: Rexp) : Rexp = (s, r) match {
 | 
| 407 
4b454a6d1814
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 65 | case (Nil, r) => r | 
| 
4b454a6d1814
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 66 | case (s, ZERO) => ZERO | 
| 
4b454a6d1814
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 67 | case (s, ONE) => if (s == Nil) ONE else ZERO | 
| 
4b454a6d1814
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 68 | case (s, CHAR(c)) => if (s == List(c)) ONE else | 
| 
4b454a6d1814
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 69 | if (s == Nil) CHAR(c) else ZERO | 
| 467 
3fc9b036321d
fixed bug
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
455diff
changeset | 70 | case (s, ALT(r1, r2)) => ALT(ders2(s, r1), ders2(s, r2)) | 
| 422 | 71 | case (c::s, r) => ders2(s, simp(der(c, r))) | 
| 407 
4b454a6d1814
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 72 | } | 
| 
4b454a6d1814
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 73 | |
| 623 | 74 | // the main matcher function | 
| 631 | 75 | def matcher(r: Rexp, s: String) : Boolean = | 
| 76 | nullable(ders2(s.toList, r)) | |
| 92 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 77 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 78 | |
| 623 | 79 | // one or zero | 
| 407 
4b454a6d1814
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 80 | def OPT(r: Rexp) = ALT(r, ONE) | 
| 92 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 81 | |
| 477 | 82 | |
| 83 | // Test Cases | |
| 84 | ||
| 422 | 85 | def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
 | 
| 441 | 86 | val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
 | 
| 422 | 87 | |
| 623 | 88 | // for measuring time | 
| 92 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 89 | def time_needed[T](i: Int, code: => T) = {
 | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 90 | val start = System.nanoTime() | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 91 | for (j <- 1 to i) code | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 92 | val end = System.nanoTime() | 
| 631 | 93 | (end - start) / (i * 1.0e9) | 
| 441 | 94 | } | 
| 92 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 95 | |
| 623 | 96 | |
| 97 | // test: (a?{n}) (a{n})
 | |
| 638 | 98 | for (i <- 0 to 11000 by 1000) {
 | 
| 631 | 99 |   println(f"$i: ${time_needed(2, matcher(EVIL1(i), "a" * i))}%.5f")
 | 
| 92 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 100 | } | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 101 | |
| 623 | 102 | |
| 103 | // test: (a*)* b | |
| 631 | 104 | for (i <- 0 to 7000000 by 500000) {
 | 
| 105 |   println(f"$i: ${time_needed(2, matcher(EVIL2, "a" * i))}%.5f")
 | |
| 441 | 106 | } | 
| 107 | ||
| 108 | ||
| 109 | ||
| 623 | 110 | // the size of a regular expressions - for testing purposes | 
| 550 | 111 | def size(r: Rexp) : Int = r match {
 | 
| 112 | case ZERO => 1 | |
| 113 | case ONE => 1 | |
| 114 | case CHAR(_) => 1 | |
| 115 | case ALT(r1, r2) => 1 + size(r1) + size(r2) | |
| 116 | case SEQ(r1, r2) => 1 + size(r1) + size(r2) | |
| 117 | case STAR(r) => 1 + size(r) | |
| 118 | case NTIMES(r, _) => 1 + size(r) | |
| 119 | } | |
| 120 | ||
| 121 | ||
| 122 | // string of a regular expressions - for testing purposes | |
| 123 | def string(r: Rexp) : String = r match {
 | |
| 124 | case ZERO => "0" | |
| 125 | case ONE => "1" | |
| 126 | case CHAR(c) => c.toString | |
| 127 |   case ALT(r1, r2) => s"(${string(r1)} + ${string(r2)})"
 | |
| 128 |   case SEQ(CHAR(c), CHAR(d)) => s"${c}${d}"
 | |
| 129 |   case SEQ(r1, r2) => s"(${string(r1)} ~ ${string(r2)})"
 | |
| 130 |   case STAR(r) => s"(${string(r)})*"
 | |
| 131 |   case NTIMES(r, n) =>  s"(${string(r)}){${n}}"
 | |
| 132 | } | |
| 133 | ||
| 134 | ||
| 135 | // test: ("a" | "aa")*
 | |
| 136 | val EVIL3 = STAR(ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a'))))
 | |
| 137 | ||
| 623 | 138 | // test: ("" | "a" | "aa")*
 | 
| 631 | 139 | val EVIL4 = STAR(ALT(ONE, ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a')))))
 | 
| 550 | 140 | |
| 141 | val t1  = ders2("a".toList, EVIL3)
 | |
| 142 | val t1s = simp(t1) | |
| 143 | val t2  = ders2("aa".toList, t1s)
 | |
| 144 | val t2s = simp(t2) | |
| 145 | val t3  = ders2("aaa".toList, t2s)
 | |
| 146 | val t3s = simp(t3) | |
| 147 | val t4  = ders2("aaaa".toList, t3s)
 | |
| 148 | val t4s = simp(t4) | |
| 149 | println(string(t1) + " " + size(t1)) | |
| 150 | println("s " + string(t1s) + "    " + size(t1s))
 | |
| 151 | println(string(t2) + " " + size(t2)) | |
| 152 | println("s " + string(t2s) + "    " + size(t2s))
 | |
| 153 | println(string(t3) + " " + size(t3)) | |
| 154 | println("s " + string(t3s) + "    " + size(t3s))
 | |
| 155 | println(string(t4) + " " + size(t4)) | |
| 156 | println("s " + string(t4s) + "    " + size(t4s))
 |