| author | Christian Urban <christian.urban@kcl.ac.uk> | 
| Tue, 21 Oct 2025 17:09:56 +0200 | |
| changeset 1015 | e8ba0237f005 | 
| parent 742 | b5b5583a3a08 | 
| permissions | -rw-r--r-- | 
| 258 
1e4da6d2490c
updated programs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 1 | abstract class Rexp | 
| 422 | 2 | case object ZERO extends Rexp | 
| 3 | case object ONE extends Rexp | |
| 49 | 4 | case class CHAR(c: Char) extends Rexp | 
| 258 
1e4da6d2490c
updated programs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 5 | case class ALT(r1: Rexp, r2: Rexp) extends Rexp | 
| 
1e4da6d2490c
updated programs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 6 | case class SEQ(r1: Rexp, r2: Rexp) extends Rexp | 
| 
1e4da6d2490c
updated programs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 7 | case class STAR(r: Rexp) extends Rexp | 
| 
1e4da6d2490c
updated programs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 8 | case class NTIMES(r: Rexp, n: Int) extends Rexp | 
| 
1e4da6d2490c
updated programs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 9 | |
| 
1e4da6d2490c
updated programs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 10 | def simp(r: Rexp): Rexp = r match {
 | 
| 
1e4da6d2490c
updated programs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 11 |   case ALT(r1, r2) => {
 | 
| 341 
ac1187b2e5c9
updated program
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
261diff
changeset | 12 |     (simp(r1), simp(r2)) match {
 | 
| 422 | 13 | case (ZERO, r2s) => r2s | 
| 14 | case (r1s, ZERO) => r1s | |
| 341 
ac1187b2e5c9
updated program
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
261diff
changeset | 15 | case (r1s, r2s) => if (r1s == r2s) r1s else ALT(r1s, r2s) | 
| 258 
1e4da6d2490c
updated programs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 16 | } | 
| 49 | 17 | } | 
| 258 
1e4da6d2490c
updated programs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 18 |   case SEQ(r1, r2) => {
 | 
| 341 
ac1187b2e5c9
updated program
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
261diff
changeset | 19 |     (simp(r1), simp(r2)) match {
 | 
| 422 | 20 | case (ZERO, _) => ZERO | 
| 21 | case (_, ZERO) => ZERO | |
| 22 | case (ONE, r2s) => r2s | |
| 23 | case (r1s, ONE) => r1s | |
| 341 
ac1187b2e5c9
updated program
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
261diff
changeset | 24 | case (r1s, r2s) => SEQ(r1s, r2s) | 
| 258 
1e4da6d2490c
updated programs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 25 | } | 
| 49 | 26 | } | 
| 258 
1e4da6d2490c
updated programs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 27 | case NTIMES(r, n) => NTIMES(simp(r), n) | 
| 
1e4da6d2490c
updated programs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 28 | case r => r | 
| 49 | 29 | } | 
| 30 | ||
| 31 | def nullable (r: Rexp) : Boolean = r match {
 | |
| 422 | 32 | case ZERO => false | 
| 33 | case ONE => true | |
| 49 | 34 | case CHAR(_) => false | 
| 35 | case ALT(r1, r2) => nullable(r1) || nullable(r2) | |
| 36 | case SEQ(r1, r2) => nullable(r1) && nullable(r2) | |
| 37 | case STAR(_) => true | |
| 59 | 38 | case NTIMES(r, i) => if (i == 0) true else nullable(r) | 
| 49 | 39 | } | 
| 40 | ||
| 41 | def der (c: Char, r: Rexp) : Rexp = r match {
 | |
| 422 | 42 | case ZERO => ZERO | 
| 43 | case ONE => ZERO | |
| 44 | case CHAR(d) => if (c == d) ONE else ZERO | |
| 49 | 45 | case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) | 
| 46 | case SEQ(r1, r2) => | |
| 47 | if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) | |
| 48 | else SEQ(der(c, r1), r2) | |
| 49 | case STAR(r) => SEQ(der(c, r), STAR(r)) | |
| 50 | case NTIMES(r, i) => | |
| 422 | 51 | if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1)) | 
| 49 | 52 | } | 
| 53 | ||
| 54 | def ders (s: List[Char], r: Rexp) : Rexp = s match {
 | |
| 55 | case Nil => r | |
| 258 
1e4da6d2490c
updated programs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 56 | case c::s => ders(s, simp(der(c, r))) | 
| 49 | 57 | } | 
| 58 | ||
| 261 
24531cfaa36a
updated handouts
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
258diff
changeset | 59 | def matches(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) | 
| 49 | 60 | |
| 61 | ||
| 62 | //one or zero | |
| 422 | 63 | def OPT(r: Rexp) = ALT(r, ONE) | 
| 49 | 64 | |
| 258 
1e4da6d2490c
updated programs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
121diff
changeset | 65 | def EVIL(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
 | 
| 49 | 66 | |
| 67 | def time_needed[T](i: Int, code: => T) = {
 | |
| 68 | val start = System.nanoTime() | |
| 69 | for (j <- 1 to i) code | |
| 70 | val end = System.nanoTime() | |
| 71 | (end - start)/(i * 1.0e9) | |
| 72 | } | |
| 73 | ||
| 422 | 74 | /* | 
| 75 | for (i <- 1 to 9001 by 1000) {
 | |
| 261 
24531cfaa36a
updated handouts
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
258diff
changeset | 76 | println(i + " " + "%.5f".format(time_needed(1, matches(EVIL(i), "a" * i)))) | 
| 49 | 77 | } | 
| 422 | 78 | */ | 
| 49 | 79 | |
| 363 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 80 | // some convenience for typing in regular expressions | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 81 | import scala.language.implicitConversions | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 82 | import scala.language.reflectiveCalls | 
| 422 | 83 | |
| 363 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 84 | def charlist2rexp(s : List[Char]) : Rexp = s match {
 | 
| 422 | 85 | case Nil => ONE | 
| 363 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 86 | case c::Nil => CHAR(c) | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 87 | case c::s => SEQ(CHAR(c), charlist2rexp(s)) | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 88 | } | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 89 | implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 90 | |
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 91 | implicit def RexpOps (r: Rexp) = new {
 | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 92 | def | (s: Rexp) = ALT(r, s) | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 93 | def % = STAR(r) | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 94 | def ~ (s: Rexp) = SEQ(r, s) | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 95 | } | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 96 | |
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 97 | implicit def stringOps (s: String) = new {
 | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 98 | def | (r: Rexp) = ALT(s, r) | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 99 | def | (r: String) = ALT(s, r) | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 100 | def % = STAR(s) | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 101 | def ~ (r: Rexp) = SEQ(s, r) | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 102 | def ~ (r: String) = SEQ(s, r) | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 103 | } | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 104 | |
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 105 | |
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 106 | def PLUS(r: Rexp) = SEQ(r, STAR(r)) | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 107 | def RANGE(s: List[Char]) : Rexp = s match {
 | 
| 422 | 108 | case Nil => ZERO | 
| 363 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 109 | case c::s => ALT(CHAR(c), RANGE(s)) | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 110 | } | 
| 422 | 111 | def NMTIMES(r: Rexp, n: Int, m: Int) : Rexp = | 
| 112 | if (n >= m) NTIMES(r, n) else ALT(NTIMES(r, m), NMTIMES(r, n, m - 1)) | |
| 363 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 113 | |
| 422 | 114 | |
| 115 | println("Coursework:")
 | |
| 116 | val REG1 = PLUS(PLUS("a" ~ "a" ~ "a"))
 | |
| 117 | val REG2 = PLUS(PLUS("aaaaaaaaaaaaaaaaaaa" ~ OPT("a")))  // 19 as ~ a?
 | |
| 363 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 118 | |
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 119 | |
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 120 | //40 * aaa matches | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 121 | //43 * aaa + aa does not | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 122 | //45 * aaa + a | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 123 | |
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 124 | println("EVIL1:")
 | 
| 422 | 125 | println(matches(REG1, "aaa" * 40)) | 
| 126 | println(matches(REG1, "aaa" * 43 + "aa")) | |
| 127 | println(matches(REG1, "aaa" * 45 + "a")) | |
| 363 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 128 | println("EVIL2:")
 | 
| 422 | 129 | println(matches(REG2, "aaa" * 40)) | 
| 130 | println(matches(REG2, "aaa" * 43 + "aa")) | |
| 131 | println(matches(REG2, "aaa" * 45 + "a")) | |
| 363 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 132 | |
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 133 | println("EMAIL:")
 | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 134 | val LOWERCASE = "abcdefghijklmnopqrstuvwxyz" | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 135 | val DIGITS = "0123456789" | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 136 | val EMAIL = PLUS(RANGE((LOWERCASE + DIGITS + "_" + "." + "-").toList)) ~ "@" ~ | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 137 | PLUS(RANGE((LOWERCASE + DIGITS + "." + "-").toList)) ~ "." ~ | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 138 | NMTIMES(RANGE((LOWERCASE + ".").toList), 2, 6) | 
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 139 | |
| 
0d6deecdb2eb
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
341diff
changeset | 140 | val my_email = "christian.urban@kcl.ac.uk" | 
| 422 | 141 | |
| 142 | println(matches(EMAIL, my_email)) | |
| 143 | ||
| 144 | println(matches(NTIMES("a", 2), "a"))
 | |
| 145 | println(matches(NTIMES("a", 2), "aa"))
 | |
| 146 | println(matches(NTIMES("a", 2), "aaa"))
 | |
| 147 | ||
| 148 | println(matches(NMTIMES("a", 2, 3), "a"))
 | |
| 149 | println(matches(NMTIMES("a", 2, 3), "aa"))
 | |
| 150 | println(matches(NMTIMES("a", 2, 3), "aaa"))
 | |
| 151 | println(matches(NMTIMES("a", 2, 3), "aaaa"))
 |