| author | Christian Urban <christian.urban@kcl.ac.uk> | 
| Sun, 15 Jan 2023 10:58:13 +0000 | |
| changeset 459 | 7acbef680bef | 
| parent 425 | 6e990ae2c6a3 | 
| child 474 | 8a61bcd51ec3 | 
| permissions | -rw-r--r-- | 
| 396 | 1 | // Main Part 3 about Regular Expression Matching | 
| 425 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 2 | //============================================== | 
| 288 | 3 | |
| 396 | 4 | object M3 {
 | 
| 219 | 5 | |
| 6 | abstract class Rexp | |
| 7 | case object ZERO extends Rexp | |
| 8 | case object ONE extends Rexp | |
| 9 | case class CHAR(c: Char) extends Rexp | |
| 425 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 10 | case class ALTs(rs: List[Rexp]) extends Rexp // alternatives | 
| 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 11 | case class SEQs(rs: List[Rexp]) extends Rexp // sequences | 
| 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 12 | case class STAR(r: Rexp) extends Rexp // star | 
| 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 13 | |
| 219 | 14 | |
| 425 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 15 | //the usual binary choice and binary sequence can be defined | 
| 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 16 | //in terms of ALTs and SEQs | 
| 396 | 17 | def ALT(r1: Rexp, r2: Rexp) = ALTs(List(r1, r2)) | 
| 425 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 18 | def SEQ(r1: Rexp, r2: Rexp) = SEQs(List(r1, r2)) | 
| 396 | 19 | |
| 20 | ||
| 421 | 21 | // some convenience for typing regular expressions | 
| 219 | 22 | import scala.language.implicitConversions | 
| 23 | import scala.language.reflectiveCalls | |
| 24 | ||
| 25 | def charlist2rexp(s: List[Char]): Rexp = s match {
 | |
| 26 | case Nil => ONE | |
| 27 | case c::Nil => CHAR(c) | |
| 28 | case c::s => SEQ(CHAR(c), charlist2rexp(s)) | |
| 29 | } | |
| 30 | implicit def string2rexp(s: String): Rexp = charlist2rexp(s.toList) | |
| 31 | ||
| 32 | implicit def RexpOps (r: Rexp) = new {
 | |
| 33 | def | (s: Rexp) = ALT(r, s) | |
| 34 | def % = STAR(r) | |
| 35 | def ~ (s: Rexp) = SEQ(r, s) | |
| 36 | } | |
| 37 | ||
| 38 | implicit def stringOps (s: String) = new {
 | |
| 39 | def | (r: Rexp) = ALT(s, r) | |
| 40 | def | (r: String) = ALT(s, r) | |
| 41 | def % = STAR(s) | |
| 42 | def ~ (r: Rexp) = SEQ(s, r) | |
| 43 | def ~ (r: String) = SEQ(s, r) | |
| 44 | } | |
| 45 | ||
| 425 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 46 | // examples for the implicits: | 
| 421 | 47 | // ALT(CHAR('a'), CHAR('b'))
 | 
| 425 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 48 | // val areg : Rexp = "a" | "b" | 
| 421 | 49 | |
| 425 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 50 | // SEQ(CHAR('a'), CHAR('b')) 
 | 
| 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 51 | // val sreg : Rexp = "a" ~ "b" | 
| 219 | 52 | |
| 53 | ||
| 425 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 54 | // ADD YOUR CODE BELOW | 
| 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 55 | //====================== | 
| 396 | 56 | |
| 425 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 57 | // (1) | 
| 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 58 | def nullable (r: Rexp) : Boolean = ??? | 
| 396 | 59 | |
| 425 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 60 | // (2) | 
| 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 61 | def der (c: Char, r: Rexp) : Rexp = ??? | 
| 396 | 62 | |
| 425 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 63 | // (3) | 
| 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 64 | def denest(rs: List[Rexp]) : List[Rexp] = ??? | 
| 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 65 | |
| 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 66 | // (4) | 
| 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 67 | def flts(rs: List[Rexp], acc: List[Rexp] = Nil) : List[Rexp] = ??? | 
| 219 | 68 | |
| 425 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 69 | // (5) | 
| 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 70 | def ALTs_smart(rs: List[Rexp]) : Rexp = ??? | 
| 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 71 | def SEQs_smart(rs: List[Rexp]) : Rexp = ??? | 
| 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 72 | |
| 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 73 | // (6) | 
| 347 | 74 | def simp(r: Rexp) : Rexp = ??? | 
| 219 | 75 | |
| 425 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 76 | // (7) | 
| 347 | 77 | def ders (s: List[Char], r: Rexp) : Rexp = ??? | 
| 78 | def matcher(r: Rexp, s: String): Boolean = ??? | |
| 219 | 79 | |
| 425 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 80 | // (8) | 
| 347 | 81 | def size(r: Rexp): Int = ??? | 
| 219 | 82 | |
| 83 | ||
| 425 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 84 | // Some testing data | 
| 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 85 | //=================== | 
| 219 | 86 | /* | 
| 425 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 87 | |
| 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 88 | simp(ALT(ONE | CHAR('a'), CHAR('a') | ONE))   // => ALTs(List(ONE, CHAR(a)))
 | 
| 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 89 | simp(((CHAR('a') | ZERO) ~ ONE) | 
 | 
| 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 90 |      (((ONE | CHAR('b')) | CHAR('c')) ~ (CHAR('d') ~ ZERO)))   // => CHAR(a)
 | 
| 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 91 | |
| 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 92 | matcher(("a" ~ "b") ~ "c", "ab")   // => false
 | 
| 219 | 93 | matcher(("a" ~ "b") ~ "c", "abc")  // => true
 | 
| 425 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 94 | |
| 219 | 95 | |
| 96 | // the supposedly 'evil' regular expression (a*)* b | |
| 97 | val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
 | |
| 98 | ||
| 425 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 99 | matcher(EVIL, "a" * 1000) // => false | 
| 219 | 100 | matcher(EVIL, "a" * 1000 ++ "b") // => true | 
| 425 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 101 | |
| 219 | 102 | |
| 103 | // size without simplifications | |
| 425 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 104 | size(der('a', der('a', EVIL)))             // => 36
 | 
| 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 105 | size(der('a', der('a', der('a', EVIL))))   // => 83
 | 
| 219 | 106 | |
| 107 | // size with simplification | |
| 425 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 108 | size(simp(der('a', der('a', EVIL))))           // => 7
 | 
| 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 109 | size(simp(der('a', der('a', der('a', EVIL))))) // => 7
 | 
| 219 | 110 | |
| 220 | 111 | // Python needs around 30 seconds for matching 28 a's with EVIL. | 
| 112 | // Java 9 and later increase this to an "astonishing" 40000 a's in | |
| 113 | // 30 seconds. | |
| 219 | 114 | // | 
| 115 | // Lets see how long it really takes to match strings with | |
| 425 
6e990ae2c6a3
updated solutions and templates
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
421diff
changeset | 116 | // 5 Million a's...it should be in the range of a few | 
| 220 | 117 | // of seconds. | 
| 219 | 118 | |
| 119 | def time_needed[T](i: Int, code: => T) = {
 | |
| 120 | val start = System.nanoTime() | |
| 121 | for (j <- 1 to i) code | |
| 122 | val end = System.nanoTime() | |
| 400 | 123 | "%.5f".format((end - start)/(i * 1.0e9)) | 
| 219 | 124 | } | 
| 125 | ||
| 126 | for (i <- 0 to 5000000 by 500000) {
 | |
| 400 | 127 |   println(s"$i ${time_needed(2, matcher(EVIL, "a" * i))} secs.") 
 | 
| 219 | 128 | } | 
| 129 | ||
| 220 | 130 | // another "power" test case | 
| 400 | 131 | simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(50).next()) == ONE | 
| 220 | 132 | |
| 133 | // the Iterator produces the rexp | |
| 134 | // | |
| 135 | // SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE) | |
| 136 | // | |
| 137 | // where SEQ is nested 50 times. | |
| 138 | ||
| 219 | 139 | */ | 
| 140 | ||
| 288 | 141 | } |