diff -r 34feeb53c0ba -r 0315d9983cd0 main_testing3/re.scala --- a/main_testing3/re.scala Sun Jan 15 10:58:13 2023 +0000 +++ b/main_testing3/re.scala Sat Mar 11 22:01:53 2023 +0000 @@ -3,7 +3,6 @@ object M3 { -// Regular Expressions abstract class Rexp case object ZERO extends Rexp case object ONE extends Rexp @@ -18,7 +17,8 @@ def ALT(r1: Rexp, r2: Rexp) = ALTs(List(r1, r2)) def SEQ(r1: Rexp, r2: Rexp) = SEQs(List(r1, r2)) -// some convenience for typing in regular expressions + +// some convenience for typing regular expressions import scala.language.implicitConversions import scala.language.reflectiveCalls @@ -43,129 +43,216 @@ def ~ (r: String) = SEQ(s, r) } -// (1) +// examples for the implicits: +// ALT(CHAR('a'), CHAR('b')) +// val areg : Rexp = "a" | "b" + +// SEQ(CHAR('a'), CHAR('b')) +// val sreg : Rexp = "a" ~ "b" + + +// ADD YOUR CODE BELOW +//====================== + +// (1) def nullable (r: Rexp) : Boolean = r match { - case ZERO => false - case ONE => true - case CHAR(_) => false - case ALTs(rs) => rs.exists(nullable) - case SEQs(rs) => rs.forall(nullable) - case STAR(_) => true + case ZERO => false + case ONE => true + case CHAR(_) => false + case ALTs(rs) => (for(reg <- rs) yield nullable(reg)).exists(_ == true) + case SEQs(rs) => (for(reg <- rs) yield nullable(reg)).forall(_ == true) + case STAR(_) => true } +/* +nullable(ZERO) == false +nullable(ONE) == true +nullable(CHAR('a')) == false +nullable(ZERO | ONE) == true +nullable(ZERO | CHAR('a')) == false +nullable(ONE ~ ONE) == true +nullable(ONE ~ CHAR('a')) == false +nullable(STAR(ZERO)) == true +nullable(ALTs(List(ONE, CHAR('a'), ZERO))) == true +nullable(SEQs(List(ONE, ALTs(List(ONE, CHAR('a'), ZERO)), STAR(ZERO)))) == true +*/ + // (2) -def der(c: Char, r: Rexp) : Rexp = r match { - case ZERO => ZERO - case ONE => ZERO - case CHAR(d) => if (c == d) ONE else ZERO - case ALTs(rs) => ALTs(rs.map(der(c, _))) - case SEQs(Nil) => ZERO - case SEQs(r1::rs) => - if (nullable(r1)) ALT(SEQs(der(c, r1)::rs), der(c, SEQs(rs))) - else SEQs(der(c, r1):: rs) - case STAR(r1) => SEQ(der(c, r1), STAR(r1)) +def der (c: Char, r: Rexp) : Rexp = r match { + case ZERO => ZERO + case ONE => ZERO + case CHAR(d) => if(c == d) ONE else ZERO + case ALTs(rs) => ALTs(for(reg <- rs) yield der(c, reg)) + case SEQs(Nil) => ZERO + case SEQs(r :: rs) => if(nullable(r)) ALT(SEQs(der(c, r) :: rs), der(c, SEQs(rs))) else SEQs(der(c, r) :: rs) + case STAR(r) => SEQ(der(c,r), STAR(r)) } +/* +der('a', ZERO | ONE) == (ZERO | ZERO) +der('a', (CHAR('a') | ONE) ~ CHAR('a')) == ALT((ONE | ZERO) ~ CHAR('a'), SEQs(List(ONE))) +der('a', (CHAR('a') | CHAR('a')) ~ CHAR('a')) == (ONE | ONE) ~ CHAR('a') +der('a', STAR(CHAR('a'))) == (ONE ~ STAR(CHAR('a'))) +der('b', STAR(CHAR('a'))) == (ZERO ~ STAR(CHAR('a'))) +*/ // (3) def denest(rs: List[Rexp]) : List[Rexp] = rs match { - case Nil => Nil - case ZERO::tl => denest(tl) - case ALTs(rs1)::rs2 => rs1 ::: denest(rs2) - case r::rs => r :: denest(rs) + case Nil => Nil + case ZERO :: rest => denest(rest) + case ALTs(rgs) :: rest => rgs ::: denest(rest) + case r :: rest => r :: denest(rest) } +/* +denest(List(ONE, ZERO, ALTs(List(ONE, CHAR('a'))))) == List(ONE, ONE, CHAR('a')) +denest(List(ONE ~ ONE, ZERO, ZERO | ONE)) == List(ONE ~ ONE, ZERO, ONE) +*/ + // (4) def flts(rs: List[Rexp], acc: List[Rexp] = Nil) : List[Rexp] = rs match { - case Nil => acc - case ZERO::rs => ZERO::Nil - case ONE::rs => flts(rs, acc) - case SEQs(rs1)::rs => flts(rs, acc ::: rs1) - case r::rs => flts(rs, acc :+ r) + case Nil => acc + case ZERO :: rest => List(ZERO) + case ONE :: rest => flts(rest, acc) + case SEQs(rgs) :: rest => flts(rest, acc ::: rgs) + case r :: rest => flts(rest, acc ::: List(r)) } +/* +flts(List(CHAR('a'), ZERO, ONE), Nil) == List(ZERO) +flts(List(CHAR('a'), ONE, ONE, CHAR('b')), Nil) == List(CHAR('a'), CHAR('b')) +flts(List(ONE ~ CHAR('a'), CHAR('b') ~ ONE), Nil) == List(ONE, CHAR('a'), CHAR('b'), ONE) +*/ + // (5) def ALTs_smart(rs: List[Rexp]) : Rexp = rs match { - case Nil => ZERO - case r::Nil => r - case rs => ALTs(rs) + case Nil => ZERO + case List(r) => r + case _ => ALTs(rs) } def SEQs_smart(rs: List[Rexp]) : Rexp = rs match { - case Nil => ONE - case ZERO::Nil => ZERO - case r::Nil => r - case rs => SEQs(rs) + case Nil => ONE + case List(r) => r + case _ => SEQs(rs) +} + +/* +SEQs_smart(Nil) == ONE +SEQs_smart(List(ZERO)) == ZERO +SEQs_smart(List(CHAR('a'))) == CHAR('a') +SEQs_smart(List(ONE ~ ONE)) == ONE ~ ONE +SEQs_smart(List(ONE, ONE)) == SEQs(List(ONE, ONE)) +ALTs_smart(Nil) == ZERO +ALTs_smart(List(ONE ~ ONE)) == ONE ~ ONE +ALTs_smart(List(ZERO, ZERO)) == ALTs(List(ZERO, ZERO)) +*/ + +// (6) +def simp(r: Rexp) : Rexp = r match { + case ALTs(rs) => ALTs_smart(denest(for(reg <- rs) yield simp(reg)).distinct) + case SEQs(rs) => SEQs_smart(flts(for(reg <- rs) yield simp(reg))) + case _ => r } -// (6) +/* +simp(ZERO | ONE) == ONE +simp(STAR(ZERO | ONE)) == STAR(ZERO | ONE) +simp(ONE ~ (ONE ~ (ONE ~ CHAR('a')))) == CHAR('a') +simp(((ONE ~ ONE) ~ ONE) ~ CHAR('a')) == CHAR('a') +simp(((ONE | ONE) ~ ONE) ~ CHAR('a')) == CHAR('a') +simp(ONE ~ (ONE ~ (ONE ~ ZERO))) == ZERO +simp(ALT(ONE ~ (ONE ~ (ONE ~ ZERO)), CHAR('a'))) == CHAR('a') +simp(CHAR('a') | CHAR('a')) == CHAR('a') +simp(CHAR('a') ~ CHAR('a')) == CHAR('a') ~ CHAR('a') +simp(ONE | CHAR('a')) == (ONE | CHAR('a')) +simp(ALT((CHAR('a') | ZERO) ~ ONE,((ONE | CHAR('b')) | CHAR('c')) ~ (CHAR('d') ~ ZERO))) == CHAR('a') +simp((ZERO | ((ZERO | ZERO) | (ZERO | ZERO))) ~ ((ONE | ZERO) | ONE ) ~ (CHAR('a'))) == ZERO +simp(ALT(ONE | ONE, ONE | ONE)) == ONE +simp(ALT(ZERO | CHAR('a'), CHAR('a') | ZERO)) == CHAR('a') +simp(ALT(ONE | CHAR('a'), CHAR('a') | ONE)) == ALT(ONE, CHAR('a')) +simp(ALTs(Nil)) == ZERO +simp(SEQs(List(CHAR('a')))) == CHAR('a') +*/ -def simp(r: Rexp) : Rexp = r match { - case ALTs(rs) => - ALTs_smart(denest(rs.map(simp)).distinct) - case SEQs(rs) => - SEQs_smart(flts(rs.map(simp))) - case r => r +// (7) +def ders (s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c :: cs => ders(cs, simp(der(c, r))) +} +def matcher(r: Rexp, s: String): Boolean = { + val derivatives = ders(s.toList, r) + nullable(derivatives) } -//println("Simp tests") -//println(simp(ALT(ONE | CHAR('a'), CHAR('a') | ONE))) -//println(simp(((CHAR('a') | ZERO) ~ ONE) | -// (((ONE | CHAR('b')) | CHAR('c')) ~ (CHAR('d') ~ ZERO)))) +/* +val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) +ders("aaaaa".toList, EVIL) == SEQs(List(STAR(CHAR('a')), STAR(STAR(CHAR('a'))), CHAR('b'))) +ders(List('b'), EVIL) == ONE +ders("bb".toList, EVIL) == ZERO +matcher(EVIL, "a" * 5 ++ "b") == true +matcher(EVIL, "b") == true +matcher(EVIL, "bb") == false +matcher("abc", "abc") == true +matcher(("ab" | "a") ~ (ONE | "bc"), "abc") == true +matcher(ONE, "") == true +matcher(ZERO, "") == false +matcher(ONE | CHAR('a'), "") == true +matcher(ONE | CHAR('a'), "a") == true +*/ + +// (8) +def size(r: Rexp): Int = r match { + case ZERO => 1 + case ONE => 1 + case CHAR(_) => 1 + case ALTs(rs) => 1 + (for(reg <- rs) yield size(reg)).sum + case SEQs(rs) => 1 + (for(reg <- rs) yield size(reg)).sum + case STAR(r) => 1 + size(r) +} + +/* +val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) +size(der('a', der('a', EVIL))) == 36 +size(der('a', der('a', der('a', EVIL)))) == 83 +size(ders("aaaaaa".toList, EVIL)) == 7 +size(ders(("a" * 50).toList, EVIL)) == 7 +*/ -// (7) - -def ders (s: List[Char], r: Rexp) : Rexp = s match { - case Nil => r - case c::s => ders(s, simp(der(c, r))) -} - -// main matcher function -def matcher(r: Rexp, s: String) = nullable(ders(s.toList, r)) - -// (8) +// Some testing data +//=================== +/* -def size(r: Rexp): Int = r match { - case ZERO => 1 - case ONE => 1 - case CHAR(_) => 1 - case ALTs(rs) => 1 + rs.map(size).sum - case SEQs(rs) => 1 + rs.map(size).sum - case STAR(r1) => 1 + size(r1) -} +simp(ALT(ONE | CHAR('a'), CHAR('a') | ONE)) // => ALTs(List(ONE, CHAR(a))) +simp(((CHAR('a') | ZERO) ~ ONE) | (((ONE | CHAR('b')) | CHAR('c')) ~ (CHAR('d') ~ ZERO))) // => CHAR(a) - +matcher(("a" ~ "b") ~ "c", "ab") // => false +matcher(("a" ~ "b") ~ "c", "abc") // => true -// some testing data -/* -println(matcher(("a" ~ "b") ~ "c", "abc")) // => true -println(matcher(("a" ~ "b") ~ "c", "ab")) // => false // the supposedly 'evil' regular expression (a*)* b val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) -println(matcher(EVIL, "a" * 1000 ++ "b")) // => true -println(matcher(EVIL, "a" * 1000)) // => false +matcher(EVIL, "a" * 1000) // => false +matcher(EVIL, "a" * 1000 ++ "b") // => true + // size without simplifications -println(size(der('a', der('a', EVIL)))) // => 36 -println(size(der('a', der('a', der('a', EVIL))))) // => 83 +size(der('a', der('a', EVIL))) // => 36 +size(der('a', der('a', der('a', EVIL)))) // => 83 // size with simplification -println(simp(der('a', der('a', EVIL)))) -println(simp(der('a', der('a', der('a', EVIL))))) - -println(size(simp(der('a', der('a', EVIL))))) // => 7 -println(size(simp(der('a', der('a', der('a', EVIL)))))) // => 7 +size(simp(der('a', der('a', EVIL)))) // => 7 +size(simp(der('a', der('a', der('a', EVIL))))) // => 7 // Python needs around 30 seconds for matching 28 a's with EVIL. // Java 9 and later increase this to an "astonishing" 40000 a's in -// around 30 seconds. +// 30 seconds. // -// Lets see how long it takes to match strings with -// 5 Million a's...it should be in the range of a -// few seconds. +// Lets see how long it really takes to match strings with +// 5 Million a's...it should be in the range of a few +// of seconds. def time_needed[T](i: Int, code: => T) = { val start = System.nanoTime() @@ -179,35 +266,25 @@ } // another "power" test case -println(simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(100).next()) == ONE) +simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(50).next()) == ONE // the Iterator produces the rexp // // SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE) // -// where SEQ is nested 100 times. -*/ - +// where SEQ is nested 50 times. -assert(simp(ZERO | ONE) == ONE) -assert(simp(STAR(ZERO | ONE)) == STAR(ZERO | ONE)) -assert(simp(ONE ~ (ONE ~ (ONE ~ CHAR('a')))) == CHAR('a')) -assert(simp(((ONE ~ ONE) ~ ONE) ~ CHAR('a')) == CHAR('a')) -assert(simp(((ONE | ONE) ~ ONE) ~ CHAR('a')) == CHAR('a')) -assert(simp(ONE ~ (ONE ~ (ONE ~ ZERO))) == ZERO) -assert(simp(ALT(ONE ~ (ONE ~ (ONE ~ ZERO)), CHAR('a'))) == CHAR('a')) -assert(simp(CHAR('a') | CHAR('a')) == CHAR('a')) -assert(simp(CHAR('a') ~ CHAR('a')) == CHAR('a') ~ CHAR('a')) -assert(simp(ONE | CHAR('a')) == (ONE | CHAR('a'))) -assert(simp(ALT((CHAR('a') | ZERO) ~ ONE, - ((ONE | CHAR('b')) | CHAR('c')) ~ (CHAR('d') ~ ZERO))) == CHAR('a')) -assert(simp((ZERO | ((ZERO | ZERO) | (ZERO | ZERO))) ~ ((ONE | ZERO) | ONE ) ~ (CHAR('a'))) == ZERO) -assert(simp(ALT(ONE | ONE, ONE | ONE)) == ONE) -assert(simp(ALT(ZERO | CHAR('a'), CHAR('a') | ZERO)) == CHAR('a')) -assert(simp(ALT(ONE | CHAR('a'), CHAR('a') | ONE)) == ALT(ONE, CHAR('a'))) -assert(simp(ALTs(Nil)) == ZERO) -assert(simp(SEQs(List(CHAR('a')))) == CHAR('a')) - +*/ } + + + + + +// This template code is subject to copyright +// by King's College London, 2022. Do not +// make the template code public in any shape +// or form, and do not exchange it with other +// students under any circumstance.