diff -r e9d14d58be3c -r daf561a83ba6 main_marking3/re.scala --- a/main_marking3/re.scala Thu Jan 13 12:55:03 2022 +0000 +++ b/main_marking3/re.scala Mon Apr 11 23:55:27 2022 +0100 @@ -1,23 +1,25 @@ -// Core Part about Regular Expression Matching +// Main Part 3 about Regular Expression Matching //============================================= -object CW8c { +object M3 { // Regular Expressions abstract class Rexp case object ZERO extends Rexp case object ONE extends Rexp case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp +case class ALTs(rs: List[Rexp]) extends Rexp // alternatives +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence +case class STAR(r: Rexp) extends Rexp // star + + +//the usual binary choice can be defined in terms of ALTs +def ALT(r1: Rexp, r2: Rexp) = ALTs(List(r1, r2)) // some convenience for typing in regular expressions - import scala.language.implicitConversions import scala.language.reflectiveCalls - def charlist2rexp(s: List[Char]): Rexp = s match { case Nil => ONE case c::Nil => CHAR(c) @@ -49,7 +51,7 @@ case ZERO => false case ONE => true case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) + case ALTs(rs) => rs.exists(nullable) case SEQ(r1, r2) => nullable(r1) && nullable(r2) case STAR(_) => true } @@ -63,25 +65,39 @@ case ZERO => ZERO case ONE => ZERO case CHAR(d) => if (c == d) ONE else ZERO - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case ALTs(rs) => ALTs(rs.map(der(c, _))) case SEQ(r1, r2) => if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) else SEQ(der(c, r1), r2) case STAR(r1) => SEQ(der(c, r1), STAR(r1)) } -// (3) Complete the simp function according to + +// (3) Implement the flatten function flts. It +// deletes 0s from a list of regular expressions +// and also 'spills out', or flattens, nested +// ALTernativeS. + +def flts(rs: List[Rexp]) : List[Rexp] = rs match { + case Nil => Nil + case ZERO::tl => flts(tl) + case ALTs(rs1)::rs2 => rs1 ::: flts(rs2) + case r::rs => r :: flts(rs) +} + +// (4) Complete the simp function according to // the specification given in the coursework; this // function simplifies a regular expression from // the inside out, like you would simplify arithmetic // expressions; however it does not simplify inside // STAR-regular expressions. + def simp(r: Rexp) : Rexp = r match { - case ALT(r1, r2) => (simp(r1), simp(r2)) match { - case (ZERO, r2s) => r2s - case (r1s, ZERO) => r1s - case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s) + case ALTs(rs) => (flts(rs.map(simp)).distinct) match { + case Nil => ZERO + case r::Nil => r + case rs => ALTs(rs) } case SEQ(r1, r2) => (simp(r1), simp(r2)) match { case (ZERO, _) => ZERO @@ -93,8 +109,9 @@ case r => r } +simp(ALT(ONE | CHAR('a'), CHAR('a') | ONE)) -// (4) Complete the two functions below; the first +// (5) Complete the two functions below; the first // calculates the derivative w.r.t. a string; the second // is the regular expression matcher taking a regular // expression and a string and checks whether the @@ -108,7 +125,7 @@ // main matcher function def matcher(r: Rexp, s: String) = nullable(ders(s.toList, r)) -// (5) Complete the size function for regular +// (6) Complete the size function for regular // expressions according to the specification // given in the coursework. @@ -117,7 +134,7 @@ case ZERO => 1 case ONE => 1 case CHAR(_) => 1 - case ALT(r1, r2) => 1 + size(r1) + size (r2) + case ALTs(rs) => 1 + rs.map(size).sum case SEQ(r1, r2) => 1 + size(r1) + size (r2) case STAR(r1) => 1 + size(r1) } @@ -130,18 +147,21 @@ //matcher(("a" ~ "b") ~ "c", "ab") // => false // the supposedly 'evil' regular expression (a*)* b -val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) +// val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) -//matcher(EVIL, "a" * 1000 ++ "b") // => true -//matcher(EVIL, "a" * 1000) // => false +//println(matcher(EVIL, "a" * 1000 ++ "b")) // => true +//println(matcher(EVIL, "a" * 1000)) // => false // size without simplifications -//size(der('a', der('a', EVIL))) // => 28 -//size(der('a', der('a', der('a', EVIL)))) // => 58 +//println(size(der('a', der('a', EVIL)))) // => 28 +//println(size(der('a', der('a', der('a', EVIL))))) // => 58 // size with simplification -//size(simp(der('a', der('a', EVIL)))) // => 8 -//size(simp(der('a', der('a', der('a', EVIL))))) // => 8 +//println(simp(der('a', der('a', EVIL)))) +//println(simp(der('a', der('a', der('a', EVIL))))) + +//println(size(simp(der('a', der('a', EVIL))))) // => 8 +//println(size(simp(der('a', der('a', der('a', EVIL)))))) // => 8 // Python needs around 30 seconds for matching 28 a's with EVIL. // Java 9 and later increase this to an "astonishing" 40000 a's in @@ -155,11 +175,11 @@ val start = System.nanoTime() for (j <- 1 to i) code val end = System.nanoTime() - (end - start)/(i * 1.0e9) + "%.5f".format((end - start)/(i * 1.0e9)) } //for (i <- 0 to 5000000 by 500000) { -// println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i))) + " secs.") +// println(s"$i ${time_needed(2, matcher(EVIL, "a" * i))} secs.") //} // another "power" test case @@ -169,7 +189,7 @@ // // SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE) // -// where SEQ is nested 100 times. +// where SEQ is nested 50 times.