diff -r de59aa20a1dc -r ffce7b61b446 main_testing3/re.scala --- a/main_testing3/re.scala Mon Nov 08 01:16:13 2021 +0000 +++ b/main_testing3/re.scala Mon Nov 08 01:39:00 2021 +0000 @@ -1,19 +1,24 @@ // Core Part about Regular Expression Matching //============================================= -object CW8c { +object M3 { // Regular Expressions abstract class Rexp case object ZERO extends Rexp case object ONE extends Rexp case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp +case class ALTs(rs: List[Rexp]) extends Rexp // alternatives +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence +case class STAR(r: Rexp) extends Rexp // star // some convenience for typing in regular expressions + +//the usual binary choice can be defined in terms of ALTs +def ALT(r1: Rexp, r2: Rexp) = ALTs(List(r1, r2)) + + import scala.language.implicitConversions import scala.language.reflectiveCalls @@ -49,7 +54,7 @@ case ZERO => false case ONE => true case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) + case ALTs(rs) => rs.exists(nullable) case SEQ(r1, r2) => nullable(r1) && nullable(r2) case STAR(_) => true } @@ -63,13 +68,22 @@ case ZERO => ZERO case ONE => ZERO case CHAR(d) => if (c == d) ONE else ZERO - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case ALTs(rs) => ALTs(rs.map(der(c, _))) case SEQ(r1, r2) => if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) else SEQ(der(c, r1), r2) case STAR(r1) => SEQ(der(c, r1), STAR(r1)) } + + +def flts(rs: List[Rexp]) : List[Rexp] = rs match { + case Nil => Nil + case ZERO::tl => flts(tl) + case ALTs(rs1)::rs2 => rs1 ::: flts(rs2) + case r::rs => r :: flts(rs) +} + // (3) Complete the simp function according to // the specification given in the coursework; this // function simplifies a regular expression from @@ -77,11 +91,12 @@ // expressions; however it does not simplify inside // STAR-regular expressions. + def simp(r: Rexp) : Rexp = r match { - case ALT(r1, r2) => (simp(r1), simp(r2)) match { - case (ZERO, r2s) => r2s - case (r1s, ZERO) => r1s - case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s) + case ALTs(rs) => (flts(rs.map(simp)).distinct) match { + case Nil => ZERO + case r::Nil => r + case rs => ALTs(rs) } case SEQ(r1, r2) => (simp(r1), simp(r2)) match { case (ZERO, _) => ZERO @@ -117,7 +132,7 @@ case ZERO => 1 case ONE => 1 case CHAR(_) => 1 - case ALT(r1, r2) => 1 + size(r1) + size (r2) + case ALTs(rs) => 1 + rs.map(size).sum case SEQ(r1, r2) => 1 + size(r1) + size (r2) case STAR(r1) => 1 + size(r1) } @@ -132,16 +147,19 @@ // the supposedly 'evil' regular expression (a*)* b val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) -//matcher(EVIL, "a" * 1000 ++ "b") // => true -//matcher(EVIL, "a" * 1000) // => false +//println(matcher(EVIL, "a" * 1000 ++ "b")) // => true +//println(matcher(EVIL, "a" * 1000)) // => false // size without simplifications -//size(der('a', der('a', EVIL))) // => 28 -//size(der('a', der('a', der('a', EVIL)))) // => 58 +//println(size(der('a', der('a', EVIL)))) // => 28 +//println(size(der('a', der('a', der('a', EVIL))))) // => 58 // size with simplification -//size(simp(der('a', der('a', EVIL)))) // => 8 -//size(simp(der('a', der('a', der('a', EVIL))))) // => 8 +//println(simp(der('a', der('a', EVIL)))) +//println(simp(der('a', der('a', der('a', EVIL))))) + +//println(size(simp(der('a', der('a', EVIL))))) // => 8 +//println(size(simp(der('a', der('a', der('a', EVIL)))))) // => 8 // Python needs around 30 seconds for matching 28 a's with EVIL. // Java 9 and later increase this to an "astonishing" 40000 a's in @@ -155,11 +173,11 @@ val start = System.nanoTime() for (j <- 1 to i) code val end = System.nanoTime() - (end - start)/(i * 1.0e9) + "%.5f".format((end - start)/(i * 1.0e9)) } //for (i <- 0 to 5000000 by 500000) { -// println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i))) + " secs.") +// println(s"$i ${time_needed(2, matcher(EVIL, "a" * i))} secs.") //} // another "power" test case @@ -169,7 +187,7 @@ // // SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE) // -// where SEQ is nested 100 times. +// where SEQ is nested 50 times.