diff -r 33c2655be47d -r 5549016ab10f testing4/re.scala --- a/testing4/re.scala Sat Dec 01 15:09:37 2018 +0000 +++ b/testing4/re.scala Thu Dec 06 13:15:28 2018 +0000 @@ -1,20 +1,22 @@ // Part 1 about Regular Expression Matching //========================================== +//object CW9a { + // Regular Expressions abstract class Rexp case object ZERO extends Rexp case object ONE extends Rexp case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternative -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence -case class STAR(r: Rexp) extends Rexp // star - +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp -// some convenience for typing regular expressions +// some convenience for typing in regular expressions -import scala.language.implicitConversions -import scala.language.reflectiveCalls +import scala.language.implicitConversions +import scala.language.reflectiveCalls + def charlist2rexp(s: List[Char]): Rexp = s match { case Nil => ONE @@ -38,190 +40,116 @@ } // (1) Complete the function nullable according to -// the definition given in the coursework; this +// the definition given in the coursework; this // function checks whether a regular expression // can match the empty string and Returns a boolean // accordingly. -def nullable (r: Rexp) : Boolean = r match{ +def nullable (r: Rexp) : Boolean = r match { case ZERO => false case ONE => true case CHAR(_) => false - case ALT(a,b)=>nullable(a)||nullable(b) - case SEQ(a,b) => nullable(a) && nullable(b) + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) case STAR(_) => true } - -/*val rex = "1~0.%|11" - -assert(der('1',rex) == SEQ(ONE,SEQ(CHAR(~),SEQ(CHAR(0),SEQ(CHAR(.),SEQ(CHAR(%),SEQ(CHAR(|),SEQ(CHAR(1),CHAR(1))))))))) - -assert(der('1',der('1',rex)) == - ALT(SEQ(ZERO,SEQ(CHAR(~),SEQ(CHAR(0),SEQ(CHAR(.),SEQ(CHAR(%),SEQ(CHAR(|), - SEQ(CHAR(1),CHAR(1)))))))),SEQ(ZERO,SEQ(CHAR(0),SEQ(CHAR(.),SEQ(CHAR(%), - SEQ(CHAR(|),SEQ(CHAR(1),CHAR(1))))))))*/ - // (2) Complete the function der according to // the definition given in the coursework; this -// function calculates the derivative of a +// function calculates the derivative of a // regular expression w.r.t. a character. -def der (c: Char, r: Rexp) : Rexp = r match{ +def der (c: Char, r: Rexp) : Rexp = r match { case ZERO => ZERO case ONE => ZERO - case CHAR(d) => if (c==d) ONE else ZERO - case ALT(a,b) => der(c,a)|der(c,b) - case SEQ(a,b) => if(nullable(a)) {(der(c,a)~b)|der(c,b)} - else der(c,a)~b - case STAR(a) => der(c,a)~STAR(a) + case CHAR(d) => if (c == d) ONE else ZERO + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r1) => SEQ(der(c, r1), STAR(r1)) } -println(der('a', ZERO | ONE))// == (ZERO | ZERO) -println(der('a', (CHAR('a') | ONE) ~ CHAR('a')))// ==ALT((ONE | ZERO) ~ CHAR('a'), ONE) -println(der('a', STAR(CHAR('a'))))// == (ONE ~ STAR(CHAR('a'))) -println(der('b', STAR(CHAR('a'))))// == (ZERO ~ STAR(CHAR('a')))) - - -//ALT(SEQ(ZERO,ZERO),ZERO) -//ALT(ALT(ZERO,ZERO),ALT(ZERO,ZERO)) - -// * == | -// + == ~ // (3) Complete the simp function according to // the specification given in the coursework; this // function simplifies a regular expression from -// the inside out, like you would simplify arithmetic -// expressions; however it does not simplify inside +// the inside out, like you would simplify arithmetic +// expressions; however it does not simplify inside // STAR-regular expressions. -/* -def simp(r: Rexp) : Rexp = r match{ - case SEQ(ZERO,_) => ZERO - case SEQ(_,ZERO) => ZERO - case SEQ(ONE,a) => simp(a) - case SEQ(a,ONE) => simp(a) - case ALT(ZERO,a) => simp(a) - case ALT(a,ZERO) => simp(a) - case ALT(a,b) => if(a == b) simp(a) else r - case _ => r -}*/ - -def simp(r: Rexp) : Rexp = r match{ - case SEQ(a,b) =>{ val sa = simp(a) - val sb = simp(b) - if(sa == ZERO || sb == ZERO) ZERO - else if(sa == ONE) sb - else if(sb == ONE) sa - else SEQ(sa,sb) - } - case ALT(a,b) =>{ val sa = simp(a) - val sb = simp(b) - if(sa == ONE || sb == ONE) ONE - else if(sa == ZERO) sb - else if(sb == ZERO) sa - else if(sa == sb) sa - else ALT(sa,sb) - } - //case STAR(STAR(a)) => simp(STAR(a)) - //case STAR(a) => STAR(simp(a)) - case _ => r - /* - case SEQ(ZERO,_) => ZERO - case SEQ(_,ZERO) => ZERO - case SEQ(ONE,a) => simp(a) - case SEQ(a,ONE) => simp(a) - case SEQ(a,b) => SEQ(simp(a),simp(b)) - //case ALT(ZERO,a) => simp(a) - case ALT(a,ZERO) => simp(a) - case ALT(ONE,_) => ONE - case ALT(_,ONE) => ONE - case ALT(a,b) => {val sa = simp(a) - if(sa == simp(b)) sa else r - } - case STAR(STAR(a)) => simp(STAR(a)) - case STAR(a) => STAR(simp(a)) - case _ => r*/ +def simp(r: Rexp) : Rexp = r match { + case ALT(r1, r2) => (simp(r1), simp(r2)) match { + case (ZERO, r2s) => r2s + case (r1s, ZERO) => r1s + case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s) + } + case SEQ(r1, r2) => (simp(r1), simp(r2)) match { + case (ZERO, _) => ZERO + case (_, ZERO) => ZERO + case (ONE, r2s) => r2s + case (r1s, ONE) => r1s + case (r1s, r2s) => SEQ(r1s, r2s) + } + case r => r } -/*val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) -println("TEST: " + simp(der('a', der('a', EVIL)))) -println(simp(ONE)) -val r1 = ALT(ZERO,ONE) -val r2 = SEQ(ONE,ZERO) -val r3 = SEQ(r1,SEQ(r2,r1)) -println("R1 = " + simp(r1)) -println(simp(r2)) -println(simp(r3)) -*/ - -// (4) Complete the two functions below; the first +// (4) Complete the two functions below; the first // calculates the derivative w.r.t. a string; the second // is the regular expression matcher taking a regular // expression and a string and checks whether the -// string matches the regular expression +// string matches the regular expression. -def ders (s: List[Char], r: Rexp ="") : Rexp = s match{ +def ders (s: List[Char], r: Rexp) : Rexp = s match { case Nil => r - case a::z => ders(z,simp(der(a,r))) + case c::s => ders(s, simp(der(c, r))) } -def matcher(r: Rexp, s: String): Boolean = { - val derivatives = simp(ders(s.toList,r)) - nullable(derivatives) -} +// main matcher function +def matcher(r: Rexp, s: String) = nullable(ders(s.toList, r)) // (5) Complete the size function for regular -// expressions according to the specification +// expressions according to the specification // given in the coursework. -def size(r: Rexp): Int = r match{ + +def size(r: Rexp): Int = r match { case ZERO => 1 case ONE => 1 case CHAR(_) => 1 - case SEQ(a,b) => 1 + size(a) + size(b) - case ALT(a,b) => 1 + size(a) + size(b) - case STAR(a) => 1 + size(a) + case ALT(r1, r2) => 1 + size(r1) + size (r2) + case SEQ(r1, r2) => 1 + size(r1) + size (r2) + case STAR(r1) => 1 + size(r1) } -println(der('a', ZERO | ONE))// == (ZERO | ZERO) -println(der('a', (CHAR('a') | ONE) ~ CHAR('a')))// ==ALT((ONE | ZERO) ~ CHAR('a'), ONE) -println(der('a', STAR(CHAR('a'))))// == (ONE ~ STAR(CHAR('a'))) -println(der('b', STAR(CHAR('a'))))// == (ZERO ~ STAR(CHAR('a')))) -// some testing data -/* - -assert(matcher(("a" ~ "b") ~ "c", "abc") == true) // => true -assert(matcher(("a" ~ "b") ~ "c", "ab") == false) // => false - - -// the supposedly 'evil' regular expression (a*)* b -//val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) -assert(matcher(EVIL, "a" * 1000 ++ "b") == true) // => true -assert(matcher(EVIL, "a" * 1000) == false) // => false +// some testing data +/* +matcher(("a" ~ "b") ~ "c", "abc") // => true +matcher(("a" ~ "b") ~ "c", "ab") // => false + +// the supposedly 'evil' regular expression (a*)* b +val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) + +matcher(EVIL, "a" * 1000 ++ "b") // => true +matcher(EVIL, "a" * 1000) // => false // size without simplifications -assert("28 " + size(der('a', der('a', EVIL))) ==28)// => 28 -assert("58 " + size(der('a', der('a', der('a', EVIL)))) ==58)// => 58 +size(der('a', der('a', EVIL))) // => 28 +size(der('a', der('a', der('a', EVIL)))) // => 58 // size with simplification -assert("8 " + size(simp(der('a', der('a', EVIL)))) ==8)// => 8 -assert("8 " + size(simp(der('a', der('a', der('a', EVIL))))) ==8) // => 8 - -*/ - +size(simp(der('a', der('a', EVIL)))) // => 8 +size(simp(der('a', der('a', der('a', EVIL))))) // => 8 -/* -// Python needs around 30 seconds for matching 28 a's with EVIL. +// Python needs around 30 seconds for matching 28 a's with EVIL. // Java 9 and later increase this to an "astonishing" 40000 a's in -// 30 seconds. +// around 30 seconds. // -// Lets see how long it really takes to match strings with -// 5 Million a's...it should be in the range of a couple -// of seconds. +// Lets see how long it takes to match strings with +// 5 Million a's...it should be in the range of a +// couple of seconds. def time_needed[T](i: Int, code: => T) = { val start = System.nanoTime() @@ -234,13 +162,15 @@ println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i)))) } -// another "power" test case -simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(50).next) == ONE +// another "power" test case +simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(100).next) == ONE // the Iterator produces the rexp // // SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE) // -// where SEQ is nested 50 times. +// where SEQ is nested 100 times. + +*/ -*/ +//}