diff -r b528d1d3d3c3 -r 59e005dcf163 main_testing3/re.scala --- a/main_testing3/re.scala Thu Nov 02 13:53:37 2023 +0000 +++ b/main_testing3/re.scala Thu Nov 02 23:34:53 2023 +0000 @@ -19,240 +19,159 @@ // some convenience for typing regular expressions -import scala.language.implicitConversions -import scala.language.reflectiveCalls def charlist2rexp(s: List[Char]): Rexp = s match { case Nil => ONE case c::Nil => CHAR(c) case c::s => SEQ(CHAR(c), charlist2rexp(s)) } -implicit def string2rexp(s: String): Rexp = charlist2rexp(s.toList) + +import scala.language.implicitConversions -implicit def RexpOps (r: Rexp) = new { +given Conversion[String, Rexp] = (s => charlist2rexp(s.toList)) + +extension (r: Rexp) { def | (s: Rexp) = ALT(r, s) def % = STAR(r) def ~ (s: Rexp) = SEQ(r, s) } -implicit def stringOps (s: String) = new { - def | (r: Rexp) = ALT(s, r) - def | (r: String) = ALT(s, r) - def % = STAR(s) - def ~ (r: Rexp) = SEQ(s, r) - def ~ (r: String) = SEQ(s, r) -} +// some examples for the conversion and extension: -// examples for the implicits: -// ALT(CHAR('a'), CHAR('b')) // val areg : Rexp = "a" | "b" - -// SEQ(CHAR('a'), CHAR('b')) +// => ALTs(List(CHAR('a'), CHAR('b'))) +// // val sreg : Rexp = "a" ~ "b" - +// => SEQs(List(CHAR('a'), CHAR('b'))) +// +// val star_reg : Rexp = ("a" ~ "b").% +// => STAR(SEQs(List(CHAR('a'), CHAR('b')))) // ADD YOUR CODE BELOW //====================== -// (1) +// (1) def nullable (r: Rexp) : Boolean = r match { - case ZERO => false - case ONE => true - case CHAR(_) => false - case ALTs(rs) => (for(reg <- rs) yield nullable(reg)).exists(_ == true) - case SEQs(rs) => (for(reg <- rs) yield nullable(reg)).forall(_ == true) - case STAR(_) => true + case ZERO => false + case ONE => true + case CHAR(_) => false + case ALTs(rs) => rs.exists(nullable) + case SEQs(rs) => rs.forall(nullable) + case STAR(_) => true } -/* -nullable(ZERO) == false -nullable(ONE) == true -nullable(CHAR('a')) == false -nullable(ZERO | ONE) == true -nullable(ZERO | CHAR('a')) == false -nullable(ONE ~ ONE) == true -nullable(ONE ~ CHAR('a')) == false -nullable(STAR(ZERO)) == true -nullable(ALTs(List(ONE, CHAR('a'), ZERO))) == true -nullable(SEQs(List(ONE, ALTs(List(ONE, CHAR('a'), ZERO)), STAR(ZERO)))) == true -*/ - // (2) -def der (c: Char, r: Rexp) : Rexp = r match { - case ZERO => ZERO - case ONE => ZERO - case CHAR(d) => if(c == d) ONE else ZERO - case ALTs(rs) => ALTs(for(reg <- rs) yield der(c, reg)) - case SEQs(Nil) => ZERO - case SEQs(r :: rs) => if(nullable(r)) ALT(SEQs(der(c, r) :: rs), der(c, SEQs(rs))) else SEQs(der(c, r) :: rs) - case STAR(r) => SEQ(der(c,r), STAR(r)) +def der(c: Char, r: Rexp) : Rexp = r match { + case ZERO => ZERO + case ONE => ZERO + case CHAR(d) => if (c == d) ONE else ZERO + case ALTs(rs) => ALTs(rs.map(der(c, _))) + case SEQs(Nil) => ZERO + case SEQs(r1::rs) => + if (nullable(r1)) ALT(SEQs(der(c, r1)::rs), der(c, SEQs(rs))) + else SEQs(der(c, r1) :: rs) + case STAR(r1) => SEQ(der(c, r1), STAR(r1)) } -/* -der('a', ZERO | ONE) == (ZERO | ZERO) -der('a', (CHAR('a') | ONE) ~ CHAR('a')) == ALT((ONE | ZERO) ~ CHAR('a'), SEQs(List(ONE))) -der('a', (CHAR('a') | CHAR('a')) ~ CHAR('a')) == (ONE | ONE) ~ CHAR('a') -der('a', STAR(CHAR('a'))) == (ONE ~ STAR(CHAR('a'))) -der('b', STAR(CHAR('a'))) == (ZERO ~ STAR(CHAR('a'))) -*/ // (3) def denest(rs: List[Rexp]) : List[Rexp] = rs match { - case Nil => Nil - case ZERO :: rest => denest(rest) - case ALTs(rgs) :: rest => rgs ::: denest(rest) - case r :: rest => r :: denest(rest) + case Nil => Nil + case ZERO::tl => denest(tl) + case ALTs(rs1)::rs2 => rs1 ::: denest(rs2) + case r::rs => r :: denest(rs) } -/* -denest(List(ONE, ZERO, ALTs(List(ONE, CHAR('a'))))) == List(ONE, ONE, CHAR('a')) -denest(List(ONE ~ ONE, ZERO, ZERO | ONE)) == List(ONE ~ ONE, ZERO, ONE) -*/ - // (4) def flts(rs: List[Rexp], acc: List[Rexp] = Nil) : List[Rexp] = rs match { - case Nil => acc - case ZERO :: rest => List(ZERO) - case ONE :: rest => flts(rest, acc) - case SEQs(rgs) :: rest => flts(rest, acc ::: rgs) - case r :: rest => flts(rest, acc ::: List(r)) + case Nil => acc + case ZERO::rs => ZERO::Nil + case ONE::rs => flts(rs, acc) + case SEQs(rs1)::rs => flts(rs, acc ::: rs1) + case r::rs => flts(rs, acc :+ r) } -/* -flts(List(CHAR('a'), ZERO, ONE), Nil) == List(ZERO) -flts(List(CHAR('a'), ONE, ONE, CHAR('b')), Nil) == List(CHAR('a'), CHAR('b')) -flts(List(ONE ~ CHAR('a'), CHAR('b') ~ ONE), Nil) == List(ONE, CHAR('a'), CHAR('b'), ONE) -*/ - // (5) def ALTs_smart(rs: List[Rexp]) : Rexp = rs match { - case Nil => ZERO - case List(r) => r - case _ => ALTs(rs) + case Nil => ZERO + case r::Nil => r + case rs => ALTs(rs) } def SEQs_smart(rs: List[Rexp]) : Rexp = rs match { - case Nil => ONE - case List(r) => r - case _ => SEQs(rs) + case Nil => ONE + case ZERO::Nil => ZERO + case r::Nil => r + case rs => SEQs(rs) } -/* -SEQs_smart(Nil) == ONE -SEQs_smart(List(ZERO)) == ZERO -SEQs_smart(List(CHAR('a'))) == CHAR('a') -SEQs_smart(List(ONE ~ ONE)) == ONE ~ ONE -SEQs_smart(List(ONE, ONE)) == SEQs(List(ONE, ONE)) -ALTs_smart(Nil) == ZERO -ALTs_smart(List(ONE ~ ONE)) == ONE ~ ONE -ALTs_smart(List(ZERO, ZERO)) == ALTs(List(ZERO, ZERO)) -*/ +// (6) -// (6) def simp(r: Rexp) : Rexp = r match { - case ALTs(rs) => ALTs_smart(denest(for(reg <- rs) yield simp(reg)).distinct) - case SEQs(rs) => SEQs_smart(flts(for(reg <- rs) yield simp(reg))) - case _ => r + case ALTs(rs) => + ALTs_smart(denest(rs.map(simp)).distinct) + case SEQs(rs) => + SEQs_smart(flts(rs.map(simp))) + case r => r } -/* -simp(ZERO | ONE) == ONE -simp(STAR(ZERO | ONE)) == STAR(ZERO | ONE) -simp(ONE ~ (ONE ~ (ONE ~ CHAR('a')))) == CHAR('a') -simp(((ONE ~ ONE) ~ ONE) ~ CHAR('a')) == CHAR('a') -simp(((ONE | ONE) ~ ONE) ~ CHAR('a')) == CHAR('a') -simp(ONE ~ (ONE ~ (ONE ~ ZERO))) == ZERO -simp(ALT(ONE ~ (ONE ~ (ONE ~ ZERO)), CHAR('a'))) == CHAR('a') -simp(CHAR('a') | CHAR('a')) == CHAR('a') -simp(CHAR('a') ~ CHAR('a')) == CHAR('a') ~ CHAR('a') -simp(ONE | CHAR('a')) == (ONE | CHAR('a')) -simp(ALT((CHAR('a') | ZERO) ~ ONE,((ONE | CHAR('b')) | CHAR('c')) ~ (CHAR('d') ~ ZERO))) == CHAR('a') -simp((ZERO | ((ZERO | ZERO) | (ZERO | ZERO))) ~ ((ONE | ZERO) | ONE ) ~ (CHAR('a'))) == ZERO -simp(ALT(ONE | ONE, ONE | ONE)) == ONE -simp(ALT(ZERO | CHAR('a'), CHAR('a') | ZERO)) == CHAR('a') -simp(ALT(ONE | CHAR('a'), CHAR('a') | ONE)) == ALT(ONE, CHAR('a')) -simp(ALTs(Nil)) == ZERO -simp(SEQs(List(CHAR('a')))) == CHAR('a') -*/ +//println("Simp tests") +//println(simp(ALT(ONE | CHAR('a'), CHAR('a') | ONE))) +//println(simp(((CHAR('a') | ZERO) ~ ONE) | +// (((ONE | CHAR('b')) | CHAR('c')) ~ (CHAR('d') ~ ZERO)))) -// (7) +// (7) + def ders (s: List[Char], r: Rexp) : Rexp = s match { - case Nil => r - case c :: cs => ders(cs, simp(der(c, r))) -} -def matcher(r: Rexp, s: String): Boolean = { - val derivatives = ders(s.toList, r) - nullable(derivatives) + case Nil => r + case c::s => ders(s, simp(der(c, r))) } -/* -val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) -ders("aaaaa".toList, EVIL) == SEQs(List(STAR(CHAR('a')), STAR(STAR(CHAR('a'))), CHAR('b'))) -ders(List('b'), EVIL) == ONE -ders("bb".toList, EVIL) == ZERO -matcher(EVIL, "a" * 5 ++ "b") == true -matcher(EVIL, "b") == true -matcher(EVIL, "bb") == false -matcher("abc", "abc") == true -matcher(("ab" | "a") ~ (ONE | "bc"), "abc") == true -matcher(ONE, "") == true -matcher(ZERO, "") == false -matcher(ONE | CHAR('a'), "") == true -matcher(ONE | CHAR('a'), "a") == true -*/ +// main matcher function +def matcher(r: Rexp, s: String) = nullable(ders(s.toList, r)) // (8) + def size(r: Rexp): Int = r match { - case ZERO => 1 - case ONE => 1 - case CHAR(_) => 1 - case ALTs(rs) => 1 + (for(reg <- rs) yield size(reg)).sum - case SEQs(rs) => 1 + (for(reg <- rs) yield size(reg)).sum - case STAR(r) => 1 + size(r) + case ZERO => 1 + case ONE => 1 + case CHAR(_) => 1 + case ALTs(rs) => 1 + rs.map(size).sum + case SEQs(rs) => 1 + rs.map(size).sum + case STAR(r1) => 1 + size(r1) } -/* -val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) -size(der('a', der('a', EVIL))) == 36 -size(der('a', der('a', der('a', EVIL)))) == 83 -size(ders("aaaaaa".toList, EVIL)) == 7 -size(ders(("a" * 50).toList, EVIL)) == 7 -*/ -// Some testing data -//=================== +// some testing data /* - -simp(ALT(ONE | CHAR('a'), CHAR('a') | ONE)) // => ALTs(List(ONE, CHAR(a))) -simp(((CHAR('a') | ZERO) ~ ONE) | (((ONE | CHAR('b')) | CHAR('c')) ~ (CHAR('d') ~ ZERO))) // => CHAR(a) - -matcher(("a" ~ "b") ~ "c", "ab") // => false -matcher(("a" ~ "b") ~ "c", "abc") // => true - +println(matcher(("a" ~ "b") ~ "c", "abc")) // => true +println(matcher(("a" ~ "b") ~ "c", "ab")) // => false // the supposedly 'evil' regular expression (a*)* b val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) -matcher(EVIL, "a" * 1000) // => false -matcher(EVIL, "a" * 1000 ++ "b") // => true - +println(matcher(EVIL, "a" * 1000 ++ "b")) // => true +println(matcher(EVIL, "a" * 1000)) // => false // size without simplifications -size(der('a', der('a', EVIL))) // => 36 -size(der('a', der('a', der('a', EVIL)))) // => 83 +println(size(der('a', der('a', EVIL)))) // => 36 +println(size(der('a', der('a', der('a', EVIL))))) // => 83 // size with simplification -size(simp(der('a', der('a', EVIL)))) // => 7 -size(simp(der('a', der('a', der('a', EVIL))))) // => 7 +println(simp(der('a', der('a', EVIL)))) +println(simp(der('a', der('a', der('a', EVIL))))) + +println(size(simp(der('a', der('a', EVIL))))) // => 7 +println(size(simp(der('a', der('a', der('a', EVIL)))))) // => 7 // Python needs around 30 seconds for matching 28 a's with EVIL. // Java 9 and later increase this to an "astonishing" 40000 a's in -// 30 seconds. +// around 30 seconds. // -// Lets see how long it really takes to match strings with -// 5 Million a's...it should be in the range of a few -// of seconds. +// Lets see how long it takes to match strings with +// 5 Million a's...it should be in the range of a +// few seconds. def time_needed[T](i: Int, code: => T) = { val start = System.nanoTime() @@ -266,15 +185,15 @@ } // another "power" test case -simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(50).next()) == ONE +println(simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(100).next()) == ONE) // the Iterator produces the rexp // // SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE) // -// where SEQ is nested 50 times. +// where SEQ is nested 100 times. +*/ -*/ }