--- a/progs/scala/re.scala Mon May 25 16:09:23 2015 +0100
+++ b/progs/scala/re.scala Mon Jun 08 14:37:19 2015 +0100
@@ -39,7 +39,7 @@
def | (r: String) = ALT(s, r)
def % = STAR(s)
def ~ (r: Rexp) = SEQ(s, r)
- def ~ (r: String) = SEQ(s, r)
+ def ~ (r: String) = SEQ(s, r)n jjjjjjj=99999ij9999ijn0n
def $ (r: Rexp) = RECD(s, r)
}
@@ -87,6 +87,18 @@
case RECD(_, r) => values(r)
}
+// zeroable function: tests whether the regular
+// expression cannot regognise any string
+def zeroable (r: Rexp) : Boolean = r match {
+ case NULL => true
+ case EMPTY => false
+ case CHAR(_) => false
+ case ALT(r1, r2) => zeroable(r1) && zeroable(r2)
+ case SEQ(r1, r2) => zeroable(r1) || zeroable(r2)
+ case STAR(_) => false
+ case RECD(_, r1) => zeroable(r1)
+}
+
// nullable function: tests whether the regular
// expression can recognise the empty string
@@ -244,8 +256,9 @@
def lexing_simp(r: Rexp, s: String) : Val = lex_simp(r, s.toList)
-// brute force infrastructure
+// brute force search infrastructure
+// enumerates regular expressions until a certain depth
def enum(n: Int, s: String) : Set[Rexp] = n match {
case 0 => Set(NULL, EMPTY) ++ s.toSet.map(CHAR)
case n => {
@@ -256,49 +269,120 @@
}
}
-def ord(v1: Val, r: Rexp, v2: Val) : Boolean = (v1, r, v2) match {
+def ordr(v1: Val, r: Rexp, v2: Val) : Boolean = (v1, r, v2) match {
case (Void, EMPTY, Void) => true
- case (Chr(c1), CHAR(c2), Chr(c3)) if (c1==c2 && c1==c3) => true
- case (Left(v1), ALT(r1,r2), Left(v2)) => ord(v1,r1,v2)
- case (Right(v1), ALT(r1,r2), Right(v2)) => ord(v1,r2,v2)
- case (Left(v1), ALT(r1,r2), Right(v2)) => flatten(v2).length <= flatten(v1).length
- case (Right(v1), ALT(r1,r2), Left(v2)) => flatten(v2).length < flatten(v1).length
- case (Sequ(v1,v2), SEQ(r1,r2), Sequ(v3,v4)) if(v1==v3) => ord(v2, r2, v4)
- case (Sequ(v1,v2), SEQ(r1,r2), Sequ(v3,v4)) if(v1!=v3) => ord(v1, r1, v3)
+ case (Chr(c1), CHAR(c2), Chr(c3)) if (c1 == c2 && c1 == c3) => true
+ case (Left(v1), ALT(r1, r2), Left(v2)) => ordr(v1, r1, v2)
+ case (Right(v1), ALT(r1, r2), Right(v2)) => ordr(v1, r2, v2)
+ case (Left(v1), ALT(r1, r2), Right(v2)) => flatten(v2).length <= flatten(v1).length
+ case (Right(v1), ALT(r1, r2), Left(v2)) => flatten(v2).length < flatten(v1).length
+ case (Sequ(v1, v2), SEQ(r1, r2), Sequ(v3, v4)) if (v1 == v3) => ordr(v2, r2, v4)
+ case (Sequ(v1, v2), SEQ(r1, r2), Sequ(v3, v4)) if (v1 != v3) => ordr(v1, r1, v3)
+ case _ => false
+}
+
+def ord(v1: Val, v2: Val) : Boolean = (v1, v2) match {
+ case (Void, Void) => true
+ case (Chr(c1), Chr(c2)) if (c1 == c2) => true
+ case (Left(v1), Left(v2)) => ord(v1, v2)
+ case (Right(v1), Right(v2)) => ord(v1, v2)
+ case (Left(v1), Right(v2)) => flatten(v2).length <= flatten(v1).length
+ case (Right(v1), Left(v2)) => flatten(v2).length < flatten(v1).length
+ case (Sequ(v1, v2), Sequ(v3, v4)) if (v1 == v3) => ord(v2, v4)
+ case (Sequ(v1, v2), Sequ(v3, v4)) if (v1 != v3) => ord(v1, v3)
case _ => false
}
-def tst(r: Rexp, c: Char) : Set[(Rexp, Val, Val, Val, Val, List[String], List[String])] = {
- val r_der = der(c, r)
- val vs = values(r_der)
- for (v1 <- vs; v2 <- vs;
- if (v1 != v2 && ord(v1, r_der, v2) && ord(inj(r, c, v2), r, inj(r, c, v1)) &&
- (flatten(inj(r, c, v1)) == flatten(inj(r, c, v2)))))
- yield (r, v1, v2, inj(r, c, v1), inj(r, c, v2), flattens(inj(r, c, v1)), flattens(inj(r, c, v2)))
+// exhaustively tests values of a regular expression
+def vfilter2(f: Rexp => Val => Val => Boolean)(r: Rexp) : Set[(Rexp, Val, Val)] = {
+ val vs = values(r)
+ for (v1 <- vs; v2 <- vs; if (f(r)(v1)(v2))) yield (r, v1, v2)
+}
+
+def vfilter3(f: Rexp => Val => Val => Val => Boolean)(r: Rexp) : Set[(Rexp, Val, Val, Val)] = {
+ val vs = values(r)
+ for (v1 <- vs; v2 <- vs; v3 <- vs; if (f(r)(v1)(v2)(v3))) yield (r, v1, v2, v3)
+}
+
+// number of test cases
+enum(3, "a").size
+enum(2, "ab").size
+enum(2, "abc").size
+//enum(3, "ab").size
+
+
+// tests for totality
+def totality_test(r: Rexp)(v1: Val)(v2: Val) : Boolean =
+ !(ord(v1, v2) || ord(v2, v1))
+
+enum(2, "ab").flatMap(vfilter2(totality_test))
+enum(3, "a").flatMap(vfilter2(totality_test))
+
+
+//tests for transitivity (simple transitivity fails)
+def transitivity_test(r: Rexp)(v1: Val)(v2: Val)(v3: Val) : Boolean =
+ ord(v1, v2) && ord(v2, v3) && !ord(v1, v3)
+
+def transitivity_test_extended(r: Rexp)(v1: Val)(v2: Val)(v3: Val) : Boolean = {
+ flatten(v1) == flatten(v2) && flatten(v2) == flatten(v3) &&
+ ord(v1, v2) && ord(v2, v3) && !ord(v1, v3)
}
-def tst2(r: Rexp, c: Char) : Set[(Rexp, Val, Val, Val, Val, List[String], List[String])] = {
+// smallest(?) example where simple transitivity fails
+val test1 = enum(3, "a").flatMap(vfilter3(transitivity_test))
+val test1_smallest = test1.toList.sortWith { (x,y) => size(x._1) < size(y._1) }.head
+
+pretty(test1_smallest._1)
+vpretty(test1_smallest._2)
+vpretty(test1_smallest._3)
+vpretty(test1_smallest._4)
+
+ord(test1_smallest._2, test1_smallest._3)
+ord(test1_smallest._3, test1_smallest._4)
+ord(test1_smallest._2, test1_smallest._4)
+ordr(test1_smallest._3, test1_smallest._1, test1_smallest._4)
+
+// with flatten test
+enum(3, "a").flatMap(vfilter3(transitivity_test_extended))
+
+//injection tests
+def injection_test(r: Rexp)(c: Char)(v1: Val)(v2: Val) : Boolean = {
+ v1 != v2 &&
+ ord(v1, v2) &&
+ ord(inj(r, c, v2), inj(r, c, v1)) &&
+ flatten(v1) == flatten(v2)
+}
+
+def injection_testA(r: Rexp)(c: Char)(v1: Val)(v2: Val) : Boolean = {
+ v1 != v2 &&
+ ord(v1, v2) &&
+ ord(inj(r, c, v2), inj(r, c, v1)) &&
+ flatten(v1).length == flatten(v2).length
+}
+
+def injection_test2(r: Rexp)(c: Char)(v1: Val)(v2: Val) : Boolean = {
+ v1 != v2 &&
+ ord(v1, v2) &&
+ ord(inj(r, c, v2), inj(r, c, v1))
+}
+def vfilter4(f: Rexp => Char => Val => Val => Boolean)(c: Char)(r: Rexp) : Set[(Rexp, Rexp, Val, Val)] = {
val r_der = der(c, r)
val vs = values(r_der)
- for (v1 <- vs; v2 <- vs;
- if (v1 != v2 && ord(v1, r_der, v2) && ord(proj(r, c, v2), r_der, proj(r, c, v1)))
- yield (r, v1, v2, proj(r, c, v1), proj(r, c, v2), flattens(inj(r, c, v1)), flattens(inj(r, c, v2)))
-}
+ for (v1 <- vs; v2 <- vs; if (f(r)(c)(v1)(v2))) yield (r, r_der, v1, v2)
+}
-def comp(r1: (Rexp, Val, Val, Val, Val, List[String], List[String]),
- r2: (Rexp, Val, Val, Val, Val, List[String], List[String])) = size(r1._1) < size(r2._1)
-
+enum(3, "a").flatMap(vfilter4(injection_test)('a'))
+enum(3, "a").flatMap(vfilter4(injection_testA)('a'))
-val smallest = enum(3, "a").flatMap(tst(_, 'a')).toList.sortWith { comp }
+val test2 = enum(3, "a").flatMap(vfilter4(injection_test2)('a'))
+val test2_smallest = test2.toList.sortWith { (x,y) => size(x._1) < size(y._1) }.head
-smallest match {
- case Nil => "none"
- case (r, v1, v2, v3, v4, s1, s2)::_ => List(pretty(r),
- pretty(der('a', r)),
- vpretty(v1),
- vpretty(v2),
- vpretty(v3),
- vpretty(v4), s1, s2).mkString("\n") }
+pretty(test2_smallest._1)
+pretty(test2_smallest._2)
+vpretty(test2_smallest._3)
+vpretty(test2_smallest._4)
+vpretty(inj(test2_smallest._1, 'a', test2_smallest._3))
+vpretty(inj(test2_smallest._1, 'a', test2_smallest._4))
// Lexing Rules for a Small While Language
@@ -469,3 +553,13 @@
vpretty(vb)
val va = inj(a0, 'a', vb)
vpretty(va)
+
+
+/* ord is not transitive in general: counter-example
+ *
+ * (1) Left(Right(Right(())))
+ * (2) Right(Left(()) ~ Left(()))
+ * (3) Right(Right(()) ~ Right(a))
+ *
+ * (1) > (2); (2) > (3) but not (1) > (3)
+*/