progs/scala/re.scala
changeset 78 279d0bc48308
parent 77 4b4c677501e7
child 81 7ac7782a7318
--- a/progs/scala/re.scala	Mon May 25 16:09:23 2015 +0100
+++ b/progs/scala/re.scala	Mon Jun 08 14:37:19 2015 +0100
@@ -39,7 +39,7 @@
   def | (r: String) = ALT(s, r)
   def % = STAR(s)
   def ~ (r: Rexp) = SEQ(s, r)
-  def ~ (r: String) = SEQ(s, r)
+  def ~ (r: String) = SEQ(s, r)n jjjjjjj=99999ij9999ijn0n
   def $ (r: Rexp) = RECD(s, r)
 }
 
@@ -87,6 +87,18 @@
   case RECD(_, r) => values(r)
 }
 
+// zeroable function: tests whether the regular 
+// expression cannot regognise any string
+def zeroable (r: Rexp) : Boolean = r match {
+  case NULL => true
+  case EMPTY => false
+  case CHAR(_) => false
+  case ALT(r1, r2) => zeroable(r1) && zeroable(r2)
+  case SEQ(r1, r2) => zeroable(r1) || zeroable(r2)
+  case STAR(_) => false
+  case RECD(_, r1) => zeroable(r1)
+}
+
 
 // nullable function: tests whether the regular 
 // expression can recognise the empty string
@@ -244,8 +256,9 @@
 def lexing_simp(r: Rexp, s: String) : Val = lex_simp(r, s.toList)
 
 
-// brute force infrastructure
+// brute force search infrastructure
 
+// enumerates regular expressions until a certain depth
 def enum(n: Int, s: String) : Set[Rexp] = n match {
   case 0 => Set(NULL, EMPTY) ++ s.toSet.map(CHAR)
   case n => {
@@ -256,49 +269,120 @@
   }
 }
 
-def ord(v1: Val, r: Rexp, v2: Val) : Boolean = (v1, r, v2) match {
+def ordr(v1: Val, r: Rexp, v2: Val) : Boolean = (v1, r, v2) match {
   case (Void, EMPTY, Void) => true
-  case (Chr(c1), CHAR(c2), Chr(c3)) if (c1==c2 && c1==c3) => true
-  case (Left(v1), ALT(r1,r2), Left(v2)) => ord(v1,r1,v2)
-  case (Right(v1), ALT(r1,r2), Right(v2)) => ord(v1,r2,v2)
-  case (Left(v1), ALT(r1,r2), Right(v2)) => flatten(v2).length <= flatten(v1).length
-  case (Right(v1), ALT(r1,r2), Left(v2)) => flatten(v2).length < flatten(v1).length
-  case (Sequ(v1,v2), SEQ(r1,r2), Sequ(v3,v4)) if(v1==v3) => ord(v2, r2, v4)
-  case (Sequ(v1,v2), SEQ(r1,r2), Sequ(v3,v4)) if(v1!=v3) => ord(v1, r1, v3)
+  case (Chr(c1), CHAR(c2), Chr(c3)) if (c1 == c2 && c1 == c3) => true
+  case (Left(v1), ALT(r1, r2), Left(v2)) => ordr(v1, r1, v2)
+  case (Right(v1), ALT(r1, r2), Right(v2)) => ordr(v1, r2, v2)
+  case (Left(v1), ALT(r1, r2), Right(v2)) => flatten(v2).length <= flatten(v1).length
+  case (Right(v1), ALT(r1, r2), Left(v2)) => flatten(v2).length < flatten(v1).length
+  case (Sequ(v1, v2), SEQ(r1, r2), Sequ(v3, v4)) if (v1 == v3) => ordr(v2, r2, v4)
+  case (Sequ(v1, v2), SEQ(r1, r2), Sequ(v3, v4)) if (v1 != v3) => ordr(v1, r1, v3)
+  case _ => false
+}     
+
+def ord(v1: Val, v2: Val) : Boolean = (v1, v2) match {
+  case (Void, Void) => true
+  case (Chr(c1), Chr(c2)) if (c1 == c2) => true
+  case (Left(v1), Left(v2)) => ord(v1, v2)
+  case (Right(v1), Right(v2)) => ord(v1, v2)
+  case (Left(v1), Right(v2)) => flatten(v2).length <= flatten(v1).length
+  case (Right(v1), Left(v2)) => flatten(v2).length < flatten(v1).length
+  case (Sequ(v1, v2), Sequ(v3, v4)) if (v1 == v3) => ord(v2, v4)
+  case (Sequ(v1, v2), Sequ(v3, v4)) if (v1 != v3) => ord(v1, v3)
   case _ => false
 }     
 
-def tst(r: Rexp, c: Char) : Set[(Rexp, Val, Val, Val, Val, List[String], List[String])] = {
-  val r_der = der(c, r)
-  val vs = values(r_der)
-  for (v1 <- vs; v2 <- vs; 
-       if (v1 != v2 && ord(v1, r_der, v2) && ord(inj(r, c, v2), r, inj(r, c, v1)) && 
-           (flatten(inj(r, c, v1)) == flatten(inj(r, c, v2))))) 
-  yield (r, v1, v2, inj(r, c, v1), inj(r, c, v2), flattens(inj(r, c, v1)), flattens(inj(r, c, v2)))  
+// exhaustively tests values of a regular expression
+def vfilter2(f: Rexp => Val => Val => Boolean)(r: Rexp) : Set[(Rexp, Val, Val)] = {
+  val vs = values(r)
+  for (v1 <- vs; v2 <- vs; if (f(r)(v1)(v2))) yield (r, v1, v2)
+}
+
+def vfilter3(f: Rexp => Val => Val => Val => Boolean)(r: Rexp) : Set[(Rexp, Val, Val, Val)] = {
+  val vs = values(r)
+  for (v1 <- vs; v2 <- vs; v3 <- vs; if (f(r)(v1)(v2)(v3))) yield (r, v1, v2, v3)
+}
+
+// number of test cases
+enum(3, "a").size
+enum(2, "ab").size
+enum(2, "abc").size
+//enum(3, "ab").size
+
+
+// tests for totality
+def totality_test(r: Rexp)(v1: Val)(v2: Val) : Boolean =
+  !(ord(v1, v2) || ord(v2, v1))
+
+enum(2, "ab").flatMap(vfilter2(totality_test))
+enum(3, "a").flatMap(vfilter2(totality_test))
+
+
+//tests for transitivity (simple transitivity fails)
+def transitivity_test(r: Rexp)(v1: Val)(v2: Val)(v3: Val) : Boolean = 
+  ord(v1, v2) && ord(v2, v3) && !ord(v1, v3)
+
+def transitivity_test_extended(r: Rexp)(v1: Val)(v2: Val)(v3: Val) : Boolean = {
+  flatten(v1) == flatten(v2) && flatten(v2) == flatten(v3) && 
+  ord(v1, v2) && ord(v2, v3) && !ord(v1, v3)
 }
 
-def tst2(r: Rexp, c: Char) : Set[(Rexp, Val, Val, Val, Val, List[String], List[String])] = {
+// smallest(?) example where simple transitivity fails 
+val test1 = enum(3, "a").flatMap(vfilter3(transitivity_test))
+val test1_smallest = test1.toList.sortWith { (x,y) => size(x._1) < size(y._1) }.head
+
+pretty(test1_smallest._1)
+vpretty(test1_smallest._2)
+vpretty(test1_smallest._3)
+vpretty(test1_smallest._4)
+
+ord(test1_smallest._2, test1_smallest._3)
+ord(test1_smallest._3, test1_smallest._4)
+ord(test1_smallest._2, test1_smallest._4)
+ordr(test1_smallest._3, test1_smallest._1, test1_smallest._4)
+
+// with flatten test
+enum(3, "a").flatMap(vfilter3(transitivity_test_extended))
+
+//injection tests
+def injection_test(r: Rexp)(c: Char)(v1: Val)(v2: Val) : Boolean = {
+  v1 != v2 && 
+  ord(v1, v2) && 
+  ord(inj(r, c, v2), inj(r, c, v1)) && 
+  flatten(v1) == flatten(v2)
+}
+
+def injection_testA(r: Rexp)(c: Char)(v1: Val)(v2: Val) : Boolean = {
+  v1 != v2 && 
+  ord(v1, v2) && 
+  ord(inj(r, c, v2), inj(r, c, v1)) && 
+  flatten(v1).length == flatten(v2).length
+}
+
+def injection_test2(r: Rexp)(c: Char)(v1: Val)(v2: Val) : Boolean = {
+  v1 != v2 && 
+  ord(v1, v2) && 
+  ord(inj(r, c, v2), inj(r, c, v1))
+}
+def vfilter4(f: Rexp => Char => Val => Val => Boolean)(c: Char)(r: Rexp) : Set[(Rexp, Rexp, Val, Val)] = {
   val r_der = der(c, r)
   val vs = values(r_der)
-  for (v1 <- vs; v2 <- vs; 
-       if (v1 != v2 && ord(v1, r_der, v2) && ord(proj(r, c, v2), r_der, proj(r, c, v1))) 
-  yield (r, v1, v2, proj(r, c, v1), proj(r, c, v2), flattens(inj(r, c, v1)), flattens(inj(r, c, v2)))  
-}
+  for (v1 <- vs; v2 <- vs; if (f(r)(c)(v1)(v2))) yield (r, r_der, v1, v2)
+} 
 
-def comp(r1: (Rexp, Val, Val, Val, Val, List[String], List[String]), 
-         r2: (Rexp, Val, Val, Val, Val, List[String], List[String])) = size(r1._1) < size(r2._1)
-
+enum(3, "a").flatMap(vfilter4(injection_test)('a'))
+enum(3, "a").flatMap(vfilter4(injection_testA)('a'))
 
-val smallest = enum(3, "a").flatMap(tst(_, 'a')).toList.sortWith { comp }
+val test2 = enum(3, "a").flatMap(vfilter4(injection_test2)('a'))
+val test2_smallest = test2.toList.sortWith { (x,y) => size(x._1) < size(y._1) }.head
 
-smallest match {
-  case Nil => "none"
-  case (r, v1, v2, v3, v4, s1, s2)::_ => List(pretty(r),
-                                   pretty(der('a', r)),
-                                   vpretty(v1),
-                                   vpretty(v2),
-                                   vpretty(v3),
-                                   vpretty(v4), s1, s2).mkString("\n") }
+pretty(test2_smallest._1)
+pretty(test2_smallest._2)
+vpretty(test2_smallest._3)
+vpretty(test2_smallest._4)
+vpretty(inj(test2_smallest._1, 'a', test2_smallest._3))
+vpretty(inj(test2_smallest._1, 'a', test2_smallest._4))
 
 // Lexing Rules for a Small While Language
 
@@ -469,3 +553,13 @@
 vpretty(vb)
 val va = inj(a0, 'a', vb)
 vpretty(va)
+
+
+/* ord is not transitive in general: counter-example
+ *
+ * (1)  Left(Right(Right(())))
+ * (2)  Right(Left(()) ~ Left(()))
+ * (3)  Right(Right(()) ~ Right(a))
+ *
+ * (1) > (2); (2) > (3) but not (1) > (3)
+*/