lexing: comparison progs/scala/re.scala

equal deleted inserted replaced

-:4b4c677501e7
+:279d0bc48308
 implicit def stringOps(s: String) = new {
 def | (r: Rexp) = ALT(s, r)
 def | (r: String) = ALT(s, r)
 def % = STAR(s)
 def ~ (r: Rexp) = SEQ(s, r)
-def ~ (r: String) = SEQ(s, r)
+def ~ (r: String) = SEQ(s, r)n jjjjjjj=99999ij9999ijn0n
 def $ (r: Rexp) = RECD(s, r)
 }
 def pretty(r: Rexp) : String = r match {
 case NULL => "0"
 case ALT(r1, r2) => (for (v1 <- values(r1)) yield Left(v1)) ++
 (for (v2 <- values(r2)) yield Right(v2))
 case SEQ(r1, r2) => for (v1 <- values(r1); v2 <- values(r2)) yield Sequ(v1, v2)
 case STAR(r) => Set(Void) ++ values(r)   // to do more would cause the set to be infinite
 case RECD(_, r) => values(r)
+}
+// zeroable function: tests whether the regular
+// expression cannot regognise any string
+def zeroable (r: Rexp) : Boolean = r match {
+case NULL => true
+case EMPTY => false
+case CHAR(_) => false
+case ALT(r1, r2) => zeroable(r1) && zeroable(r2)
+case SEQ(r1, r2) => zeroable(r1) || zeroable(r2)
+case STAR(_) => false
+case RECD(_, r1) => zeroable(r1)
 }
 // nullable function: tests whether the regular
 // expression can recognise the empty string
 }
 def lexing_simp(r: Rexp, s: String) : Val = lex_simp(r, s.toList)
-// brute force infrastructure
+// brute force search infrastructure
+// enumerates regular expressions until a certain depth
 def enum(n: Int, s: String) : Set[Rexp] = n match {
 case 0 => Set(NULL, EMPTY) ++ s.toSet.map(CHAR)
 case n => {
 val rs = enum(n - 1, s)
 rs ++
 (for (r1 <- rs; r2 <- rs) yield ALT(r1, r2)) ++
 (for (r1 <- rs; r2 <- rs) yield SEQ(r1, r2))
 }
 }
-def ord(v1: Val, r: Rexp, v2: Val) : Boolean = (v1, r, v2) match {
+def ordr(v1: Val, r: Rexp, v2: Val) : Boolean = (v1, r, v2) match {
 case (Void, EMPTY, Void) => true
-case (Chr(c1), CHAR(c2), Chr(c3)) if (c1==c2 && c1==c3) => true
+case (Chr(c1), CHAR(c2), Chr(c3)) if (c1 == c2 && c1 == c3) => true
-case (Left(v1), ALT(r1,r2), Left(v2)) => ord(v1,r1,v2)
+case (Left(v1), ALT(r1, r2), Left(v2)) => ordr(v1, r1, v2)
-case (Right(v1), ALT(r1,r2), Right(v2)) => ord(v1,r2,v2)
+case (Right(v1), ALT(r1, r2), Right(v2)) => ordr(v1, r2, v2)
-case (Left(v1), ALT(r1,r2), Right(v2)) => flatten(v2).length <= flatten(v1).length
+case (Left(v1), ALT(r1, r2), Right(v2)) => flatten(v2).length <= flatten(v1).length
-case (Right(v1), ALT(r1,r2), Left(v2)) => flatten(v2).length < flatten(v1).length
+case (Right(v1), ALT(r1, r2), Left(v2)) => flatten(v2).length < flatten(v1).length
-case (Sequ(v1,v2), SEQ(r1,r2), Sequ(v3,v4)) if(v1==v3) => ord(v2, r2, v4)
+case (Sequ(v1, v2), SEQ(r1, r2), Sequ(v3, v4)) if (v1 == v3) => ordr(v2, r2, v4)
-case (Sequ(v1,v2), SEQ(r1,r2), Sequ(v3,v4)) if(v1!=v3) => ord(v1, r1, v3)
+case (Sequ(v1, v2), SEQ(r1, r2), Sequ(v3, v4)) if (v1 != v3) => ordr(v1, r1, v3)
 case _ => false
 }
-def tst(r: Rexp, c: Char) : Set[(Rexp, Val, Val, Val, Val, List[String], List[String])] = {
+def ord(v1: Val, v2: Val) : Boolean = (v1, v2) match {
+case (Void, Void) => true
+case (Chr(c1), Chr(c2)) if (c1 == c2) => true
+case (Left(v1), Left(v2)) => ord(v1, v2)
+case (Right(v1), Right(v2)) => ord(v1, v2)
+case (Left(v1), Right(v2)) => flatten(v2).length <= flatten(v1).length
+case (Right(v1), Left(v2)) => flatten(v2).length < flatten(v1).length
+case (Sequ(v1, v2), Sequ(v3, v4)) if (v1 == v3) => ord(v2, v4)
+case (Sequ(v1, v2), Sequ(v3, v4)) if (v1 != v3) => ord(v1, v3)
+case _ => false
+}
+// exhaustively tests values of a regular expression
+def vfilter2(f: Rexp => Val => Val => Boolean)(r: Rexp) : Set[(Rexp, Val, Val)] = {
+val vs = values(r)
+for (v1 <- vs; v2 <- vs; if (f(r)(v1)(v2))) yield (r, v1, v2)
+}
+def vfilter3(f: Rexp => Val => Val => Val => Boolean)(r: Rexp) : Set[(Rexp, Val, Val, Val)] = {
+val vs = values(r)
+for (v1 <- vs; v2 <- vs; v3 <- vs; if (f(r)(v1)(v2)(v3))) yield (r, v1, v2, v3)
+}
+// number of test cases
+enum(3, "a").size
+enum(2, "ab").size
+enum(2, "abc").size
+//enum(3, "ab").size
+// tests for totality
+def totality_test(r: Rexp)(v1: Val)(v2: Val) : Boolean =
+!(ord(v1, v2) || ord(v2, v1))
+enum(2, "ab").flatMap(vfilter2(totality_test))
+enum(3, "a").flatMap(vfilter2(totality_test))
+//tests for transitivity (simple transitivity fails)
+def transitivity_test(r: Rexp)(v1: Val)(v2: Val)(v3: Val) : Boolean =
+ord(v1, v2) && ord(v2, v3) && !ord(v1, v3)
+def transitivity_test_extended(r: Rexp)(v1: Val)(v2: Val)(v3: Val) : Boolean = {
+flatten(v1) == flatten(v2) && flatten(v2) == flatten(v3) &&
+ord(v1, v2) && ord(v2, v3) && !ord(v1, v3)
+}
+// smallest(?) example where simple transitivity fails
+val test1 = enum(3, "a").flatMap(vfilter3(transitivity_test))
+val test1_smallest = test1.toList.sortWith { (x,y) => size(x._1) < size(y._1) }.head
+pretty(test1_smallest._1)
+vpretty(test1_smallest._2)
+vpretty(test1_smallest._3)
+vpretty(test1_smallest._4)
+ord(test1_smallest._2, test1_smallest._3)
+ord(test1_smallest._3, test1_smallest._4)
+ord(test1_smallest._2, test1_smallest._4)
+ordr(test1_smallest._3, test1_smallest._1, test1_smallest._4)
+// with flatten test
+enum(3, "a").flatMap(vfilter3(transitivity_test_extended))
+//injection tests
+def injection_test(r: Rexp)(c: Char)(v1: Val)(v2: Val) : Boolean = {
+v1 != v2 &&
+ord(v1, v2) &&
+ord(inj(r, c, v2), inj(r, c, v1)) &&
+flatten(v1) == flatten(v2)
+}
+def injection_testA(r: Rexp)(c: Char)(v1: Val)(v2: Val) : Boolean = {
+v1 != v2 &&
+ord(v1, v2) &&
+ord(inj(r, c, v2), inj(r, c, v1)) &&
+flatten(v1).length == flatten(v2).length
+}
+def injection_test2(r: Rexp)(c: Char)(v1: Val)(v2: Val) : Boolean = {
+v1 != v2 &&
+ord(v1, v2) &&
+ord(inj(r, c, v2), inj(r, c, v1))
+}
+def vfilter4(f: Rexp => Char => Val => Val => Boolean)(c: Char)(r: Rexp) : Set[(Rexp, Rexp, Val, Val)] = {
 val r_der = der(c, r)
 val vs = values(r_der)
-for (v1 <- vs; v2 <- vs;
+for (v1 <- vs; v2 <- vs; if (f(r)(c)(v1)(v2))) yield (r, r_der, v1, v2)
-if (v1 != v2 && ord(v1, r_der, v2) && ord(inj(r, c, v2), r, inj(r, c, v1)) &&
+}
-(flatten(inj(r, c, v1)) == flatten(inj(r, c, v2)))))
-yield (r, v1, v2, inj(r, c, v1), inj(r, c, v2), flattens(inj(r, c, v1)), flattens(inj(r, c, v2)))
+enum(3, "a").flatMap(vfilter4(injection_test)('a'))
-}
+enum(3, "a").flatMap(vfilter4(injection_testA)('a'))
-def tst2(r: Rexp, c: Char) : Set[(Rexp, Val, Val, Val, Val, List[String], List[String])] = {
+val test2 = enum(3, "a").flatMap(vfilter4(injection_test2)('a'))
-val r_der = der(c, r)
+val test2_smallest = test2.toList.sortWith { (x,y) => size(x._1) < size(y._1) }.head
-val vs = values(r_der)
-for (v1 <- vs; v2 <- vs;
+pretty(test2_smallest._1)
-if (v1 != v2 && ord(v1, r_der, v2) && ord(proj(r, c, v2), r_der, proj(r, c, v1)))
+pretty(test2_smallest._2)
-yield (r, v1, v2, proj(r, c, v1), proj(r, c, v2), flattens(inj(r, c, v1)), flattens(inj(r, c, v2)))
+vpretty(test2_smallest._3)
-}
+vpretty(test2_smallest._4)
+vpretty(inj(test2_smallest._1, 'a', test2_smallest._3))
-def comp(r1: (Rexp, Val, Val, Val, Val, List[String], List[String]),
+vpretty(inj(test2_smallest._1, 'a', test2_smallest._4))
-r2: (Rexp, Val, Val, Val, Val, List[String], List[String])) = size(r1._1) < size(r2._1)
-val smallest = enum(3, "a").flatMap(tst(_, 'a')).toList.sortWith { comp }
-smallest match {
-case Nil => "none"
-case (r, v1, v2, v3, v4, s1, s2)::_ => List(pretty(r),
-pretty(der('a', r)),
-vpretty(v1),
-vpretty(v2),
-vpretty(v3),
-vpretty(v4), s1, s2).mkString("\n") }
 // Lexing Rules for a Small While Language
 def PLUS(r: Rexp) = r ~ r.%
 val SYM = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
 vpretty(m)
 val vb = inj(aa, 'b', m)
 vpretty(vb)
 val va = inj(a0, 'a', vb)
 vpretty(va)
+/* ord is not transitive in general: counter-example
+*
+* (1)  Left(Right(Right(())))
+* (2)  Right(Left(()) ~ Left(()))
+* (3)  Right(Right(()) ~ Right(a))
+*
+* (1) > (2); (2) > (3) but not (1) > (3)
+*/

changeset 78	279d0bc48308
parent 77	4b4c677501e7
child 81	7ac7782a7318