progs/scala/re.scala
changeset 78 279d0bc48308
parent 77 4b4c677501e7
child 81 7ac7782a7318
equal deleted inserted replaced
77:4b4c677501e7 78:279d0bc48308
    37 implicit def stringOps(s: String) = new {
    37 implicit def stringOps(s: String) = new {
    38   def | (r: Rexp) = ALT(s, r)
    38   def | (r: Rexp) = ALT(s, r)
    39   def | (r: String) = ALT(s, r)
    39   def | (r: String) = ALT(s, r)
    40   def % = STAR(s)
    40   def % = STAR(s)
    41   def ~ (r: Rexp) = SEQ(s, r)
    41   def ~ (r: Rexp) = SEQ(s, r)
    42   def ~ (r: String) = SEQ(s, r)
    42   def ~ (r: String) = SEQ(s, r)n jjjjjjj=99999ij9999ijn0n
    43   def $ (r: Rexp) = RECD(s, r)
    43   def $ (r: Rexp) = RECD(s, r)
    44 }
    44 }
    45 
    45 
    46 def pretty(r: Rexp) : String = r match {
    46 def pretty(r: Rexp) : String = r match {
    47   case NULL => "0"
    47   case NULL => "0"
    83   case ALT(r1, r2) => (for (v1 <- values(r1)) yield Left(v1)) ++ 
    83   case ALT(r1, r2) => (for (v1 <- values(r1)) yield Left(v1)) ++ 
    84                       (for (v2 <- values(r2)) yield Right(v2))
    84                       (for (v2 <- values(r2)) yield Right(v2))
    85   case SEQ(r1, r2) => for (v1 <- values(r1); v2 <- values(r2)) yield Sequ(v1, v2)
    85   case SEQ(r1, r2) => for (v1 <- values(r1); v2 <- values(r2)) yield Sequ(v1, v2)
    86   case STAR(r) => Set(Void) ++ values(r)   // to do more would cause the set to be infinite
    86   case STAR(r) => Set(Void) ++ values(r)   // to do more would cause the set to be infinite
    87   case RECD(_, r) => values(r)
    87   case RECD(_, r) => values(r)
       
    88 }
       
    89 
       
    90 // zeroable function: tests whether the regular 
       
    91 // expression cannot regognise any string
       
    92 def zeroable (r: Rexp) : Boolean = r match {
       
    93   case NULL => true
       
    94   case EMPTY => false
       
    95   case CHAR(_) => false
       
    96   case ALT(r1, r2) => zeroable(r1) && zeroable(r2)
       
    97   case SEQ(r1, r2) => zeroable(r1) || zeroable(r2)
       
    98   case STAR(_) => false
       
    99   case RECD(_, r1) => zeroable(r1)
    88 }
   100 }
    89 
   101 
    90 
   102 
    91 // nullable function: tests whether the regular 
   103 // nullable function: tests whether the regular 
    92 // expression can recognise the empty string
   104 // expression can recognise the empty string
   242 }
   254 }
   243 
   255 
   244 def lexing_simp(r: Rexp, s: String) : Val = lex_simp(r, s.toList)
   256 def lexing_simp(r: Rexp, s: String) : Val = lex_simp(r, s.toList)
   245 
   257 
   246 
   258 
   247 // brute force infrastructure
   259 // brute force search infrastructure
   248 
   260 
       
   261 // enumerates regular expressions until a certain depth
   249 def enum(n: Int, s: String) : Set[Rexp] = n match {
   262 def enum(n: Int, s: String) : Set[Rexp] = n match {
   250   case 0 => Set(NULL, EMPTY) ++ s.toSet.map(CHAR)
   263   case 0 => Set(NULL, EMPTY) ++ s.toSet.map(CHAR)
   251   case n => {
   264   case n => {
   252     val rs = enum(n - 1, s)
   265     val rs = enum(n - 1, s)
   253     rs ++
   266     rs ++
   254     (for (r1 <- rs; r2 <- rs) yield ALT(r1, r2)) ++
   267     (for (r1 <- rs; r2 <- rs) yield ALT(r1, r2)) ++
   255     (for (r1 <- rs; r2 <- rs) yield SEQ(r1, r2))
   268     (for (r1 <- rs; r2 <- rs) yield SEQ(r1, r2))
   256   }
   269   }
   257 }
   270 }
   258 
   271 
   259 def ord(v1: Val, r: Rexp, v2: Val) : Boolean = (v1, r, v2) match {
   272 def ordr(v1: Val, r: Rexp, v2: Val) : Boolean = (v1, r, v2) match {
   260   case (Void, EMPTY, Void) => true
   273   case (Void, EMPTY, Void) => true
   261   case (Chr(c1), CHAR(c2), Chr(c3)) if (c1==c2 && c1==c3) => true
   274   case (Chr(c1), CHAR(c2), Chr(c3)) if (c1 == c2 && c1 == c3) => true
   262   case (Left(v1), ALT(r1,r2), Left(v2)) => ord(v1,r1,v2)
   275   case (Left(v1), ALT(r1, r2), Left(v2)) => ordr(v1, r1, v2)
   263   case (Right(v1), ALT(r1,r2), Right(v2)) => ord(v1,r2,v2)
   276   case (Right(v1), ALT(r1, r2), Right(v2)) => ordr(v1, r2, v2)
   264   case (Left(v1), ALT(r1,r2), Right(v2)) => flatten(v2).length <= flatten(v1).length
   277   case (Left(v1), ALT(r1, r2), Right(v2)) => flatten(v2).length <= flatten(v1).length
   265   case (Right(v1), ALT(r1,r2), Left(v2)) => flatten(v2).length < flatten(v1).length
   278   case (Right(v1), ALT(r1, r2), Left(v2)) => flatten(v2).length < flatten(v1).length
   266   case (Sequ(v1,v2), SEQ(r1,r2), Sequ(v3,v4)) if(v1==v3) => ord(v2, r2, v4)
   279   case (Sequ(v1, v2), SEQ(r1, r2), Sequ(v3, v4)) if (v1 == v3) => ordr(v2, r2, v4)
   267   case (Sequ(v1,v2), SEQ(r1,r2), Sequ(v3,v4)) if(v1!=v3) => ord(v1, r1, v3)
   280   case (Sequ(v1, v2), SEQ(r1, r2), Sequ(v3, v4)) if (v1 != v3) => ordr(v1, r1, v3)
   268   case _ => false
   281   case _ => false
   269 }     
   282 }     
   270 
   283 
   271 def tst(r: Rexp, c: Char) : Set[(Rexp, Val, Val, Val, Val, List[String], List[String])] = {
   284 def ord(v1: Val, v2: Val) : Boolean = (v1, v2) match {
       
   285   case (Void, Void) => true
       
   286   case (Chr(c1), Chr(c2)) if (c1 == c2) => true
       
   287   case (Left(v1), Left(v2)) => ord(v1, v2)
       
   288   case (Right(v1), Right(v2)) => ord(v1, v2)
       
   289   case (Left(v1), Right(v2)) => flatten(v2).length <= flatten(v1).length
       
   290   case (Right(v1), Left(v2)) => flatten(v2).length < flatten(v1).length
       
   291   case (Sequ(v1, v2), Sequ(v3, v4)) if (v1 == v3) => ord(v2, v4)
       
   292   case (Sequ(v1, v2), Sequ(v3, v4)) if (v1 != v3) => ord(v1, v3)
       
   293   case _ => false
       
   294 }     
       
   295 
       
   296 // exhaustively tests values of a regular expression
       
   297 def vfilter2(f: Rexp => Val => Val => Boolean)(r: Rexp) : Set[(Rexp, Val, Val)] = {
       
   298   val vs = values(r)
       
   299   for (v1 <- vs; v2 <- vs; if (f(r)(v1)(v2))) yield (r, v1, v2)
       
   300 }
       
   301 
       
   302 def vfilter3(f: Rexp => Val => Val => Val => Boolean)(r: Rexp) : Set[(Rexp, Val, Val, Val)] = {
       
   303   val vs = values(r)
       
   304   for (v1 <- vs; v2 <- vs; v3 <- vs; if (f(r)(v1)(v2)(v3))) yield (r, v1, v2, v3)
       
   305 }
       
   306 
       
   307 // number of test cases
       
   308 enum(3, "a").size
       
   309 enum(2, "ab").size
       
   310 enum(2, "abc").size
       
   311 //enum(3, "ab").size
       
   312 
       
   313 
       
   314 // tests for totality
       
   315 def totality_test(r: Rexp)(v1: Val)(v2: Val) : Boolean =
       
   316   !(ord(v1, v2) || ord(v2, v1))
       
   317 
       
   318 enum(2, "ab").flatMap(vfilter2(totality_test))
       
   319 enum(3, "a").flatMap(vfilter2(totality_test))
       
   320 
       
   321 
       
   322 //tests for transitivity (simple transitivity fails)
       
   323 def transitivity_test(r: Rexp)(v1: Val)(v2: Val)(v3: Val) : Boolean = 
       
   324   ord(v1, v2) && ord(v2, v3) && !ord(v1, v3)
       
   325 
       
   326 def transitivity_test_extended(r: Rexp)(v1: Val)(v2: Val)(v3: Val) : Boolean = {
       
   327   flatten(v1) == flatten(v2) && flatten(v2) == flatten(v3) && 
       
   328   ord(v1, v2) && ord(v2, v3) && !ord(v1, v3)
       
   329 }
       
   330 
       
   331 // smallest(?) example where simple transitivity fails 
       
   332 val test1 = enum(3, "a").flatMap(vfilter3(transitivity_test))
       
   333 val test1_smallest = test1.toList.sortWith { (x,y) => size(x._1) < size(y._1) }.head
       
   334 
       
   335 pretty(test1_smallest._1)
       
   336 vpretty(test1_smallest._2)
       
   337 vpretty(test1_smallest._3)
       
   338 vpretty(test1_smallest._4)
       
   339 
       
   340 ord(test1_smallest._2, test1_smallest._3)
       
   341 ord(test1_smallest._3, test1_smallest._4)
       
   342 ord(test1_smallest._2, test1_smallest._4)
       
   343 ordr(test1_smallest._3, test1_smallest._1, test1_smallest._4)
       
   344 
       
   345 // with flatten test
       
   346 enum(3, "a").flatMap(vfilter3(transitivity_test_extended))
       
   347 
       
   348 //injection tests
       
   349 def injection_test(r: Rexp)(c: Char)(v1: Val)(v2: Val) : Boolean = {
       
   350   v1 != v2 && 
       
   351   ord(v1, v2) && 
       
   352   ord(inj(r, c, v2), inj(r, c, v1)) && 
       
   353   flatten(v1) == flatten(v2)
       
   354 }
       
   355 
       
   356 def injection_testA(r: Rexp)(c: Char)(v1: Val)(v2: Val) : Boolean = {
       
   357   v1 != v2 && 
       
   358   ord(v1, v2) && 
       
   359   ord(inj(r, c, v2), inj(r, c, v1)) && 
       
   360   flatten(v1).length == flatten(v2).length
       
   361 }
       
   362 
       
   363 def injection_test2(r: Rexp)(c: Char)(v1: Val)(v2: Val) : Boolean = {
       
   364   v1 != v2 && 
       
   365   ord(v1, v2) && 
       
   366   ord(inj(r, c, v2), inj(r, c, v1))
       
   367 }
       
   368 def vfilter4(f: Rexp => Char => Val => Val => Boolean)(c: Char)(r: Rexp) : Set[(Rexp, Rexp, Val, Val)] = {
   272   val r_der = der(c, r)
   369   val r_der = der(c, r)
   273   val vs = values(r_der)
   370   val vs = values(r_der)
   274   for (v1 <- vs; v2 <- vs; 
   371   for (v1 <- vs; v2 <- vs; if (f(r)(c)(v1)(v2))) yield (r, r_der, v1, v2)
   275        if (v1 != v2 && ord(v1, r_der, v2) && ord(inj(r, c, v2), r, inj(r, c, v1)) && 
   372 } 
   276            (flatten(inj(r, c, v1)) == flatten(inj(r, c, v2))))) 
   373 
   277   yield (r, v1, v2, inj(r, c, v1), inj(r, c, v2), flattens(inj(r, c, v1)), flattens(inj(r, c, v2)))  
   374 enum(3, "a").flatMap(vfilter4(injection_test)('a'))
   278 }
   375 enum(3, "a").flatMap(vfilter4(injection_testA)('a'))
   279 
   376 
   280 def tst2(r: Rexp, c: Char) : Set[(Rexp, Val, Val, Val, Val, List[String], List[String])] = {
   377 val test2 = enum(3, "a").flatMap(vfilter4(injection_test2)('a'))
   281   val r_der = der(c, r)
   378 val test2_smallest = test2.toList.sortWith { (x,y) => size(x._1) < size(y._1) }.head
   282   val vs = values(r_der)
   379 
   283   for (v1 <- vs; v2 <- vs; 
   380 pretty(test2_smallest._1)
   284        if (v1 != v2 && ord(v1, r_der, v2) && ord(proj(r, c, v2), r_der, proj(r, c, v1))) 
   381 pretty(test2_smallest._2)
   285   yield (r, v1, v2, proj(r, c, v1), proj(r, c, v2), flattens(inj(r, c, v1)), flattens(inj(r, c, v2)))  
   382 vpretty(test2_smallest._3)
   286 }
   383 vpretty(test2_smallest._4)
   287 
   384 vpretty(inj(test2_smallest._1, 'a', test2_smallest._3))
   288 def comp(r1: (Rexp, Val, Val, Val, Val, List[String], List[String]), 
   385 vpretty(inj(test2_smallest._1, 'a', test2_smallest._4))
   289          r2: (Rexp, Val, Val, Val, Val, List[String], List[String])) = size(r1._1) < size(r2._1)
       
   290 
       
   291 
       
   292 val smallest = enum(3, "a").flatMap(tst(_, 'a')).toList.sortWith { comp }
       
   293 
       
   294 smallest match {
       
   295   case Nil => "none"
       
   296   case (r, v1, v2, v3, v4, s1, s2)::_ => List(pretty(r),
       
   297                                    pretty(der('a', r)),
       
   298                                    vpretty(v1),
       
   299                                    vpretty(v2),
       
   300                                    vpretty(v3),
       
   301                                    vpretty(v4), s1, s2).mkString("\n") }
       
   302 
   386 
   303 // Lexing Rules for a Small While Language
   387 // Lexing Rules for a Small While Language
   304 
   388 
   305 def PLUS(r: Rexp) = r ~ r.%
   389 def PLUS(r: Rexp) = r ~ r.%
   306 val SYM = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
   390 val SYM = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
   467 vpretty(m)
   551 vpretty(m)
   468 val vb = inj(aa, 'b', m)
   552 val vb = inj(aa, 'b', m)
   469 vpretty(vb)
   553 vpretty(vb)
   470 val va = inj(a0, 'a', vb)
   554 val va = inj(a0, 'a', vb)
   471 vpretty(va)
   555 vpretty(va)
       
   556 
       
   557 
       
   558 /* ord is not transitive in general: counter-example
       
   559  *
       
   560  * (1)  Left(Right(Right(())))
       
   561  * (2)  Right(Left(()) ~ Left(()))
       
   562  * (3)  Right(Right(()) ~ Right(a))
       
   563  *
       
   564  * (1) > (2); (2) > (3) but not (1) > (3)
       
   565 */