Attic/re3ext.scala
changeset 742 b5b5583a3a08
parent 422 5deefcc8cffa
equal deleted inserted replaced
741:e66bd5c563eb 742:b5b5583a3a08
       
     1 abstract class Rexp 
       
     2 case object ZERO extends Rexp
       
     3 case object ONE extends Rexp
       
     4 case class CHAR(c: Char) extends Rexp
       
     5 case class ALT(r1: Rexp, r2: Rexp) extends Rexp
       
     6 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
       
     7 case class STAR(r: Rexp) extends Rexp 
       
     8 case class NTIMES(r: Rexp, n: Int) extends Rexp 
       
     9 
       
    10 def simp(r: Rexp): Rexp = r match {
       
    11   case ALT(r1, r2) => {
       
    12     (simp(r1), simp(r2)) match {
       
    13       case (ZERO, r2s) => r2s
       
    14       case (r1s, ZERO) => r1s
       
    15       case (r1s, r2s) => if (r1s == r2s) r1s else ALT(r1s, r2s)
       
    16     }
       
    17   }
       
    18   case SEQ(r1, r2) => {
       
    19     (simp(r1), simp(r2)) match {
       
    20       case (ZERO, _) => ZERO
       
    21       case (_, ZERO) => ZERO
       
    22       case (ONE, r2s) => r2s
       
    23       case (r1s, ONE) => r1s
       
    24       case (r1s, r2s) => SEQ(r1s, r2s)
       
    25     }
       
    26   }
       
    27   case NTIMES(r, n) => NTIMES(simp(r), n)    
       
    28   case r => r
       
    29 }
       
    30 
       
    31 def nullable (r: Rexp) : Boolean = r match {
       
    32   case ZERO => false
       
    33   case ONE => true
       
    34   case CHAR(_) => false
       
    35   case ALT(r1, r2) => nullable(r1) || nullable(r2)
       
    36   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
       
    37   case STAR(_) => true
       
    38   case NTIMES(r, i) => if (i == 0) true else nullable(r)
       
    39 }
       
    40 
       
    41 def der (c: Char, r: Rexp) : Rexp = r match {
       
    42   case ZERO => ZERO
       
    43   case ONE => ZERO
       
    44   case CHAR(d) => if (c == d) ONE else ZERO
       
    45   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
       
    46   case SEQ(r1, r2) => 
       
    47     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
       
    48     else SEQ(der(c, r1), r2)
       
    49   case STAR(r) => SEQ(der(c, r), STAR(r))
       
    50   case NTIMES(r, i) => 
       
    51     if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1))
       
    52 }
       
    53 
       
    54 def ders (s: List[Char], r: Rexp) : Rexp = s match {
       
    55   case Nil => r
       
    56   case c::s => ders(s, simp(der(c, r)))
       
    57 }
       
    58 
       
    59 def matches(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
       
    60 
       
    61 
       
    62 //one or zero
       
    63 def OPT(r: Rexp) = ALT(r, ONE)
       
    64 
       
    65 def EVIL(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
       
    66 
       
    67 def time_needed[T](i: Int, code: => T) = {
       
    68   val start = System.nanoTime()
       
    69   for (j <- 1 to i) code
       
    70   val end = System.nanoTime()
       
    71   (end - start)/(i * 1.0e9)
       
    72 }
       
    73 
       
    74 /*
       
    75 for (i <- 1 to 9001 by 1000) {
       
    76   println(i + " " + "%.5f".format(time_needed(1, matches(EVIL(i), "a" * i))))
       
    77 }
       
    78 */
       
    79 
       
    80 // some convenience for typing in regular expressions
       
    81 import scala.language.implicitConversions    
       
    82 import scala.language.reflectiveCalls 
       
    83 
       
    84 def charlist2rexp(s : List[Char]) : Rexp = s match {
       
    85   case Nil => ONE
       
    86   case c::Nil => CHAR(c)
       
    87   case c::s => SEQ(CHAR(c), charlist2rexp(s))
       
    88 }
       
    89 implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
       
    90 
       
    91 implicit def RexpOps (r: Rexp) = new {
       
    92   def | (s: Rexp) = ALT(r, s)
       
    93   def % = STAR(r)
       
    94   def ~ (s: Rexp) = SEQ(r, s)
       
    95 }
       
    96 
       
    97 implicit def stringOps (s: String) = new {
       
    98   def | (r: Rexp) = ALT(s, r)
       
    99   def | (r: String) = ALT(s, r)
       
   100   def % = STAR(s)
       
   101   def ~ (r: Rexp) = SEQ(s, r)
       
   102   def ~ (r: String) = SEQ(s, r)
       
   103 }
       
   104 
       
   105 
       
   106 def PLUS(r: Rexp) = SEQ(r, STAR(r))
       
   107 def RANGE(s: List[Char]) : Rexp = s match {
       
   108   case Nil => ZERO
       
   109   case c::s => ALT(CHAR(c), RANGE(s))
       
   110 } 
       
   111 def NMTIMES(r: Rexp, n: Int, m: Int) : Rexp =
       
   112   if (n >= m) NTIMES(r, n) else ALT(NTIMES(r, m), NMTIMES(r, n, m - 1))
       
   113 
       
   114 
       
   115 println("Coursework:")
       
   116 val REG1 = PLUS(PLUS("a" ~ "a" ~ "a"))
       
   117 val REG2 = PLUS(PLUS("aaaaaaaaaaaaaaaaaaa" ~ OPT("a")))  // 19 as ~ a?
       
   118 
       
   119 
       
   120 //40 * aaa matches
       
   121 //43 * aaa + aa does not
       
   122 //45 * aaa + a
       
   123 
       
   124 println("EVIL1:")
       
   125 println(matches(REG1, "aaa" * 40))
       
   126 println(matches(REG1, "aaa" * 43 + "aa"))
       
   127 println(matches(REG1, "aaa" * 45 + "a"))
       
   128 println("EVIL2:")
       
   129 println(matches(REG2, "aaa" * 40))
       
   130 println(matches(REG2, "aaa" * 43 + "aa"))
       
   131 println(matches(REG2, "aaa" * 45 + "a"))
       
   132 
       
   133 println("EMAIL:")
       
   134 val LOWERCASE = "abcdefghijklmnopqrstuvwxyz"
       
   135 val DIGITS = "0123456789"
       
   136 val EMAIL = PLUS(RANGE((LOWERCASE + DIGITS + "_" + "." + "-").toList)) ~ "@" ~ 
       
   137             PLUS(RANGE((LOWERCASE + DIGITS + "." + "-").toList)) ~ "." ~
       
   138             NMTIMES(RANGE((LOWERCASE + ".").toList), 2, 6)
       
   139 
       
   140 val my_email = "christian.urban@kcl.ac.uk"
       
   141 
       
   142 println(matches(EMAIL, my_email))
       
   143 
       
   144 println(matches(NTIMES("a", 2), "a"))
       
   145 println(matches(NTIMES("a", 2), "aa"))
       
   146 println(matches(NTIMES("a", 2), "aaa"))
       
   147 
       
   148 println(matches(NMTIMES("a", 2, 3), "a"))
       
   149 println(matches(NMTIMES("a", 2, 3), "aa"))
       
   150 println(matches(NMTIMES("a", 2, 3), "aaa"))
       
   151 println(matches(NMTIMES("a", 2, 3), "aaaa"))