progs/re_sol.scala
changeset 90 d77af4aca939
child 98 8f03f0dc3065
equal deleted inserted replaced
89:fac25de665d2 90:d77af4aca939
       
     1 // Part 1 about Regular Expression Matching
       
     2 //==========================================
       
     3 
       
     4 abstract class Rexp
       
     5 case object ZERO extends Rexp
       
     6 case object ONE extends Rexp
       
     7 case class CHAR(c: Char) extends Rexp
       
     8 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
       
     9 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
       
    10 case class STAR(r: Rexp) extends Rexp 
       
    11 
       
    12 // some convenience for typing in regular expressions
       
    13 
       
    14 import scala.language.implicitConversions    
       
    15 import scala.language.reflectiveCalls 
       
    16 
       
    17 def charlist2rexp(s: List[Char]): Rexp = s match {
       
    18   case Nil => ONE
       
    19   case c::Nil => CHAR(c)
       
    20   case c::s => SEQ(CHAR(c), charlist2rexp(s))
       
    21 }
       
    22 implicit def string2rexp(s: String): Rexp = charlist2rexp(s.toList)
       
    23 
       
    24 implicit def RexpOps (r: Rexp) = new {
       
    25   def | (s: Rexp) = ALT(r, s)
       
    26   def % = STAR(r)
       
    27   def ~ (s: Rexp) = SEQ(r, s)
       
    28 }
       
    29 
       
    30 implicit def stringOps (s: String) = new {
       
    31   def | (r: Rexp) = ALT(s, r)
       
    32   def | (r: String) = ALT(s, r)
       
    33   def % = STAR(s)
       
    34   def ~ (r: Rexp) = SEQ(s, r)
       
    35   def ~ (r: String) = SEQ(s, r)
       
    36 }
       
    37 
       
    38 // (1a) Complete the function nullable according to
       
    39 // the definition given in the coursework; this 
       
    40 // function checks whether a regular expression
       
    41 // can match the empty string
       
    42 
       
    43 def nullable (r: Rexp) : Boolean = r match {
       
    44   case ZERO => false
       
    45   case ONE => true
       
    46   case CHAR(_) => false
       
    47   case ALT(r1, r2) => nullable(r1) || nullable(r2)
       
    48   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
       
    49   case STAR(_) => true
       
    50 }
       
    51 
       
    52 // (1b) Complete the function der according to
       
    53 // the definition given in the coursework; this
       
    54 // function calculates the derivative of a 
       
    55 // regular expression w.r.t. a character
       
    56 
       
    57 def der (c: Char, r: Rexp) : Rexp = r match {
       
    58   case ZERO => ZERO
       
    59   case ONE => ZERO
       
    60   case CHAR(d) => if (c == d) ONE else ZERO
       
    61   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
       
    62   case SEQ(r1, r2) => 
       
    63     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
       
    64     else SEQ(der(c, r1), r2)
       
    65   case STAR(r1) => SEQ(der(c, r1), STAR(r1))
       
    66 }
       
    67 
       
    68 // (1c) Complete the function der according to
       
    69 // the specification given in the coursework; this
       
    70 // function simplifies a regular expression;
       
    71 // however it does not simplify inside STAR-regular
       
    72 // expressions
       
    73 
       
    74 def simp(r: Rexp) : Rexp = r match {
       
    75   case ALT(r1, r2) => (simp(r1), simp(r2)) match {
       
    76     case (ZERO, r2s) => r2s
       
    77     case (r1s, ZERO) => r1s
       
    78     case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s)
       
    79   }
       
    80   case SEQ(r1, r2) =>  (simp(r1), simp(r2)) match {
       
    81     case (ZERO, _) => ZERO
       
    82     case (_, ZERO) => ZERO
       
    83     case (ONE, r2s) => r2s
       
    84     case (r1s, ONE) => r1s
       
    85     case (r1s, r2s) => SEQ(r1s, r2s)
       
    86   }
       
    87   case r => r
       
    88 }
       
    89 
       
    90 // (1d) Complete the two functions below; the first 
       
    91 // calculates the derivative w.r.t. a string; the second
       
    92 // is the regular expression matcher taking a regular
       
    93 // expression and a string and checks whether the
       
    94 // string matches the regular expression
       
    95 
       
    96 def ders (s: List[Char], r: Rexp) : Rexp = s match {
       
    97   case Nil => r
       
    98   case c::s => ders(s, simp(der(c, r)))
       
    99 }
       
   100 
       
   101 // main matcher function
       
   102 def matcher(r: Rexp, s: String): Boolean = nullable(ders(s.toList, r))
       
   103 
       
   104 
       
   105 // (1e) Complete the function below: it searches (from the left to 
       
   106 // right) in string s1 all the non-empty substrings that match the 
       
   107 // regular expression -- these substrings are assumed to be
       
   108 // the longest substrings matched by the regular expression and
       
   109 // assumed to be non-overlapping. All these substrings in s1 are replaced
       
   110 // by s2.
       
   111 
       
   112 
       
   113 
       
   114 def splits(s: String): List[(String, String)] =
       
   115   (for (i <- (1 to s.length).toList) yield s.splitAt(i)).reverse
       
   116   
       
   117 splits("abcde")
       
   118 splits("")
       
   119 
       
   120 def first(r: Rexp, lst: List[(String, String)]): Option[String] = lst match {
       
   121   case Nil => None
       
   122   case (s1, s2)::xs => if (matcher(r, s1)) Some(s2) else first(r, xs)
       
   123 }
       
   124  
       
   125 "abcd".head
       
   126 
       
   127 def replace(r: Rexp, s1: String, s2: String): String = first(r, splits(s1)) match {
       
   128   case None if (s1 == "") => ""
       
   129   case None => s1.head.toString ++ replace(r, s1.tail, s2)
       
   130   case Some(s) => s2 ++ replace(r, s, s2) 
       
   131 }
       
   132  
       
   133 val s1 =  "aabbbaaaaaaabaaaaabbaaaabb"
       
   134 val r: Rexp = "aa".% | "bb"
       
   135 splits(s1)
       
   136 first(r, splits(s1))
       
   137 
       
   138 replace(r, s1, "c")
       
   139 
       
   140 splits("bb")
       
   141 first(r, splits("bb"))
       
   142 replace(r, "abb", "c")
       
   143 replace(r, "abb", "")
       
   144 
       
   145 replace("aa".% | "bb", "abba" , "")
       
   146 
       
   147 val r = SEQ("a",SEQ("b", "c"))
       
   148 val r = SEQ(SEQ("a", "b"), "c")
       
   149 
       
   150 val rm = der('a', r)
       
   151 der('b', r)
       
   152 der('c', r)
       
   153 
       
   154 der('a', rm)
       
   155 val rn = der('b', rm)
       
   156 der('c', rm)
       
   157 
       
   158 der('a', rn)
       
   159 der('b', rn)
       
   160 der('c', rn)
       
   161 
       
   162 // some testing data
       
   163 // the supposedly 'evil' regular expression (a*)* b
       
   164 
       
   165 val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
       
   166 
       
   167 ders(List.fill(5)('a') , EVIL)
       
   168 ders(List.fill(1)('b') , EVIL)
       
   169 matcher(EVIL, "a" * 15 ++ "b")
       
   170 matcher(EVIL, "b")
       
   171 println(matcher(EVIL, "a" * 1000 ++ "b"))
       
   172 println(matcher(EVIL, "a" * 1000))
       
   173 
       
   174 
       
   175 def time_needed[T](i: Int, code: => T) = {
       
   176   val start = System.nanoTime()
       
   177   for (j <- 1 to i) code
       
   178   val end = System.nanoTime()
       
   179   (end - start)/(i * 1.0e9)
       
   180 }
       
   181 
       
   182 for (i <- 1 to 5000001 by 500000) {
       
   183   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i))))
       
   184 }
       
   185 
       
   186 
       
   187