progs/re.scala
changeset 68 8da9e0c16194
parent 62 2151c77e1e24
equal deleted inserted replaced
67:ca5884c2e3bd 68:8da9e0c16194
       
     1 // Part 1 about Regular Expression Matching
       
     2 //==========================================
     1 
     3 
     2 abstract class Rexp
     4 abstract class Rexp
     3 case object ZERO extends Rexp
     5 case object ZERO extends Rexp
     4 case object ONE extends Rexp
     6 case object ONE extends Rexp
     5 case class CHAR(c: Char) extends Rexp
     7 case class CHAR(c: Char) extends Rexp
     6 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
     8 case class ALT(r1: Rexp, r2: Rexp) extends Rexp   // alternative 
     7 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
     9 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp   // sequence
     8 case class STAR(r: Rexp) extends Rexp 
    10 case class STAR(r: Rexp) extends Rexp             // star
     9 
    11 
    10 // nullable function: tests whether the regular 
    12 
    11 // expression can recognise the empty string
    13 // some convenience for typing in regular expressions
    12 def nullable (r: Rexp) : Boolean = r match {
    14 
    13   case ZERO => false
    15 import scala.language.implicitConversions    
    14   case ONE => true
    16 import scala.language.reflectiveCalls 
    15   case CHAR(_) => false
    17 
    16   case ALT(r1, r2) => nullable(r1) || nullable(r2)
    18 def charlist2rexp(s: List[Char]): Rexp = s match {
    17   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    19   case Nil => ONE
    18   case STAR(_) => true
    20   case c::Nil => CHAR(c)
       
    21   case c::s => SEQ(CHAR(c), charlist2rexp(s))
       
    22 }
       
    23 implicit def string2rexp(s: String): Rexp = charlist2rexp(s.toList)
       
    24 
       
    25 implicit def RexpOps (r: Rexp) = new {
       
    26   def | (s: Rexp) = ALT(r, s)
       
    27   def % = STAR(r)
       
    28   def ~ (s: Rexp) = SEQ(r, s)
    19 }
    29 }
    20 
    30 
    21 // derivative of a regular expression w.r.t. a character
    31 implicit def stringOps (s: String) = new {
    22 def der (c: Char, r: Rexp) : Rexp = r match {
    32   def | (r: Rexp) = ALT(s, r)
    23   case ZERO => ZERO
    33   def | (r: String) = ALT(s, r)
    24   case ONE => ZERO
    34   def % = STAR(s)
    25   case CHAR(d) => if (c == d) ONE else ZERO
    35   def ~ (r: Rexp) = SEQ(s, r)
    26   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
    36   def ~ (r: String) = SEQ(s, r)
    27   case SEQ(r1, r2) => 
       
    28     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
       
    29     else SEQ(der(c, r1), r2)
       
    30   case STAR(r1) => SEQ(der(c, r1), STAR(r1))
       
    31 }
    37 }
    32 
    38 
    33 def simp(r: Rexp) : Rexp = r match {
    39 // (1a) Complete the function nullable according to
    34   case ALT(r1, r2) => (simp(r1), simp(r2)) match {
    40 // the definition given in the coursework; this 
    35     case (ZERO, r2s) => r2s
    41 // function checks whether a regular expression
    36     case (r1s, ZERO) => r1s
    42 // can match the empty string
    37     case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s)
    43 
    38   }
    44 def nullable (r: Rexp) : Boolean = ...
    39   case SEQ(r1, r2) =>  (simp(r1), simp(r2)) match {
       
    40     case (ZERO, _) => ZERO
       
    41     case (_, ZERO) => ZERO
       
    42     case (ONE, r2s) => r2s
       
    43     case (r1s, ONE) => r1s
       
    44     case (r1s, r2s) => SEQ(r1s, r2s)
       
    45   }
       
    46   case r => r
       
    47 }
       
    48 
    45 
    49 
    46 
    50 // derivative w.r.t. a string (iterates der)
    47 // (1b) Complete the function der according to
    51 def ders (s: List[Char], r: Rexp) : Rexp = s match {
    48 // the definition given in the coursework; this
    52   case Nil => r
    49 // function calculates the derivative of a 
    53   case c::s => ders(s, simp(der(c, r)))
    50 // regular expression w.r.t. a character
    54 }
    51 
       
    52 def der (c: Char, r: Rexp) : Rexp = ...
       
    53 
       
    54 // (1c) Complete the function der according to
       
    55 // the specification given in the coursework; this
       
    56 // function simplifies a regular expression;
       
    57 // however it does not simplify inside STAR-regular
       
    58 // expressions
       
    59 
       
    60 def simp(r: Rexp) : Rexp = ... 
       
    61 
       
    62 // (1d) Complete the two functions below; the first 
       
    63 // calculates the derivative w.r.t. a string; the second
       
    64 // is the regular expression matcher taking a regular
       
    65 // expression and a string and checks whether the
       
    66 // string matches the regular expression
       
    67 
       
    68 def ders (s: List[Char], r: Rexp) : Rexp = ... 
       
    69 
       
    70 def matcher(r: Rexp, s: String): Boolean = ...
    55 
    71 
    56 
    72 
    57 // main matcher function
    73 // (1e) Complete the function below: it searches (from the left to 
    58 def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
    74 // right) in string s1 all the non-empty substrings that match the 
       
    75 // regular expression -- these substrings are assumed to be
       
    76 // the longest substrings matched by the regular expression and
       
    77 // assumed to be non-overlapping. All these substrings in s1 are replaced
       
    78 // by s2.
    59 
    79 
    60 //one or zero
    80 def replace(r: Rexp, s1: String, s2: String): String = ...
    61 def OPT(r: Rexp) = ALT(r, ONE)
       
    62 
    81 
    63 //evil regular expressions
    82 
    64 def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
    83 
    65 val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
    84 // some testing data
       
    85 // the supposedly 'evil' regular expression (a*)* b
       
    86 /*
       
    87 val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
       
    88 println(matcher(EVIL, "a" * 1000 ++ "b"))
       
    89 println(matcher(EVIL, "a" * 1000))
    66 
    90 
    67 
    91 
    68 def time_needed[T](i: Int, code: => T) = {
    92 def time_needed[T](i: Int, code: => T) = {
    69   val start = System.nanoTime()
    93   val start = System.nanoTime()
    70   for (j <- 1 to i) code
    94   for (j <- 1 to i) code
    71   val end = System.nanoTime()
    95   val end = System.nanoTime()
    72   (end - start)/(i * 1.0e9)
    96   (end - start)/(i * 1.0e9)
    73 }
    97 }
    74 
    98 
       
    99 for (i <- 1 to 5000001 by 500000) {
       
   100   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i))))
       
   101 }
       
   102 */
    75 
   103 
    76 //test: (a?{n}) (a{n})
       
    77 for (i <- 1 to 8001 by 1000) {
       
    78   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL1(i), "a" * i))))
       
    79 }
       
    80 
   104 
    81 for (i <- 1 to 8001 by 1000) {
       
    82   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL1(i), "a" * i))))
       
    83 }
       
    84 
       
    85 //test: (a*)* b
       
    86 for (i <- 1 to 7000001 by 500000) {
       
    87   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL2, "a" * i))))
       
    88 }
       
    89 
       
    90 for (i <- 1 to 7000001 by 500000) {
       
    91   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL2, "a" * i))))
       
    92 }
       
    93