marking4/re.scala
changeset 245 975d34506e88
parent 227 b5f3e814a710
child 288 65731df141a5
equal deleted inserted replaced
244:a359976a6f3e 245:975d34506e88
     1 // Part 1 about Regular Expression Matching
     1 // Part 1 about Regular Expression Matching
     2 //==========================================
     2 //==========================================
     3 
     3 
     4 object CW8a {
     4 //object CW9a {
     5 
     5 
       
     6 // Regular Expressions
     6 abstract class Rexp
     7 abstract class Rexp
     7 case object ZERO extends Rexp
     8 case object ZERO extends Rexp
     8 case object ONE extends Rexp
     9 case object ONE extends Rexp
     9 case class CHAR(c: Char) extends Rexp
    10 case class CHAR(c: Char) extends Rexp
    10 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
    11 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
    36   def % = STAR(s)
    37   def % = STAR(s)
    37   def ~ (r: Rexp) = SEQ(s, r)
    38   def ~ (r: Rexp) = SEQ(s, r)
    38   def ~ (r: String) = SEQ(s, r)
    39   def ~ (r: String) = SEQ(s, r)
    39 }
    40 }
    40 
    41 
    41 // (1a) Complete the function nullable according to
    42 // (1) Complete the function nullable according to
    42 // the definition given in the coursework; this 
    43 // the definition given in the coursework; this 
    43 // function checks whether a regular expression
    44 // function checks whether a regular expression
    44 // can match the empty string
    45 // can match the empty string and Returns a boolean
       
    46 // accordingly.
    45 
    47 
    46 def nullable (r: Rexp) : Boolean = r match {
    48 def nullable (r: Rexp) : Boolean = r match {
    47   case ZERO => false
    49   case ZERO => false
    48   case ONE => true
    50   case ONE => true
    49   case CHAR(_) => false
    51   case CHAR(_) => false
    50   case ALT(r1, r2) => nullable(r1) || nullable(r2)
    52   case ALT(r1, r2) => nullable(r1) || nullable(r2)
    51   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    53   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    52   case STAR(_) => true
    54   case STAR(_) => true
    53 }
    55 }
    54 
    56 
    55 // (1b) Complete the function der according to
    57 // (2) Complete the function der according to
    56 // the definition given in the coursework; this
    58 // the definition given in the coursework; this
    57 // function calculates the derivative of a 
    59 // function calculates the derivative of a 
    58 // regular expression w.r.t. a character
    60 // regular expression w.r.t. a character.
    59 
    61 
    60 def der (c: Char, r: Rexp) : Rexp = r match {
    62 def der (c: Char, r: Rexp) : Rexp = r match {
    61   case ZERO => ZERO
    63   case ZERO => ZERO
    62   case ONE => ZERO
    64   case ONE => ZERO
    63   case CHAR(d) => if (c == d) ONE else ZERO
    65   case CHAR(d) => if (c == d) ONE else ZERO
    66     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
    68     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
    67     else SEQ(der(c, r1), r2)
    69     else SEQ(der(c, r1), r2)
    68   case STAR(r1) => SEQ(der(c, r1), STAR(r1))
    70   case STAR(r1) => SEQ(der(c, r1), STAR(r1))
    69 }
    71 }
    70 
    72 
    71 // (1c) Complete the function der according to
    73 // (3) Complete the simp function according to
    72 // the specification given in the coursework; this
    74 // the specification given in the coursework; this
    73 // function simplifies a regular expression;
    75 // function simplifies a regular expression from
    74 // however it does not simplify inside STAR-regular
    76 // the inside out, like you would simplify arithmetic 
    75 // expressions
    77 // expressions; however it does not simplify inside 
       
    78 // STAR-regular expressions.
    76 
    79 
    77 def simp(r: Rexp) : Rexp = r match {
    80 def simp(r: Rexp) : Rexp = r match {
    78   case ALT(r1, r2) => (simp(r1), simp(r2)) match {
    81   case ALT(r1, r2) => (simp(r1), simp(r2)) match {
    79     case (ZERO, r2s) => r2s
    82     case (ZERO, r2s) => r2s
    80     case (r1s, ZERO) => r1s
    83     case (r1s, ZERO) => r1s
    88     case (r1s, r2s) => SEQ(r1s, r2s)
    91     case (r1s, r2s) => SEQ(r1s, r2s)
    89   }
    92   }
    90   case r => r
    93   case r => r
    91 }
    94 }
    92 
    95 
    93 // (1d) Complete the two functions below; the first 
    96 
       
    97 // (4) Complete the two functions below; the first 
    94 // calculates the derivative w.r.t. a string; the second
    98 // calculates the derivative w.r.t. a string; the second
    95 // is the regular expression matcher taking a regular
    99 // is the regular expression matcher taking a regular
    96 // expression and a string and checks whether the
   100 // expression and a string and checks whether the
    97 // string matches the regular expression
   101 // string matches the regular expression.
    98 
   102 
    99 def ders (s: List[Char], r: Rexp) : Rexp = s match {
   103 def ders (s: List[Char], r: Rexp) : Rexp = s match {
   100   case Nil => r
   104   case Nil => r
   101   case c::s => ders(s, simp(der(c, r)))
   105   case c::s => ders(s, simp(der(c, r)))
   102 }
   106 }
   103 
   107 
   104 // main matcher function
   108 // main matcher function
   105 def matcher(r: Rexp, s: String): Boolean = nullable(ders(s.toList, r))
   109 def matcher(r: Rexp, s: String) = nullable(ders(s.toList, r))
   106 
   110 
   107 // (1e) Complete the size function for regular
   111 // (5) Complete the size function for regular
   108 // expressions  according to the specification 
   112 // expressions according to the specification 
   109 // given in the coursework.
   113 // given in the coursework.
       
   114 
   110 
   115 
   111 def size(r: Rexp): Int = r match {
   116 def size(r: Rexp): Int = r match {
   112   case ZERO => 1
   117   case ZERO => 1
   113   case ONE => 1
   118   case ONE => 1
   114   case CHAR(_) => 1
   119   case CHAR(_) => 1
   118 }
   123 }
   119 
   124 
   120 
   125 
   121 
   126 
   122 // some testing data
   127 // some testing data
   123 /*
   128 
   124 matcher(("a" ~ "b") ~ "c", "abc")  // => true
   129 //matcher(("a" ~ "b") ~ "c", "abc")  // => true
   125 matcher(("a" ~ "b") ~ "c", "ab")   // => false
   130 //matcher(("a" ~ "b") ~ "c", "ab")   // => false
   126 
   131 
   127 // the supposedly 'evil' regular expression (a*)* b
   132 // the supposedly 'evil' regular expression (a*)* b
   128 val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
   133 val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
   129 
   134 
   130 matcher(EVIL, "a" * 1000 ++ "b")   // => true
   135 //matcher(EVIL, "a" * 1000 ++ "b")   // => true
   131 matcher(EVIL, "a" * 1000)          // => false
   136 //matcher(EVIL, "a" * 1000)          // => false
   132 
   137 
   133 // size without simplifications
   138 // size without simplifications
   134 size(der('a', der('a', EVIL)))             // => 28
   139 //size(der('a', der('a', EVIL)))             // => 28
   135 size(der('a', der('a', der('a', EVIL))))   // => 58
   140 //size(der('a', der('a', der('a', EVIL))))   // => 58
   136 
   141 
   137 // size with simplification
   142 // size with simplification
   138 size(simp(der('a', der('a', EVIL))))           // => 8
   143 //size(simp(der('a', der('a', EVIL))))           // => 8
   139 size(simp(der('a', der('a', der('a', EVIL))))) // => 8
   144 //size(simp(der('a', der('a', der('a', EVIL))))) // => 8
   140 
   145 
   141 // Java needs around 30 seconds for matching 28 a's with EVIL. 
   146 // Python needs around 30 seconds for matching 28 a's with EVIL. 
       
   147 // Java 9 and later increase this to an "astonishing" 40000 a's in
       
   148 // around 30 seconds.
   142 //
   149 //
   143 // Lets see how long it takes to match strings with 
   150 // Lets see how long it takes to match strings with 
   144 // 0.5 Million a's...it should be in the range of some
   151 // 5 Million a's...it should be in the range of a 
   145 // seconds.
   152 // couple of seconds.
   146 
   153 
   147 def time_needed[T](i: Int, code: => T) = {
   154 def time_needed[T](i: Int, code: => T) = {
   148   val start = System.nanoTime()
   155   val start = System.nanoTime()
   149   for (j <- 1 to i) code
   156   for (j <- 1 to i) code
   150   val end = System.nanoTime()
   157   val end = System.nanoTime()
   151   (end - start)/(i * 1.0e9)
   158   (end - start)/(i * 1.0e9)
   152 }
   159 }
   153 
   160 
   154 for (i <- 0 to 5000000 by 500000) {
   161 //for (i <- 0 to 5000000 by 500000) {
   155   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i))))
   162 //  println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i))) + " secs.") 
   156 }
   163 //}
   157 */
       
   158 
   164 
   159 }
   165 // another "power" test case 
       
   166 //simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(100).next) == ONE
       
   167 
       
   168 // the Iterator produces the rexp
       
   169 //
       
   170 //      SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE)
       
   171 //
       
   172 //    where SEQ is nested 100 times.
       
   173  
       
   174 
       
   175 
       
   176 //}