testing4/re.scala
changeset 329 8a34b2ebc8cc
parent 300 72688efdf17c
equal deleted inserted replaced
328:0e591f806290 329:8a34b2ebc8cc
     6 // Regular Expressions
     6 // Regular Expressions
     7 abstract class Rexp
     7 abstract class Rexp
     8 case object ZERO extends Rexp
     8 case object ZERO extends Rexp
     9 case object ONE extends Rexp
     9 case object ONE extends Rexp
    10 case class CHAR(c: Char) extends Rexp
    10 case class CHAR(c: Char) extends Rexp
    11 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
    11 case class ALT(r1: Rexp, r2: Rexp) extends Rexp   // alternative 
    12 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
    12 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp   // sequence
    13 case class STAR(r: Rexp) extends Rexp 
    13 case class STAR(r: Rexp) extends Rexp             // star
    14 
    14 
    15 // some convenience for typing in regular expressions
    15 
       
    16 // some convenience for typing regular expressions
    16 
    17 
    17 import scala.language.implicitConversions    
    18 import scala.language.implicitConversions    
    18 import scala.language.reflectiveCalls 
    19 import scala.language.reflectiveCalls 
    19 
       
    20 
    20 
    21 def charlist2rexp(s: List[Char]): Rexp = s match {
    21 def charlist2rexp(s: List[Char]): Rexp = s match {
    22   case Nil => ONE
    22   case Nil => ONE
    23   case c::Nil => CHAR(c)
    23   case c::Nil => CHAR(c)
    24   case c::s => SEQ(CHAR(c), charlist2rexp(s))
    24   case c::s => SEQ(CHAR(c), charlist2rexp(s))
    37   def % = STAR(s)
    37   def % = STAR(s)
    38   def ~ (r: Rexp) = SEQ(s, r)
    38   def ~ (r: Rexp) = SEQ(s, r)
    39   def ~ (r: String) = SEQ(s, r)
    39   def ~ (r: String) = SEQ(s, r)
    40 }
    40 }
    41 
    41 
    42 // (1) Complete the function nullable according to
    42 // (5) Complete the function nullable according to
    43 // the definition given in the coursework; this 
    43 // the definition given in the coursework; this 
    44 // function checks whether a regular expression
    44 // function checks whether a regular expression
    45 // can match the empty string and Returns a boolean
    45 // can match the empty string and Returns a boolean
    46 // accordingly.
    46 // accordingly.
    47 
    47 
    48 def nullable (r: Rexp) : Boolean = r match {
    48 def nullable (r: Rexp) : Boolean = {
    49   case ZERO => false
    49 	r match {
    50   case ONE => true
    50 		case ZERO => false
    51   case CHAR(_) => false
    51 		case ONE => true
    52   case ALT(r1, r2) => nullable(r1) || nullable(r2)
    52 		case CHAR(c) => false
    53   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    53 		case ALT(r1, r2) => (nullable(r1) || nullable(r2))
    54   case STAR(_) => true
    54 		case SEQ(r1, r2) => (nullable(r1) && nullable(r2))
       
    55 		case STAR(r) => true
       
    56 	}
    55 }
    57 }
    56 
    58 
    57 // (2) Complete the function der according to
    59 // (6) Complete the function der according to
    58 // the definition given in the coursework; this
    60 // the definition given in the coursework; this
    59 // function calculates the derivative of a 
    61 // function calculates the derivative of a 
    60 // regular expression w.r.t. a character.
    62 // regular expression w.r.t. a character.
    61 
    63 
    62 def der (c: Char, r: Rexp) : Rexp = r match {
    64 def der (c: Char, r: Rexp) : Rexp = {
    63   case ZERO => ZERO
    65 	r match {
    64   case ONE => ZERO
    66 		case ZERO => ZERO
    65   case CHAR(d) => if (c == d) ONE else ZERO
    67 		case ONE => ZERO
    66   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
    68 		case CHAR(d) => if(d == c) ONE else ZERO
    67   case SEQ(r1, r2) => 
    69 		case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
    68     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
    70 		case SEQ(r1, r2) => if(nullable(r1)) {
    69     else SEQ(der(c, r1), r2)
    71 								(ALT(SEQ(der(c, r1), r2), der(c, r2)))
    70   case STAR(r1) => SEQ(der(c, r1), STAR(r1))
    72 							} else {
       
    73 								SEQ(der(c, r1), r2)
       
    74 							}
       
    75 		case STAR(r) => SEQ(der(c, r), STAR(r))
       
    76 	}
    71 }
    77 }
    72 
    78 
    73 // (3) Complete the simp function according to
    79 
       
    80 // (7) Complete the simp function according to
    74 // the specification given in the coursework; this
    81 // the specification given in the coursework; this
    75 // function simplifies a regular expression from
    82 // function simplifies a regular expression from
    76 // the inside out, like you would simplify arithmetic 
    83 // the inside out, like you would simplify arithmetic 
    77 // expressions; however it does not simplify inside 
    84 // expressions; however it does not simplify inside 
    78 // STAR-regular expressions.
    85 // STAR-regular expressions.
    79 
    86 
    80 def simp(r: Rexp) : Rexp = r match {
    87 def simp(r: Rexp) : Rexp = {
    81   case ALT(r1, r2) => (simp(r1), simp(r2)) match {
    88 	r match {
    82     case (ZERO, r2s) => r2s
    89 		case STAR(r) => STAR(r) // does not process r star
    83     case (r1s, ZERO) => r1s
    90 		case SEQ(r1, r2) => {
    84     case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s)
    91 			val x = (simp(r1), simp(r2))
    85   }
    92 			if(x._1 == ZERO) ZERO else
    86   case SEQ(r1, r2) =>  (simp(r1), simp(r2)) match {
    93 			if(x._2 == ZERO) ZERO else
    87     case (ZERO, _) => ZERO
    94 			if(x._1 == ONE) simp(x._2) else 
    88     case (_, ZERO) => ZERO
    95 			if(x._2 == ONE) simp(x._1) else
    89     case (ONE, r2s) => r2s
    96 			if(x._1 == x._2) simp(x._2) else
    90     case (r1s, ONE) => r1s
    97 			SEQ(simp(x._1), simp(x._2))
    91     case (r1s, r2s) => SEQ(r1s, r2s)
    98 		}
    92   }
    99 		case ALT(r1, r2) => {
    93   case r => r
   100 			val x = (simp(r1), simp(r2))
       
   101 			if(x._1 == ZERO) simp(x._2) else
       
   102 			if(x._2 == ZERO) simp(x._1) else
       
   103 			if(x._1 == x._2) simp(x._2) else
       
   104 			ALT(simp(x._1), simp(x._2))
       
   105 		}
       
   106 		case r => r // if single regex, return it
       
   107 	}
    94 }
   108 }
    95 
   109 
    96 
   110 
    97 // (4) Complete the two functions below; the first 
   111 // (8) Complete the two functions below; the first 
    98 // calculates the derivative w.r.t. a string; the second
   112 // calculates the derivative w.r.t. a string; the second
    99 // is the regular expression matcher taking a regular
   113 // is the regular expression matcher taking a regular
   100 // expression and a string and checks whether the
   114 // expression and a string and checks whether the
   101 // string matches the regular expression.
   115 // string matches the regular expression
   102 
   116 
   103 def ders (s: List[Char], r: Rexp) : Rexp = s match {
   117 def ders (s: List[Char], r: Rexp) : Rexp = {
   104   case Nil => r
   118 	s match {
   105   case c::s => ders(s, simp(der(c, r)))
   119 		case Nil => r
       
   120 		case c :: cs => ders(cs, simp(der(c,r)))
       
   121 	}
   106 }
   122 }
   107 
   123 
   108 // main matcher function
   124 def matcher(r: Rexp, s: String): Boolean = {
   109 def matcher(r: Rexp, s: String) = nullable(ders(s.toList, r))
   125 	val listOfCharacters = s.toList
       
   126 	val result = ders(listOfCharacters, r)
       
   127 	nullable(result)
       
   128 }
   110 
   129 
   111 // (5) Complete the size function for regular
   130 
       
   131 // (9) Complete the size function for regular
   112 // expressions according to the specification 
   132 // expressions according to the specification 
   113 // given in the coursework.
   133 // given in the coursework.
   114 
   134 
   115 
   135 def size(r: Rexp): Int = {
   116 def size(r: Rexp): Int = r match {
   136 	r match {
   117   case ZERO => 1
   137 		case ZERO => 1
   118   case ONE => 1
   138 		case ONE => 1
   119   case CHAR(_) => 1
   139 		case CHAR(c) => 1
   120   case ALT(r1, r2) => 1 + size(r1) + size (r2)
   140 		case ALT(r1, r2) => 1 + size(r1) + size(r2)
   121   case SEQ(r1, r2) => 1 + size(r1) + size (r2)
   141 		case SEQ(r1, r2) => 1 + size(r1) + size(r2)
   122   case STAR(r1) => 1 + size(r1)
   142 		case STAR(r) => 1 + size(r)
       
   143 	}
   123 }
   144 }
   124 
       
   125 
       
   126 
   145 
   127 // some testing data
   146 // some testing data
   128 
   147 
   129 //matcher(("a" ~ "b") ~ "c", "abc")  // => true
   148 /*
   130 //matcher(("a" ~ "b") ~ "c", "ab")   // => false
   149 matcher(("a" ~ "b") ~ "c", "abc")  // => true
       
   150 matcher(("a" ~ "b") ~ "c", "ab")   // => false
   131 
   151 
   132 // the supposedly 'evil' regular expression (a*)* b
   152 // the supposedly 'evil' regular expression (a*)* b
   133 val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
   153 // val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
   134 
   154 
   135 //matcher(EVIL, "a" * 1000 ++ "b")   // => true
   155 matcher(EVIL, "a" * 1000 ++ "b")   // => true
   136 //matcher(EVIL, "a" * 1000)          // => false
   156 matcher(EVIL, "a" * 1000)          // => false
   137 
   157 
   138 // size without simplifications
   158 // size without simplifications
   139 //size(der('a', der('a', EVIL)))             // => 28
   159 size(der('a', der('a', EVIL)))             // => 28
   140 //size(der('a', der('a', der('a', EVIL))))   // => 58
   160 size(der('a', der('a', der('a', EVIL))))   // => 58
   141 
   161 
   142 // size with simplification
   162 // size with simplification
   143 //size(simp(der('a', der('a', EVIL))))           // => 8
   163 size(simp(der('a', der('a', EVIL))))           // => 8
   144 //size(simp(der('a', der('a', der('a', EVIL))))) // => 8
   164 size(simp(der('a', der('a', der('a', EVIL))))) // => 8
   145 
   165 
   146 // Python needs around 30 seconds for matching 28 a's with EVIL. 
   166 // Python needs around 30 seconds for matching 28 a's with EVIL. 
   147 // Java 9 and later increase this to an "astonishing" 40000 a's in
   167 // Java 9 and later increase this to an "astonishing" 40000 a's in
   148 // around 30 seconds.
   168 // 30 seconds.
   149 //
   169 //
   150 // Lets see how long it takes to match strings with 
   170 // Lets see how long it really takes to match strings with 
   151 // 5 Million a's...it should be in the range of a 
   171 // 5 Million a's...it should be in the range of a couple
   152 // couple of seconds.
   172 // of seconds.
   153 
   173 
   154 def time_needed[T](i: Int, code: => T) = {
   174 def time_needed[T](i: Int, code: => T) = {
   155   val start = System.nanoTime()
   175   val start = System.nanoTime()
   156   for (j <- 1 to i) code
   176   for (j <- 1 to i) code
   157   val end = System.nanoTime()
   177   val end = System.nanoTime()
   158   (end - start)/(i * 1.0e9)
   178   (end - start)/(i * 1.0e9)
   159 }
   179 }
   160 
   180 
   161 //for (i <- 0 to 5000000 by 500000) {
   181 for (i <- 0 to 5000000 by 500000) {
   162 //  println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i))) + " secs.") 
   182   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i))))
   163 //}
   183 }
   164 
   184 
   165 // another "power" test case 
   185 // another "power" test case 
   166 //simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(100).next) == ONE
   186 simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(50).next) == ONE
   167 
   187 
   168 // the Iterator produces the rexp
   188 // the Iterator produces the rexp
   169 //
   189 //
   170 //      SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE)
   190 //      SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE)
   171 //
   191 //
   172 //    where SEQ is nested 100 times.
   192 //    where SEQ is nested 50 times.
   173  
       
   174 
   193 
       
   194 */
   175 
   195 
   176 }
   196 }