main_templates3/re.scala
changeset 425 6e990ae2c6a3
parent 421 864107857d27
child 474 8a61bcd51ec3
equal deleted inserted replaced
424:3c81352ec565 425:6e990ae2c6a3
     1 // Main Part 3 about Regular Expression Matching
     1 // Main Part 3 about Regular Expression Matching
     2 //=============================================
     2 //==============================================
     3 
     3 
     4 object M3 {
     4 object M3 {
     5 
     5 
     6 // Regular Expressions
       
     7 abstract class Rexp
     6 abstract class Rexp
     8 case object ZERO extends Rexp
     7 case object ZERO extends Rexp
     9 case object ONE extends Rexp
     8 case object ONE extends Rexp
    10 case class CHAR(c: Char) extends Rexp
     9 case class CHAR(c: Char) extends Rexp
    11 case class ALTs(rs: List[Rexp]) extends Rexp      // alternatives 
    10 case class ALTs(rs: List[Rexp]) extends Rexp  // alternatives 
    12 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp   // sequence
    11 case class SEQs(rs: List[Rexp]) extends Rexp  // sequences
    13 case class STAR(r: Rexp) extends Rexp             // star
    12 case class STAR(r: Rexp) extends Rexp         // star
    14 
    13 
    15 //the usual binary choice can be defined in terms of ALTs
    14 
       
    15 //the usual binary choice and binary sequence can be defined 
       
    16 //in terms of ALTs and SEQs
    16 def ALT(r1: Rexp, r2: Rexp) = ALTs(List(r1, r2))
    17 def ALT(r1: Rexp, r2: Rexp) = ALTs(List(r1, r2))
       
    18 def SEQ(r1: Rexp, r2: Rexp) = SEQs(List(r1, r2))
    17 
    19 
    18 
    20 
    19 // some convenience for typing regular expressions
    21 // some convenience for typing regular expressions
    20 
       
    21 import scala.language.implicitConversions    
    22 import scala.language.implicitConversions    
    22 import scala.language.reflectiveCalls 
    23 import scala.language.reflectiveCalls 
    23 
    24 
    24 def charlist2rexp(s: List[Char]): Rexp = s match {
    25 def charlist2rexp(s: List[Char]): Rexp = s match {
    25   case Nil => ONE
    26   case Nil => ONE
    40   def % = STAR(s)
    41   def % = STAR(s)
    41   def ~ (r: Rexp) = SEQ(s, r)
    42   def ~ (r: Rexp) = SEQ(s, r)
    42   def ~ (r: String) = SEQ(s, r)
    43   def ~ (r: String) = SEQ(s, r)
    43 }
    44 }
    44 
    45 
       
    46 // examples for the implicits:
       
    47 // ALT(CHAR('a'), CHAR('b'))
       
    48 // val areg : Rexp = "a" | "b"
    45 
    49 
    46 // ALT(CHAR('a'), CHAR('b'))
    50 // SEQ(CHAR('a'), CHAR('b')) 
    47 // val reg : Rexp = "a" | "b" 
    51 // val sreg : Rexp = "a" ~ "b"
    48 
    52 
    49 
    53 
    50 // (1) Complete the function nullable according to
    54 // ADD YOUR CODE BELOW
    51 // the definition given in the coursework; this 
    55 //======================
    52 // function checks whether a regular expression
       
    53 // can match the empty string and Returns a boolean
       
    54 // accordingly.
       
    55 
    56 
       
    57 // (1)
    56 def nullable (r: Rexp) : Boolean = ???
    58 def nullable (r: Rexp) : Boolean = ???
    57 
    59 
    58 
    60 // (2) 
    59 // (2) Complete the function der according to
       
    60 // the definition given in the coursework; this
       
    61 // function calculates the derivative of a 
       
    62 // regular expression w.r.t. a character.
       
    63 
       
    64 def der (c: Char, r: Rexp) : Rexp = ???
    61 def der (c: Char, r: Rexp) : Rexp = ???
    65 
    62 
       
    63 // (3) 
       
    64 def denest(rs: List[Rexp]) : List[Rexp] = ???
    66 
    65 
    67 // (3) Implement the flatten function flts. It
    66 // (4)
    68 // deletes 0s from a list of regular expressions
    67 def flts(rs: List[Rexp], acc: List[Rexp] = Nil) : List[Rexp] = ???
    69 // and also 'spills out', or flattens, nested 
       
    70 // ALTernativeS.
       
    71 
    68 
    72 def flts(rs: List[Rexp]) : List[Rexp] = ???
    69 // (5)
       
    70 def ALTs_smart(rs: List[Rexp]) : Rexp = ???
       
    71 def SEQs_smart(rs: List[Rexp]) : Rexp = ???
    73 
    72 
    74 
    73 // (6)
    75 
       
    76 // (4) Complete the simp function according to
       
    77 // the specification given in the coursework description; 
       
    78 // this function simplifies a regular expression from
       
    79 // the inside out, like you would simplify arithmetic 
       
    80 // expressions; however it does not simplify inside 
       
    81 // STAR-regular expressions. Use the _.distinct and 
       
    82 // flts functions.
       
    83 
       
    84 def simp(r: Rexp) : Rexp = ???
    74 def simp(r: Rexp) : Rexp = ???
    85 
    75 
    86 
    76 // (7)
    87 // (5) Complete the two functions below; the first 
       
    88 // calculates the derivative w.r.t. a string; the second
       
    89 // is the regular expression matcher taking a regular
       
    90 // expression and a string and checks whether the
       
    91 // string matches the regular expression
       
    92 
       
    93 def ders (s: List[Char], r: Rexp) : Rexp = ???
    77 def ders (s: List[Char], r: Rexp) : Rexp = ???
    94 
       
    95 def matcher(r: Rexp, s: String): Boolean = ???
    78 def matcher(r: Rexp, s: String): Boolean = ???
    96 
    79 
    97 
    80 // (8) 
    98 // (6) Complete the size function for regular
       
    99 // expressions according to the specification 
       
   100 // given in the coursework.
       
   101 
       
   102 def size(r: Rexp): Int = ???
    81 def size(r: Rexp): Int = ???
   103 
    82 
   104 
    83 
   105 // some testing data
    84 // Some testing data
       
    85 //===================
       
    86 /*
   106 
    87 
   107 /*
    88 simp(ALT(ONE | CHAR('a'), CHAR('a') | ONE))   // => ALTs(List(ONE, CHAR(a)))
       
    89 simp(((CHAR('a') | ZERO) ~ ONE) | 
       
    90      (((ONE | CHAR('b')) | CHAR('c')) ~ (CHAR('d') ~ ZERO)))   // => CHAR(a)
       
    91 
       
    92 matcher(("a" ~ "b") ~ "c", "ab")   // => false
   108 matcher(("a" ~ "b") ~ "c", "abc")  // => true
    93 matcher(("a" ~ "b") ~ "c", "abc")  // => true
   109 matcher(("a" ~ "b") ~ "c", "ab")   // => false
    94 
   110 
    95 
   111 // the supposedly 'evil' regular expression (a*)* b
    96 // the supposedly 'evil' regular expression (a*)* b
   112 val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
    97 val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
   113 
    98 
       
    99 matcher(EVIL, "a" * 1000)          // => false
   114 matcher(EVIL, "a" * 1000 ++ "b")   // => true
   100 matcher(EVIL, "a" * 1000 ++ "b")   // => true
   115 matcher(EVIL, "a" * 1000)          // => false
   101 
   116 
   102 
   117 // size without simplifications
   103 // size without simplifications
   118 size(der('a', der('a', EVIL)))             // => 28
   104 size(der('a', der('a', EVIL)))             // => 36
   119 size(der('a', der('a', der('a', EVIL))))   // => 58
   105 size(der('a', der('a', der('a', EVIL))))   // => 83
   120 
   106 
   121 // size with simplification
   107 // size with simplification
   122 size(simp(der('a', der('a', EVIL))))           // => 8
   108 size(simp(der('a', der('a', EVIL))))           // => 7
   123 size(simp(der('a', der('a', der('a', EVIL))))) // => 8
   109 size(simp(der('a', der('a', der('a', EVIL))))) // => 7
   124 
   110 
   125 // Python needs around 30 seconds for matching 28 a's with EVIL. 
   111 // Python needs around 30 seconds for matching 28 a's with EVIL. 
   126 // Java 9 and later increase this to an "astonishing" 40000 a's in
   112 // Java 9 and later increase this to an "astonishing" 40000 a's in
   127 // 30 seconds.
   113 // 30 seconds.
   128 //
   114 //
   129 // Lets see how long it really takes to match strings with 
   115 // Lets see how long it really takes to match strings with 
   130 // 5 Million a's...it should be in the range of a couple
   116 // 5 Million a's...it should be in the range of a few
   131 // of seconds.
   117 // of seconds.
   132 
   118 
   133 def time_needed[T](i: Int, code: => T) = {
   119 def time_needed[T](i: Int, code: => T) = {
   134   val start = System.nanoTime()
   120   val start = System.nanoTime()
   135   for (j <- 1 to i) code
   121   for (j <- 1 to i) code