1 // Main Part 3 about Regular Expression Matching |
1 // Main Part 3 about Regular Expression Matching |
2 //============================================= |
2 //============================================== |
3 |
3 |
4 object M3 { |
4 object M3 { |
5 |
5 |
6 // Regular Expressions |
|
7 abstract class Rexp |
6 abstract class Rexp |
8 case object ZERO extends Rexp |
7 case object ZERO extends Rexp |
9 case object ONE extends Rexp |
8 case object ONE extends Rexp |
10 case class CHAR(c: Char) extends Rexp |
9 case class CHAR(c: Char) extends Rexp |
11 case class ALTs(rs: List[Rexp]) extends Rexp // alternatives |
10 case class ALTs(rs: List[Rexp]) extends Rexp // alternatives |
12 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence |
11 case class SEQs(rs: List[Rexp]) extends Rexp // sequences |
13 case class STAR(r: Rexp) extends Rexp // star |
12 case class STAR(r: Rexp) extends Rexp // star |
14 |
13 |
15 //the usual binary choice can be defined in terms of ALTs |
14 |
|
15 //the usual binary choice and binary sequence can be defined |
|
16 //in terms of ALTs and SEQs |
16 def ALT(r1: Rexp, r2: Rexp) = ALTs(List(r1, r2)) |
17 def ALT(r1: Rexp, r2: Rexp) = ALTs(List(r1, r2)) |
|
18 def SEQ(r1: Rexp, r2: Rexp) = SEQs(List(r1, r2)) |
17 |
19 |
18 |
20 |
19 // some convenience for typing regular expressions |
21 // some convenience for typing regular expressions |
20 |
|
21 import scala.language.implicitConversions |
22 import scala.language.implicitConversions |
22 import scala.language.reflectiveCalls |
23 import scala.language.reflectiveCalls |
23 |
24 |
24 def charlist2rexp(s: List[Char]): Rexp = s match { |
25 def charlist2rexp(s: List[Char]): Rexp = s match { |
25 case Nil => ONE |
26 case Nil => ONE |
40 def % = STAR(s) |
41 def % = STAR(s) |
41 def ~ (r: Rexp) = SEQ(s, r) |
42 def ~ (r: Rexp) = SEQ(s, r) |
42 def ~ (r: String) = SEQ(s, r) |
43 def ~ (r: String) = SEQ(s, r) |
43 } |
44 } |
44 |
45 |
|
46 // examples for the implicits: |
|
47 // ALT(CHAR('a'), CHAR('b')) |
|
48 // val areg : Rexp = "a" | "b" |
45 |
49 |
46 // ALT(CHAR('a'), CHAR('b')) |
50 // SEQ(CHAR('a'), CHAR('b')) |
47 // val reg : Rexp = "a" | "b" |
51 // val sreg : Rexp = "a" ~ "b" |
48 |
52 |
49 |
53 |
50 // (1) Complete the function nullable according to |
54 // ADD YOUR CODE BELOW |
51 // the definition given in the coursework; this |
55 //====================== |
52 // function checks whether a regular expression |
|
53 // can match the empty string and Returns a boolean |
|
54 // accordingly. |
|
55 |
56 |
|
57 // (1) |
56 def nullable (r: Rexp) : Boolean = ??? |
58 def nullable (r: Rexp) : Boolean = ??? |
57 |
59 |
58 |
60 // (2) |
59 // (2) Complete the function der according to |
|
60 // the definition given in the coursework; this |
|
61 // function calculates the derivative of a |
|
62 // regular expression w.r.t. a character. |
|
63 |
|
64 def der (c: Char, r: Rexp) : Rexp = ??? |
61 def der (c: Char, r: Rexp) : Rexp = ??? |
65 |
62 |
|
63 // (3) |
|
64 def denest(rs: List[Rexp]) : List[Rexp] = ??? |
66 |
65 |
67 // (3) Implement the flatten function flts. It |
66 // (4) |
68 // deletes 0s from a list of regular expressions |
67 def flts(rs: List[Rexp], acc: List[Rexp] = Nil) : List[Rexp] = ??? |
69 // and also 'spills out', or flattens, nested |
|
70 // ALTernativeS. |
|
71 |
68 |
72 def flts(rs: List[Rexp]) : List[Rexp] = ??? |
69 // (5) |
|
70 def ALTs_smart(rs: List[Rexp]) : Rexp = ??? |
|
71 def SEQs_smart(rs: List[Rexp]) : Rexp = ??? |
73 |
72 |
74 |
73 // (6) |
75 |
|
76 // (4) Complete the simp function according to |
|
77 // the specification given in the coursework description; |
|
78 // this function simplifies a regular expression from |
|
79 // the inside out, like you would simplify arithmetic |
|
80 // expressions; however it does not simplify inside |
|
81 // STAR-regular expressions. Use the _.distinct and |
|
82 // flts functions. |
|
83 |
|
84 def simp(r: Rexp) : Rexp = ??? |
74 def simp(r: Rexp) : Rexp = ??? |
85 |
75 |
86 |
76 // (7) |
87 // (5) Complete the two functions below; the first |
|
88 // calculates the derivative w.r.t. a string; the second |
|
89 // is the regular expression matcher taking a regular |
|
90 // expression and a string and checks whether the |
|
91 // string matches the regular expression |
|
92 |
|
93 def ders (s: List[Char], r: Rexp) : Rexp = ??? |
77 def ders (s: List[Char], r: Rexp) : Rexp = ??? |
94 |
|
95 def matcher(r: Rexp, s: String): Boolean = ??? |
78 def matcher(r: Rexp, s: String): Boolean = ??? |
96 |
79 |
97 |
80 // (8) |
98 // (6) Complete the size function for regular |
|
99 // expressions according to the specification |
|
100 // given in the coursework. |
|
101 |
|
102 def size(r: Rexp): Int = ??? |
81 def size(r: Rexp): Int = ??? |
103 |
82 |
104 |
83 |
105 // some testing data |
84 // Some testing data |
|
85 //=================== |
|
86 /* |
106 |
87 |
107 /* |
88 simp(ALT(ONE | CHAR('a'), CHAR('a') | ONE)) // => ALTs(List(ONE, CHAR(a))) |
|
89 simp(((CHAR('a') | ZERO) ~ ONE) | |
|
90 (((ONE | CHAR('b')) | CHAR('c')) ~ (CHAR('d') ~ ZERO))) // => CHAR(a) |
|
91 |
|
92 matcher(("a" ~ "b") ~ "c", "ab") // => false |
108 matcher(("a" ~ "b") ~ "c", "abc") // => true |
93 matcher(("a" ~ "b") ~ "c", "abc") // => true |
109 matcher(("a" ~ "b") ~ "c", "ab") // => false |
94 |
110 |
95 |
111 // the supposedly 'evil' regular expression (a*)* b |
96 // the supposedly 'evil' regular expression (a*)* b |
112 val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) |
97 val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) |
113 |
98 |
|
99 matcher(EVIL, "a" * 1000) // => false |
114 matcher(EVIL, "a" * 1000 ++ "b") // => true |
100 matcher(EVIL, "a" * 1000 ++ "b") // => true |
115 matcher(EVIL, "a" * 1000) // => false |
101 |
116 |
102 |
117 // size without simplifications |
103 // size without simplifications |
118 size(der('a', der('a', EVIL))) // => 28 |
104 size(der('a', der('a', EVIL))) // => 36 |
119 size(der('a', der('a', der('a', EVIL)))) // => 58 |
105 size(der('a', der('a', der('a', EVIL)))) // => 83 |
120 |
106 |
121 // size with simplification |
107 // size with simplification |
122 size(simp(der('a', der('a', EVIL)))) // => 8 |
108 size(simp(der('a', der('a', EVIL)))) // => 7 |
123 size(simp(der('a', der('a', der('a', EVIL))))) // => 8 |
109 size(simp(der('a', der('a', der('a', EVIL))))) // => 7 |
124 |
110 |
125 // Python needs around 30 seconds for matching 28 a's with EVIL. |
111 // Python needs around 30 seconds for matching 28 a's with EVIL. |
126 // Java 9 and later increase this to an "astonishing" 40000 a's in |
112 // Java 9 and later increase this to an "astonishing" 40000 a's in |
127 // 30 seconds. |
113 // 30 seconds. |
128 // |
114 // |
129 // Lets see how long it really takes to match strings with |
115 // Lets see how long it really takes to match strings with |
130 // 5 Million a's...it should be in the range of a couple |
116 // 5 Million a's...it should be in the range of a few |
131 // of seconds. |
117 // of seconds. |
132 |
118 |
133 def time_needed[T](i: Int, code: => T) = { |
119 def time_needed[T](i: Int, code: => T) = { |
134 val start = System.nanoTime() |
120 val start = System.nanoTime() |
135 for (j <- 1 to i) code |
121 for (j <- 1 to i) code |