51 // accordingly. |
50 // accordingly. |
52 |
51 |
53 def nullable (r: Rexp) : Boolean = r match { |
52 def nullable (r: Rexp) : Boolean = r match { |
54 case ZERO => false |
53 case ZERO => false |
55 case ONE => true |
54 case ONE => true |
56 case CHAR(_) => false |
55 case CHAR(c) => false |
57 case ALTs(rs) => rs.exists(nullable) |
56 case ALTs(rs) => { |
58 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
57 if (rs.size == 0) false |
59 case STAR(_) => true |
58 else if (nullable(rs.head)) true |
60 } |
59 else nullable(ALTs(rs.tail)) |
|
60 } |
|
61 case SEQ(c, s) => nullable(c) && nullable(s) |
|
62 case STAR(r) => true |
|
63 case _ => false |
|
64 } |
|
65 |
61 |
66 |
62 // (2) Complete the function der according to |
67 // (2) Complete the function der according to |
63 // the definition given in the coursework; this |
68 // the definition given in the coursework; this |
64 // function calculates the derivative of a |
69 // function calculates the derivative of a |
65 // regular expression w.r.t. a character. |
70 // regular expression w.r.t. a character. |
66 |
71 |
67 def der (c: Char, r: Rexp) : Rexp = r match { |
72 def der (c: Char, r: Rexp) : Rexp = r match { |
68 case ZERO => ZERO |
73 case ZERO => ZERO |
69 case ONE => ZERO |
74 case ONE => ZERO |
70 case CHAR(d) => if (c == d) ONE else ZERO |
75 case CHAR(x) => { |
71 case ALTs(rs) => ALTs(rs.map(der(c, _))) |
76 if (x==c) ONE |
72 case SEQ(r1, r2) => |
77 else ZERO |
73 if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) |
78 } |
74 else SEQ(der(c, r1), r2) |
79 case ALTs(rs) => ALTs(for (i <- rs) yield der(c, i)) |
75 case STAR(r1) => SEQ(der(c, r1), STAR(r1)) |
80 case SEQ(x, y) => { |
76 } |
81 if (nullable(x)) ALTs(List(SEQ(der(c, x), y), der(c, y))) |
77 |
82 else SEQ(der(c, x), y) |
78 |
83 } |
|
84 case STAR(x) => SEQ(der(c, x), STAR(x)) |
|
85 } |
|
86 |
|
87 |
|
88 // (3) Implement the flatten function flts. It |
|
89 // deletes 0s from a list of regular expressions |
|
90 // and also 'spills out', or flattens, nested |
|
91 // ALTernativeS. |
79 |
92 |
80 def flts(rs: List[Rexp]) : List[Rexp] = rs match { |
93 def flts(rs: List[Rexp]) : List[Rexp] = rs match { |
81 case Nil => Nil |
94 case Nil => Nil |
82 case ZERO::tl => flts(tl) |
95 case ZERO::rest => flts(rest) |
83 case ALTs(rs1)::rs2 => rs1 ::: flts(rs2) |
96 case ALTs(rs_other)::rest => rs_other ::: flts(rest) |
84 case r::rs => r :: flts(rs) |
97 case r::rest => r::flts(rest) |
85 } |
98 } |
86 |
99 |
87 // (3) Complete the simp function according to |
100 |
88 // the specification given in the coursework; this |
101 |
89 // function simplifies a regular expression from |
102 // (4) Complete the simp function according to |
|
103 // the specification given in the coursework description; |
|
104 // this function simplifies a regular expression from |
90 // the inside out, like you would simplify arithmetic |
105 // the inside out, like you would simplify arithmetic |
91 // expressions; however it does not simplify inside |
106 // expressions; however it does not simplify inside |
92 // STAR-regular expressions. |
107 // STAR-regular expressions. Use the _.distinct and |
93 |
108 // flts functions. |
94 |
109 |
95 def simp(r: Rexp) : Rexp = r match { |
110 def simp(r: Rexp) : Rexp = r match { |
96 case ALTs(rs) => (flts(rs.map(simp)).distinct) match { |
111 case SEQ(x, ZERO) => ZERO |
97 case Nil => ZERO |
112 case SEQ(ZERO, x) => ZERO |
98 case r::Nil => r |
113 case SEQ(x, ONE) => x |
99 case rs => ALTs(rs) |
114 case SEQ(ONE, x) => x |
100 } |
115 case SEQ(x, y) => SEQ(simp(x), simp(y)) |
101 case SEQ(r1, r2) => (simp(r1), simp(r2)) match { |
116 case ALTs(rs) => { |
102 case (ZERO, _) => ZERO |
117 val list = flts(for (x <- rs) yield simp(x)).distinct |
103 case (_, ZERO) => ZERO |
118 if (list.size == 0) ZERO |
104 case (ONE, r2s) => r2s |
119 else if (list.size == 1) list.head |
105 case (r1s, ONE) => r1s |
120 else ALTs(list) |
106 case (r1s, r2s) => SEQ(r1s, r2s) |
121 } |
107 } |
122 case x => x |
108 case r => r |
123 } |
109 } |
124 |
110 |
125 |
111 |
126 // (5) Complete the two functions below; the first |
112 // (4) Complete the two functions below; the first |
|
113 // calculates the derivative w.r.t. a string; the second |
127 // calculates the derivative w.r.t. a string; the second |
114 // is the regular expression matcher taking a regular |
128 // is the regular expression matcher taking a regular |
115 // expression and a string and checks whether the |
129 // expression and a string and checks whether the |
116 // string matches the regular expression. |
130 // string matches the regular expression |
117 |
131 |
118 def ders (s: List[Char], r: Rexp) : Rexp = s match { |
132 def ders (s: List[Char], r: Rexp) : Rexp = s match { |
119 case Nil => r |
133 case Nil => r |
120 case c::s => ders(s, simp(der(c, r))) |
134 case c::rest => { |
121 } |
135 val deriv = simp(der(c,r)) |
122 |
136 ders(rest, deriv) |
123 // main matcher function |
137 } |
124 def matcher(r: Rexp, s: String) = nullable(ders(s.toList, r)) |
138 } |
125 |
139 |
126 // (5) Complete the size function for regular |
140 def matcher(r: Rexp, s: String): Boolean = nullable(ders(s.toList, r)) |
|
141 |
|
142 |
|
143 // (6) Complete the size function for regular |
127 // expressions according to the specification |
144 // expressions according to the specification |
128 // given in the coursework. |
145 // given in the coursework. |
129 |
146 |
130 |
|
131 def size(r: Rexp): Int = r match { |
147 def size(r: Rexp): Int = r match { |
|
148 case Nil => 0 |
132 case ZERO => 1 |
149 case ZERO => 1 |
133 case ONE => 1 |
150 case ONE => 1 |
134 case CHAR(_) => 1 |
151 case CHAR(x) => 1 |
135 case ALTs(rs) => 1 + rs.map(size).sum |
152 case ALTs(rs) => 1 + (for (x <- rs) yield size(x)).sum |
136 case SEQ(r1, r2) => 1 + size(r1) + size (r2) |
153 case SEQ(x, y) => 1 + size(x) + size(y) |
137 case STAR(r1) => 1 + size(r1) |
154 case STAR(x) => 1 + size(x) |
138 } |
155 } |
139 |
|
140 |
156 |
141 |
157 |
142 // some testing data |
158 // some testing data |
143 |
159 |
144 //matcher(("a" ~ "b") ~ "c", "abc") // => true |
160 |
145 //matcher(("a" ~ "b") ~ "c", "ab") // => false |
161 // matcher(("a" ~ "b") ~ "c", "abc") // => true |
|
162 // matcher(("a" ~ "b") ~ "c", "ab") // => false |
146 |
163 |
147 // the supposedly 'evil' regular expression (a*)* b |
164 // the supposedly 'evil' regular expression (a*)* b |
148 val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) |
165 // val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) |
149 |
166 |
150 //println(matcher(EVIL, "a" * 1000 ++ "b")) // => true |
167 // matcher(EVIL, "a" * 1000 ++ "b") // => true |
151 //println(matcher(EVIL, "a" * 1000)) // => false |
168 // matcher(EVIL, "a" * 1000) // => false |
152 |
169 |
153 // size without simplifications |
170 // size without simplifications |
154 //println(size(der('a', der('a', EVIL)))) // => 28 |
171 // size(der('a', der('a', EVIL))) // => 28 |
155 //println(size(der('a', der('a', der('a', EVIL))))) // => 58 |
172 // size(der('a', der('a', der('a', EVIL)))) // => 58 |
156 |
173 |
157 // size with simplification |
174 // size with simplification |
158 //println(simp(der('a', der('a', EVIL)))) |
175 // size(simp(der('a', der('a', EVIL)))) // => 8 |
159 //println(simp(der('a', der('a', der('a', EVIL))))) |
176 // size(simp(der('a', der('a', der('a', EVIL))))) // => 8 |
160 |
|
161 //println(size(simp(der('a', der('a', EVIL))))) // => 8 |
|
162 //println(size(simp(der('a', der('a', der('a', EVIL)))))) // => 8 |
|
163 |
177 |
164 // Python needs around 30 seconds for matching 28 a's with EVIL. |
178 // Python needs around 30 seconds for matching 28 a's with EVIL. |
165 // Java 9 and later increase this to an "astonishing" 40000 a's in |
179 // Java 9 and later increase this to an "astonishing" 40000 a's in |
166 // around 30 seconds. |
180 // 30 seconds. |
167 // |
181 // |
168 // Lets see how long it takes to match strings with |
182 // Lets see how long it really takes to match strings with |
169 // 5 Million a's...it should be in the range of a |
183 // 5 Million a's...it should be in the range of a couple |
170 // couple of seconds. |
184 // of seconds. |
171 |
185 |
172 def time_needed[T](i: Int, code: => T) = { |
186 // def time_needed[T](i: Int, code: => T) = { |
173 val start = System.nanoTime() |
187 // val start = System.nanoTime() |
174 for (j <- 1 to i) code |
188 // for (j <- 1 to i) code |
175 val end = System.nanoTime() |
189 // val end = System.nanoTime() |
176 "%.5f".format((end - start)/(i * 1.0e9)) |
190 // "%.5f".format((end - start)/(i * 1.0e9)) |
177 } |
191 // } |
178 |
192 |
179 //for (i <- 0 to 5000000 by 500000) { |
193 // for (i <- 0 to 5000000 by 500000) { |
180 // println(s"$i ${time_needed(2, matcher(EVIL, "a" * i))} secs.") |
194 // println(s"$i ${time_needed(2, matcher(EVIL, "a" * i))} secs.") |
181 //} |
195 // } |
182 |
196 |
183 // another "power" test case |
197 // another "power" test case |
184 //simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(100).next) == ONE |
198 // simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(50).next()) == ONE |
185 |
199 |
186 // the Iterator produces the rexp |
200 // the Iterator produces the rexp |
187 // |
201 // |
188 // SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE) |
202 // SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE) |
189 // |
203 // |
190 // where SEQ is nested 50 times. |
204 // where SEQ is nested 50 times. |
191 |
205 |
192 |
206 // This a dummy comment. Hopefully it works! |
193 |
207 |
194 } |
208 } |
|
209 |