50 // accordingly. |
48 // accordingly. |
51 |
49 |
52 def nullable (r: Rexp) : Boolean = r match { |
50 def nullable (r: Rexp) : Boolean = r match { |
53 case ZERO => false |
51 case ZERO => false |
54 case ONE => true |
52 case ONE => true |
55 case CHAR(c) => false |
53 case CHAR(_) => false |
56 case ALTs(rs) => { |
54 case ALTs(rs) => rs.exists(nullable) |
57 if (rs.size == 0) false |
55 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
58 else if (nullable(rs.head)) true |
56 case STAR(_) => true |
59 else nullable(ALTs(rs.tail)) |
|
60 } |
|
61 case SEQ(c, s) => nullable(c) && nullable(s) |
|
62 case STAR(r) => true |
|
63 case _ => false |
|
64 } |
57 } |
65 |
|
66 |
58 |
67 // (2) Complete the function der according to |
59 // (2) Complete the function der according to |
68 // the definition given in the coursework; this |
60 // the definition given in the coursework; this |
69 // function calculates the derivative of a |
61 // function calculates the derivative of a |
70 // regular expression w.r.t. a character. |
62 // regular expression w.r.t. a character. |
71 |
63 |
72 def der (c: Char, r: Rexp) : Rexp = r match { |
64 def der (c: Char, r: Rexp) : Rexp = r match { |
73 case ZERO => ZERO |
65 case ZERO => ZERO |
74 case ONE => ZERO |
66 case ONE => ZERO |
75 case CHAR(x) => { |
67 case CHAR(d) => if (c == d) ONE else ZERO |
76 if (x==c) ONE |
68 case ALTs(rs) => ALTs(rs.map(der(c, _))) |
77 else ZERO |
69 case SEQ(r1, r2) => |
78 } |
70 if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) |
79 case ALTs(rs) => ALTs(for (i <- rs) yield der(c, i)) |
71 else SEQ(der(c, r1), r2) |
80 case SEQ(x, y) => { |
72 case STAR(r1) => SEQ(der(c, r1), STAR(r1)) |
81 if (nullable(x)) ALTs(List(SEQ(der(c, x), y), der(c, y))) |
|
82 else SEQ(der(c, x), y) |
|
83 } |
|
84 case STAR(x) => SEQ(der(c, x), STAR(x)) |
|
85 } |
73 } |
86 |
74 |
87 |
75 |
88 // (3) Implement the flatten function flts. It |
76 // (3) Implement the flatten function flts. It |
89 // deletes 0s from a list of regular expressions |
77 // deletes 0s from a list of regular expressions |
90 // and also 'spills out', or flattens, nested |
78 // and also 'spills out', or flattens, nested |
91 // ALTernativeS. |
79 // ALTernativeS. |
92 |
80 |
93 def flts(rs: List[Rexp]) : List[Rexp] = rs match { |
81 def flts(rs: List[Rexp]) : List[Rexp] = rs match { |
94 case Nil => Nil |
82 case Nil => Nil |
95 case ZERO::rest => flts(rest) |
83 case ZERO::tl => flts(tl) |
96 case ALTs(rs_other)::rest => rs_other ::: flts(rest) |
84 case ALTs(rs1)::rs2 => rs1 ::: flts(rs2) |
97 case r::rest => r::flts(rest) |
85 case r::rs => r :: flts(rs) |
98 } |
86 } |
99 |
87 |
100 |
88 |
101 |
89 |
102 // (4) Complete the simp function according to |
90 // (4) Complete the simp function according to |
103 // the specification given in the coursework description; |
91 // the specification given in the coursework; this |
104 // this function simplifies a regular expression from |
92 // function simplifies a regular expression from |
105 // the inside out, like you would simplify arithmetic |
93 // the inside out, like you would simplify arithmetic |
106 // expressions; however it does not simplify inside |
94 // expressions; however it does not simplify inside |
107 // STAR-regular expressions. Use the _.distinct and |
95 // STAR-regular expressions. |
108 // flts functions. |
96 |
109 |
97 |
110 def simp(r: Rexp) : Rexp = r match { |
98 def simp(r: Rexp) : Rexp = r match { |
111 case SEQ(x, ZERO) => ZERO |
99 case ALTs(rs) => (flts(rs.map(simp)).distinct) match { |
112 case SEQ(ZERO, x) => ZERO |
100 case Nil => ZERO |
113 case SEQ(x, ONE) => x |
101 case r::Nil => r |
114 case SEQ(ONE, x) => x |
102 case rs => ALTs(rs) |
115 case SEQ(x, y) => SEQ(simp(x), simp(y)) |
|
116 case ALTs(rs) => { |
|
117 val list = flts(for (x <- rs) yield simp(x)).distinct |
|
118 if (list.size == 0) ZERO |
|
119 else if (list.size == 1) list.head |
|
120 else ALTs(list) |
|
121 } |
103 } |
122 case x => x |
104 case SEQ(r1, r2) => (simp(r1), simp(r2)) match { |
|
105 case (ZERO, _) => ZERO |
|
106 case (_, ZERO) => ZERO |
|
107 case (ONE, r2s) => r2s |
|
108 case (r1s, ONE) => r1s |
|
109 case (r1s, r2s) => SEQ(r1s, r2s) |
|
110 } |
|
111 case r => r |
123 } |
112 } |
124 |
113 |
|
114 simp(ALT(ONE | CHAR('a'), CHAR('a') | ONE)) |
125 |
115 |
126 // (5) Complete the two functions below; the first |
116 // (5) Complete the two functions below; the first |
127 // calculates the derivative w.r.t. a string; the second |
117 // calculates the derivative w.r.t. a string; the second |
128 // is the regular expression matcher taking a regular |
118 // is the regular expression matcher taking a regular |
129 // expression and a string and checks whether the |
119 // expression and a string and checks whether the |
130 // string matches the regular expression |
120 // string matches the regular expression. |
131 |
121 |
132 def ders (s: List[Char], r: Rexp) : Rexp = s match { |
122 def ders (s: List[Char], r: Rexp) : Rexp = s match { |
133 case Nil => r |
123 case Nil => r |
134 case c::rest => { |
124 case c::s => ders(s, simp(der(c, r))) |
135 val deriv = simp(der(c,r)) |
|
136 ders(rest, deriv) |
|
137 } |
|
138 } |
125 } |
139 |
126 |
140 def matcher(r: Rexp, s: String): Boolean = nullable(ders(s.toList, r)) |
127 // main matcher function |
141 |
128 def matcher(r: Rexp, s: String) = nullable(ders(s.toList, r)) |
142 |
129 |
143 // (6) Complete the size function for regular |
130 // (6) Complete the size function for regular |
144 // expressions according to the specification |
131 // expressions according to the specification |
145 // given in the coursework. |
132 // given in the coursework. |
146 |
133 |
|
134 |
147 def size(r: Rexp): Int = r match { |
135 def size(r: Rexp): Int = r match { |
148 case Nil => 0 |
|
149 case ZERO => 1 |
136 case ZERO => 1 |
150 case ONE => 1 |
137 case ONE => 1 |
151 case CHAR(x) => 1 |
138 case CHAR(_) => 1 |
152 case ALTs(rs) => 1 + (for (x <- rs) yield size(x)).sum |
139 case ALTs(rs) => 1 + rs.map(size).sum |
153 case SEQ(x, y) => 1 + size(x) + size(y) |
140 case SEQ(r1, r2) => 1 + size(r1) + size (r2) |
154 case STAR(x) => 1 + size(x) |
141 case STAR(r1) => 1 + size(r1) |
155 } |
142 } |
|
143 |
156 |
144 |
157 |
145 |
158 // some testing data |
146 // some testing data |
159 |
147 |
160 |
148 //matcher(("a" ~ "b") ~ "c", "abc") // => true |
161 // matcher(("a" ~ "b") ~ "c", "abc") // => true |
149 //matcher(("a" ~ "b") ~ "c", "ab") // => false |
162 // matcher(("a" ~ "b") ~ "c", "ab") // => false |
|
163 |
150 |
164 // the supposedly 'evil' regular expression (a*)* b |
151 // the supposedly 'evil' regular expression (a*)* b |
165 // val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) |
152 // val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) |
166 |
153 |
167 // matcher(EVIL, "a" * 1000 ++ "b") // => true |
154 //println(matcher(EVIL, "a" * 1000 ++ "b")) // => true |
168 // matcher(EVIL, "a" * 1000) // => false |
155 //println(matcher(EVIL, "a" * 1000)) // => false |
169 |
156 |
170 // size without simplifications |
157 // size without simplifications |
171 // size(der('a', der('a', EVIL))) // => 28 |
158 //println(size(der('a', der('a', EVIL)))) // => 28 |
172 // size(der('a', der('a', der('a', EVIL)))) // => 58 |
159 //println(size(der('a', der('a', der('a', EVIL))))) // => 58 |
173 |
160 |
174 // size with simplification |
161 // size with simplification |
175 // size(simp(der('a', der('a', EVIL)))) // => 8 |
162 //println(simp(der('a', der('a', EVIL)))) |
176 // size(simp(der('a', der('a', der('a', EVIL))))) // => 8 |
163 //println(simp(der('a', der('a', der('a', EVIL))))) |
|
164 |
|
165 //println(size(simp(der('a', der('a', EVIL))))) // => 8 |
|
166 //println(size(simp(der('a', der('a', der('a', EVIL)))))) // => 8 |
177 |
167 |
178 // Python needs around 30 seconds for matching 28 a's with EVIL. |
168 // Python needs around 30 seconds for matching 28 a's with EVIL. |
179 // Java 9 and later increase this to an "astonishing" 40000 a's in |
169 // Java 9 and later increase this to an "astonishing" 40000 a's in |
180 // 30 seconds. |
170 // around 30 seconds. |
181 // |
171 // |
182 // Lets see how long it really takes to match strings with |
172 // Lets see how long it takes to match strings with |
183 // 5 Million a's...it should be in the range of a couple |
173 // 5 Million a's...it should be in the range of a |
184 // of seconds. |
174 // couple of seconds. |
185 |
175 |
186 // def time_needed[T](i: Int, code: => T) = { |
176 def time_needed[T](i: Int, code: => T) = { |
187 // val start = System.nanoTime() |
177 val start = System.nanoTime() |
188 // for (j <- 1 to i) code |
178 for (j <- 1 to i) code |
189 // val end = System.nanoTime() |
179 val end = System.nanoTime() |
190 // "%.5f".format((end - start)/(i * 1.0e9)) |
180 "%.5f".format((end - start)/(i * 1.0e9)) |
191 // } |
181 } |
192 |
182 |
193 // for (i <- 0 to 5000000 by 500000) { |
183 //for (i <- 0 to 5000000 by 500000) { |
194 // println(s"$i ${time_needed(2, matcher(EVIL, "a" * i))} secs.") |
184 // println(s"$i ${time_needed(2, matcher(EVIL, "a" * i))} secs.") |
195 // } |
185 //} |
196 |
186 |
197 // another "power" test case |
187 // another "power" test case |
198 // simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(50).next()) == ONE |
188 //simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(100).next) == ONE |
199 |
189 |
200 // the Iterator produces the rexp |
190 // the Iterator produces the rexp |
201 // |
191 // |
202 // SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE) |
192 // SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE) |
203 // |
193 // |
204 // where SEQ is nested 50 times. |
194 // where SEQ is nested 50 times. |
|
195 |
205 |
196 |
206 // This a dummy comment. Hopefully it works! |
|
207 |
197 |
208 } |
198 } |
209 |
|