39 def % = STAR(s) |
41 def % = STAR(s) |
40 def ~ (r: Rexp) = SEQ(s, r) |
42 def ~ (r: Rexp) = SEQ(s, r) |
41 def ~ (r: String) = SEQ(s, r) |
43 def ~ (r: String) = SEQ(s, r) |
42 } |
44 } |
43 |
45 |
44 // (1) Complete the function nullable according to |
46 // (1) |
45 // the definition given in the coursework; this |
|
46 // function checks whether a regular expression |
|
47 // can match the empty string and Returns a boolean |
|
48 // accordingly. |
|
49 |
|
50 def nullable (r: Rexp) : Boolean = r match { |
47 def nullable (r: Rexp) : Boolean = r match { |
51 case ZERO => false |
48 case ZERO => false |
52 case ONE => true |
49 case ONE => true |
53 case CHAR(_) => false |
50 case CHAR(_) => false |
54 case ALTs(rs) => rs.exists(nullable) |
51 case ALTs(rs) => rs.exists(nullable) |
55 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
52 case SEQs(rs) => rs.forall(nullable) |
56 case STAR(_) => true |
53 case STAR(_) => true |
57 } |
54 } |
58 |
55 |
59 // (2) Complete the function der according to |
56 // (2) |
60 // the definition given in the coursework; this |
57 def der(c: Char, r: Rexp) : Rexp = r match { |
61 // function calculates the derivative of a |
|
62 // regular expression w.r.t. a character. |
|
63 |
|
64 def der (c: Char, r: Rexp) : Rexp = r match { |
|
65 case ZERO => ZERO |
58 case ZERO => ZERO |
66 case ONE => ZERO |
59 case ONE => ZERO |
67 case CHAR(d) => if (c == d) ONE else ZERO |
60 case CHAR(d) => if (c == d) ONE else ZERO |
68 case ALTs(rs) => ALTs(rs.map(der(c, _))) |
61 case ALTs(rs) => ALTs(rs.map(der(c, _))) |
69 case SEQ(r1, r2) => |
62 case SEQs(Nil) => ZERO |
70 if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) |
63 case SEQs(r1::rs) => |
71 else SEQ(der(c, r1), r2) |
64 if (nullable(r1)) ALT(SEQs(der(c, r1)::rs), der(c, SEQs(rs))) |
|
65 else SEQs(der(c, r1):: rs) |
72 case STAR(r1) => SEQ(der(c, r1), STAR(r1)) |
66 case STAR(r1) => SEQ(der(c, r1), STAR(r1)) |
73 } |
67 } |
74 |
68 |
75 |
69 |
76 // (3) Implement the flatten function flts. It |
70 // (3) |
77 // deletes 0s from a list of regular expressions |
71 def denest(rs: List[Rexp]) : List[Rexp] = rs match { |
78 // and also 'spills out', or flattens, nested |
|
79 // ALTernativeS. |
|
80 |
|
81 def flts(rs: List[Rexp]) : List[Rexp] = rs match { |
|
82 case Nil => Nil |
72 case Nil => Nil |
83 case ZERO::tl => flts(tl) |
73 case ZERO::tl => denest(tl) |
84 case ALTs(rs1)::rs2 => rs1 ::: flts(rs2) |
74 case ALTs(rs1)::rs2 => rs1 ::: denest(rs2) |
85 case r::rs => r :: flts(rs) |
75 case r::rs => r :: denest(rs) |
86 } |
76 } |
87 |
77 |
88 // (4) Complete the simp function according to |
78 // (4) |
89 // the specification given in the coursework; this |
79 def flts(rs: List[Rexp], acc: List[Rexp] = Nil) : List[Rexp] = rs match { |
90 // function simplifies a regular expression from |
80 case Nil => acc |
91 // the inside out, like you would simplify arithmetic |
81 case ZERO::rs => ZERO::Nil |
92 // expressions; however it does not simplify inside |
82 case ONE::rs => flts(rs, acc) |
93 // STAR-regular expressions. |
83 case SEQs(rs1)::rs => flts(rs, acc ::: rs1) |
|
84 case r::rs => flts(rs, acc :+ r) |
|
85 } |
94 |
86 |
|
87 // (5) |
|
88 def ALTs_smart(rs: List[Rexp]) : Rexp = rs match { |
|
89 case Nil => ZERO |
|
90 case r::Nil => r |
|
91 case rs => ALTs(rs) |
|
92 } |
|
93 |
|
94 def SEQs_smart(rs: List[Rexp]) : Rexp = rs match { |
|
95 case Nil => ONE |
|
96 case ZERO::nil => ZERO |
|
97 case r::Nil => r |
|
98 case rs => SEQs(rs) |
|
99 } |
|
100 |
|
101 // (6) |
95 |
102 |
96 def simp(r: Rexp) : Rexp = r match { |
103 def simp(r: Rexp) : Rexp = r match { |
97 case ALTs(rs) => (flts(rs.map(simp)).distinct) match { |
104 case ALTs(rs) => |
98 case Nil => ZERO |
105 ALTs_smart(denest(rs.map(simp)).distinct) |
99 case r::Nil => r |
106 case SEQs(rs) => |
100 case rs => ALTs(rs) |
107 SEQs_smart(flts(rs.map(simp))) |
101 } |
|
102 case SEQ(r1, r2) => (simp(r1), simp(r2)) match { |
|
103 case (ZERO, _) => ZERO |
|
104 case (_, ZERO) => ZERO |
|
105 case (ONE, r2s) => r2s |
|
106 case (r1s, ONE) => r1s |
|
107 case (r1s, r2s) => SEQ(r1s, r2s) |
|
108 } |
|
109 case r => r |
108 case r => r |
110 } |
109 } |
111 |
110 |
112 simp(ALT(ONE | CHAR('a'), CHAR('a') | ONE)) |
111 //println("Simp tests") |
|
112 //println(simp(ALT(ONE | CHAR('a'), CHAR('a') | ONE))) |
|
113 //println(simp(((CHAR('a') | ZERO) ~ ONE) | |
|
114 // (((ONE | CHAR('b')) | CHAR('c')) ~ (CHAR('d') ~ ZERO)))) |
113 |
115 |
114 // (5) Complete the two functions below; the first |
116 |
115 // calculates the derivative w.r.t. a string; the second |
117 // (7) |
116 // is the regular expression matcher taking a regular |
|
117 // expression and a string and checks whether the |
|
118 // string matches the regular expression. |
|
119 |
118 |
120 def ders (s: List[Char], r: Rexp) : Rexp = s match { |
119 def ders (s: List[Char], r: Rexp) : Rexp = s match { |
121 case Nil => r |
120 case Nil => r |
122 case c::s => ders(s, simp(der(c, r))) |
121 case c::s => ders(s, simp(der(c, r))) |
123 } |
122 } |
124 |
123 |
125 // main matcher function |
124 // main matcher function |
126 def matcher(r: Rexp, s: String) = nullable(ders(s.toList, r)) |
125 def matcher(r: Rexp, s: String) = nullable(ders(s.toList, r)) |
127 |
126 |
128 // (6) Complete the size function for regular |
127 // (8) |
129 // expressions according to the specification |
|
130 // given in the coursework. |
|
131 |
|
132 |
128 |
133 def size(r: Rexp): Int = r match { |
129 def size(r: Rexp): Int = r match { |
134 case ZERO => 1 |
130 case ZERO => 1 |
135 case ONE => 1 |
131 case ONE => 1 |
136 case CHAR(_) => 1 |
132 case CHAR(_) => 1 |
137 case ALTs(rs) => 1 + rs.map(size).sum |
133 case ALTs(rs) => 1 + rs.map(size).sum |
138 case SEQ(r1, r2) => 1 + size(r1) + size (r2) |
134 case SEQs(rs) => 1 + rs.map(size).sum |
139 case STAR(r1) => 1 + size(r1) |
135 case STAR(r1) => 1 + size(r1) |
140 } |
136 } |
141 |
137 |
142 |
138 |
143 |
139 |
144 // some testing data |
140 // some testing data |
145 |
141 /* |
146 //matcher(("a" ~ "b") ~ "c", "abc") // => true |
142 println(matcher(("a" ~ "b") ~ "c", "abc")) // => true |
147 //matcher(("a" ~ "b") ~ "c", "ab") // => false |
143 println(matcher(("a" ~ "b") ~ "c", "ab")) // => false |
148 |
144 |
149 // the supposedly 'evil' regular expression (a*)* b |
145 // the supposedly 'evil' regular expression (a*)* b |
150 // val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) |
146 val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) |
151 |
147 |
152 //println(matcher(EVIL, "a" * 1000 ++ "b")) // => true |
148 println(matcher(EVIL, "a" * 1000 ++ "b")) // => true |
153 //println(matcher(EVIL, "a" * 1000)) // => false |
149 println(matcher(EVIL, "a" * 1000)) // => false |
154 |
150 |
155 // size without simplifications |
151 // size without simplifications |
156 //println(size(der('a', der('a', EVIL)))) // => 28 |
152 println(size(der('a', der('a', EVIL)))) // => 36 |
157 //println(size(der('a', der('a', der('a', EVIL))))) // => 58 |
153 println(size(der('a', der('a', der('a', EVIL))))) // => 83 |
158 |
154 |
159 // size with simplification |
155 // size with simplification |
160 //println(simp(der('a', der('a', EVIL)))) |
156 println(simp(der('a', der('a', EVIL)))) |
161 //println(simp(der('a', der('a', der('a', EVIL))))) |
157 println(simp(der('a', der('a', der('a', EVIL))))) |
162 |
158 |
163 //println(size(simp(der('a', der('a', EVIL))))) // => 8 |
159 println(size(simp(der('a', der('a', EVIL))))) // => 7 |
164 //println(size(simp(der('a', der('a', der('a', EVIL)))))) // => 8 |
160 println(size(simp(der('a', der('a', der('a', EVIL)))))) // => 7 |
165 |
161 |
166 // Python needs around 30 seconds for matching 28 a's with EVIL. |
162 // Python needs around 30 seconds for matching 28 a's with EVIL. |
167 // Java 9 and later increase this to an "astonishing" 40000 a's in |
163 // Java 9 and later increase this to an "astonishing" 40000 a's in |
168 // around 30 seconds. |
164 // around 30 seconds. |
169 // |
165 // |
170 // Lets see how long it takes to match strings with |
166 // Lets see how long it takes to match strings with |
171 // 5 Million a's...it should be in the range of a |
167 // 5 Million a's...it should be in the range of a |
172 // couple of seconds. |
168 // few seconds. |
173 |
169 |
174 def time_needed[T](i: Int, code: => T) = { |
170 def time_needed[T](i: Int, code: => T) = { |
175 val start = System.nanoTime() |
171 val start = System.nanoTime() |
176 for (j <- 1 to i) code |
172 for (j <- 1 to i) code |
177 val end = System.nanoTime() |
173 val end = System.nanoTime() |
178 "%.5f".format((end - start)/(i * 1.0e9)) |
174 "%.5f".format((end - start)/(i * 1.0e9)) |
179 } |
175 } |
180 |
176 |
181 //for (i <- 0 to 5000000 by 500000) { |
177 for (i <- 0 to 5000000 by 500000) { |
182 // println(s"$i ${time_needed(2, matcher(EVIL, "a" * i))} secs.") |
178 println(s"$i ${time_needed(2, matcher(EVIL, "a" * i))} secs.") |
183 //} |
179 } |
184 |
180 |
185 // another "power" test case |
181 // another "power" test case |
186 //simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(100).next) == ONE |
182 println(simp(Iterator.iterate(ONE:Rexp)(r => SEQ(r, ONE | ONE)).drop(100).next()) == ONE) |
187 |
183 |
188 // the Iterator produces the rexp |
184 // the Iterator produces the rexp |
189 // |
185 // |
190 // SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE) |
186 // SEQ(SEQ(SEQ(..., ONE | ONE) , ONE | ONE), ONE | ONE) |
191 // |
187 // |
192 // where SEQ is nested 50 times. |
188 // where SEQ is nested 100 times. |
193 |
189 */ |
194 |
190 |
195 |
191 |
196 } |
192 } |
|
193 |