author | Christian Urban <urbanc@in.tum.de> |
Wed, 13 Mar 2019 10:36:29 +0000 | |
changeset 314 | 20a57552d722 |
parent 312 | 8b0b414e71b0 |
permissions | -rw-r--r-- |
298 | 1 |
|
2 |
import scala.language.implicitConversions |
|
3 |
import scala.language.reflectiveCalls |
|
4 |
import scala.annotation.tailrec |
|
5 |
import scala.util.Try |
|
6 |
||
7 |
// Renders `raw` the way the Scala reflection API prints a string constant,
// i.e. as a quoted literal in which special characters are escaped.
def escape(raw: String) : String = {
  val universe = scala.reflect.runtime.universe
  val literal = universe.Literal(universe.Constant(raw))
  literal.toString
}
|
11 |
||
12 |
// Applies `escape` to both components of a pair of strings.
def esc2(r: (String, String)) = r match {
  case (fst, snd) => (escape(fst), escape(snd))
}
|
13 |
||
14 |
||
15 |
||
16 |
// Usual regular expressions.
// The hierarchy is sealed so that the compiler can report pattern matches
// that do not cover every constructor.
sealed abstract class Rexp
case object ZERO extends Rexp                    // never nullable; its derivative is ZERO
case object ONE extends Rexp                     // nullable; matches the empty string
case class CHAR(c: Char) extends Rexp            // a single character
case class ALTS(rs: List[Rexp]) extends Rexp     // alternative over a list of expressions
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp  // r1 followed by r2
case class STAR(r: Rexp) extends Rexp            // zero or more repetitions of r
case class RECD(x: String, r: Rexp) extends Rexp // r, recorded under the name x
|
25 |
||
26 |
// Abbreviation: a binary alternative is an ALTS with exactly two elements.
def ALT(r1: Rexp, r2: Rexp) = ALTS(r1 :: r2 :: Nil)
|
28 |
||
29 |
// Values: they describe how a regular expression matched a string.
// The hierarchy is sealed so that the compiler can report pattern matches
// that do not cover every constructor.
sealed abstract class Val
case object Empty extends Val                 // value of ONE (flattens to "")
case class Chr(c: Char) extends Val           // a matched character
case class Sequ(v1: Val, v2: Val) extends Val // value of a sequence
case class Left(v: Val) extends Val           // first branch of a binary alternative
case class Right(v: Val) extends Val          // second branch of a binary alternative
case class Stars(vs: List[Val]) extends Val   // one value per iteration of a star
case class Rec(x: String, v: Val) extends Val // value recorded under the name x
|
38 |
||
39 |
||
312
8b0b414e71b0
added size bounds for partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
311
diff
changeset
|
40 |
|
298 | 41 |
// some convenience for typing in regular expressions |
42 |
// Turns a list of characters into the regular expression matching exactly
// that string: ONE for the empty list, otherwise a right-nested SEQ of CHARs.
def charlist2rexp(s : List[Char]): Rexp = {
  val chars = s.map(c => CHAR(c): Rexp)
  chars.reduceRightOption[Rexp]((hd, rest) => SEQ(hd, rest)).getOrElse(ONE)
}
|
47 |
// Implicit view from strings to regular expressions (via charlist2rexp).
implicit def string2rexp(s : String) : Rexp = {
  val chars = s.toList
  charlist2rexp(chars)
}
|
48 |
||
49 |
// Infix/postfix operators on regular expressions.
// Declared as an implicit class rather than an anonymous structural type,
// so the operators are ordinary method calls instead of reflective calls.
implicit class RexpOps(r: Rexp) {
  // r | s : alternative
  def | (s: Rexp) = ALT(r, s)
  // r.%  : star
  def % = STAR(r)
  // r ~ s : sequence
  def ~ (s: Rexp) = SEQ(r, s)
}
|
54 |
||
55 |
// The same operators with a string on the left-hand side; the string is
// turned into a regular expression by the implicit string2rexp view.
// Declared as an implicit class rather than an anonymous structural type,
// so the operators are ordinary method calls instead of reflective calls.
implicit class stringOps(s: String) {
  // alternative
  def | (r: Rexp) = ALT(s, r)
  def | (r: String) = ALT(s, r)
  // star
  def % = STAR(s)
  // sequence
  def ~ (r: Rexp) = SEQ(s, r)
  def ~ (r: String) = SEQ(s, r)
  // s $ r : record r under the name s
  def $ (r: Rexp) = RECD(s, r)
}
|
63 |
||
64 |
||
300 | 65 |
// String representation of a regular expression - for testing purposes.
//   ZERO -> 0, ONE -> 1, alternatives -> [r1|r2|...], sequences -> (r1 ~ r2),
//   stars -> {r}*, records -> (x! r)
def string(r: Rexp): String = r match {
  case ZERO => "0"
  case ONE => "1"
  case CHAR(c) => String.valueOf(c)
  case ALTS(rs) => "[" + rs.map(string).mkString("|") + "]"
  case SEQ(r1, r2) => "(" + string(r1) + " ~ " + string(r2) + ")"
  case STAR(body) => "{" + string(body) + "}*"
  case RECD(x, body) => "(" + x + "! " + string(body) + ")"
}
|
75 |
||
300 | 76 |
|
306
6756b026c5fe
added partial derivatives to compare sizes
Christian Urban <urbanc@in.tum.de>
parents:
305
diff
changeset
|
77 |
//-------------------------------------------------------------- |
298 | 78 |
// START OF NON-BITCODE PART |
79 |
// |
|
80 |
||
81 |
// nullable: tests whether the regular expression can recognise
// the empty string
def nullable (r: Rexp) : Boolean = r match {
  case ONE | STAR(_) => true
  case ZERO | CHAR(_) => false
  case ALTS(rs) => rs.exists(nullable)
  case SEQ(r1, r2) => if (nullable(r1)) nullable(r2) else false
  case RECD(_, body) => nullable(body)
}
|
92 |
||
93 |
// Derivative of a regular expression w.r.t. a character.
// Alternatives of any length are supported: the derivative is taken
// element-wise, which for a two-element ALTS gives exactly the binary rule.
def der (c: Char, r: Rexp) : Rexp = r match {
  case ZERO => ZERO
  case ONE => ZERO
  case CHAR(d) => if (c == d) ONE else ZERO
  case ALTS(rs) => ALTS(rs.map(der(c, _)))
  case SEQ(r1, r2) =>
    // if r1 can match the empty string, c may also be consumed by r2
    if (nullable(r1)) ALTS(List(SEQ(der(c, r1), r2), der(c, r2)))
    else SEQ(der(c, r1), r2)
  case STAR(r) => SEQ(der(c, r), STAR(r))
  // the record name is dropped; only the body is derived
  case RECD(_, r1) => der(c, r1)
}
|
105 |
||
106 |
||
107 |
// Flattens a value to the string it stands for.
def flatten(v: Val) : String = v match {
  case Empty => ""
  case Chr(c) => c.toString
  case Left(inner) => flatten(inner)
  case Right(inner) => flatten(inner)
  case Sequ(v1, v2) => s"${flatten(v1)}${flatten(v2)}"
  case Stars(vs) => vs.iterator.map(flatten).mkString("")
  case Rec(_, inner) => flatten(inner)
}
|
116 |
||
117 |
// Extracts an environment from a value: one (name, matched string) pair per
// Rec node, with an outer record listed before the records nested inside it.
def env(v: Val) : List[(String, String)] = v match {
  case Empty | Chr(_) => Nil
  case Left(inner) => env(inner)
  case Right(inner) => env(inner)
  case Sequ(v1, v2) => env(v1) ++ env(v2)
  case Stars(vs) => for (item <- vs; binding <- env(item)) yield binding
  case Rec(x, inner) => (x -> flatten(inner)) :: env(inner)
}
|
127 |
||
128 |
||
129 |
// injection part |
|
130 |
// Builds the value for matching the empty string with r.
// Only defined for ONE, binary ALTS, SEQ, STAR and RECD; a binary
// alternative prefers its left branch when that branch is nullable.
def mkeps(r: Rexp) : Val = r match {
  case ONE => Empty
  case ALTS(List(r1, _)) if nullable(r1) => Left(mkeps(r1))
  case ALTS(List(_, r2)) => Right(mkeps(r2))
  case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
  case STAR(_) => Stars(Nil)
  case RECD(x, body) => Rec(x, mkeps(body))
}
|
138 |
||
139 |
// Injects the character c back into the value v: v is a value for the
// derivative of r w.r.t. c, the result is a value for r itself.
// The cases are tried in the order written.
def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
  case (STAR(body), Sequ(hd, Stars(rest))) => Stars(inj(body, c, hd) :: rest)
  case (SEQ(fst, _), Sequ(hd, tl)) => Sequ(inj(fst, c, hd), tl)
  case (SEQ(fst, _), Left(Sequ(hd, tl))) => Sequ(inj(fst, c, hd), tl)
  case (SEQ(fst, snd), Right(tl)) => Sequ(mkeps(fst), inj(snd, c, tl))
  case (ALTS(List(lhs, _)), Left(inner)) => Left(inj(lhs, c, inner))
  case (ALTS(List(_, rhs)), Right(inner)) => Right(inj(rhs, c, inner))
  case (CHAR(_), Empty) => Chr(c)
  // the whole value v is injected into the body of the record
  case (RECD(x, body), _) => Rec(x, inj(body, c, v))
}
|
149 |
||
150 |
// Lexing without simplification: derive by each character in turn, build the
// value for the empty string at the end, and inject the characters back on
// the way out. Throws an Exception("Not matched") if r does not match s.
def lex(r: Rexp, s: List[Char]) : Val =
  if (s.isEmpty) {
    if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
  } else {
    val c = s.head
    inj(r, c, lex(der(c, r), s.tail))
  }
|
155 |
||
156 |
// String front end for lex.
def lexing(r: Rexp, s: String) : Val = {
  val chars = s.toList
  lex(r, chars)
}
|
157 |
||
158 |
//println(lexing(("ab" | "ab") ~ ("b" | ONE), "ab")) |
|
159 |
||
160 |
// "Rectification" functions for simplification: each one maps a value of a
// simplified regular expression back to a value of the unsimplified one.

// leaves the value unchanged
def F_ID(v: Val): Val = identity(v)

// rectifies with f, then tags the result as the right alternative
def F_RIGHT(f: Val => Val) = (in: Val) => Right(f(in))

// rectifies with f, then tags the result as the left alternative
def F_LEFT(f: Val => Val) = (in: Val) => Left(f(in))

// rectifies inside whichever alternative the value belongs to
def F_ALT(f1: Val => Val, f2: Val => Val) = (in: Val) => in match {
  case Left(inner) => Left(f1(inner))
  case Right(inner) => Right(f2(inner))
}

// rectifies both components of a sequence value
def F_SEQ(f1: Val => Val, f2: Val => Val) = (in: Val) => in match {
  case Sequ(fst, snd) => Sequ(f1(fst), f2(snd))
}

// the first component was simplified away to ONE: rebuild it from Empty
def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) =
  (in: Val) => Sequ(f1(Empty), f2(in))

// the second component was simplified away to ONE: rebuild it from Empty
def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) =
  (in: Val) => Sequ(f1(in), f2(Empty))

// rectifies the value inside a record, keeping its name
def F_RECD(f: Val => Val) = (in: Val) => in match {
  case Rec(x, inner) => Rec(x, f(inner))
}

// used where the simplified expression is ZERO; always throws
def F_ERROR(v: Val): Val = throw new Exception("error")
|
179 |
||
180 |
// Simplification of regular expressions, returning also a rectification
// function that maps a value of the simplified expression back to a value of
// the original one. Only binary ALTS, SEQ and RECD are simplified; everything
// else (in particular anything under a STAR) is returned unchanged.
def simp(r: Rexp): (Rexp, Val => Val) = r match {
  case ALTS(List(left, right)) =>
    val (ls, lf) = simp(left)
    val (rs, rf) = simp(right)
    (ls, rs) match {
      case (ZERO, _) => (rs, F_RIGHT(rf))
      case (_, ZERO) => (ls, F_LEFT(lf))
      // both branches are equal: keep only the left one
      case _ if ls == rs => (ls, F_LEFT(lf))
      case _ => (ALTS(List(ls, rs)), F_ALT(lf, rf))
    }
  case SEQ(first, second) =>
    val (fs, ff) = simp(first)
    val (ss, sf) = simp(second)
    (fs, ss) match {
      case (ZERO, _) | (_, ZERO) => (ZERO, F_ERROR)
      case (ONE, _) => (ss, F_SEQ_Empty1(ff, sf))
      case (_, ONE) => (fs, F_SEQ_Empty2(ff, sf))
      case _ => (SEQ(fs, ss), F_SEQ(ff, sf))
    }
  case RECD(x, body) =>
    val (bs, bf) = simp(body)
    (RECD(x, bs), F_RECD(bf))
  case other => (other, F_ID)
}
|
210 |
||
299
cae7eab03018
added some timing and size tests when doing the derivatives
Christian Urban <urbanc@in.tum.de>
parents:
298
diff
changeset
|
211 |
// Derivative w.r.t. a whole string, simplifying after every character.
def ders_simp(s: List[Char], r: Rexp) : Rexp =
  s.foldLeft(r)((acc, c) => simp(der(c, acc))._1)
cae7eab03018
added some timing and size tests when doing the derivatives
Christian Urban <urbanc@in.tum.de>
parents:
298
diff
changeset
|
215 |
|
cae7eab03018
added some timing and size tests when doing the derivatives
Christian Urban <urbanc@in.tum.de>
parents:
298
diff
changeset
|
216 |
|
298 | 217 |
// Lexing with simplification: like lex, but every derivative is simplified
// and the value coming back is rectified before the character is injected.
// Throws an Exception("Not matched") if r does not match s.
def lex_simp(r: Rexp, s: List[Char]) : Val =
  if (s.isEmpty) {
    if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
  } else {
    val c = s.head
    val (simplified, rectify) = simp(der(c, r))
    val tailValue = lex_simp(simplified, s.tail)
    inj(r, c, rectify(tailValue))
  }
|
224 |
||
225 |
// String front end for lex_simp.
def lexing_simp(r: Rexp, s: String) : Val = {
  val chars = s.toList
  lex_simp(r, chars)
}
|
226 |
||
306
6756b026c5fe
added partial derivatives to compare sizes
Christian Urban <urbanc@in.tum.de>
parents:
305
diff
changeset
|
227 |
//println(lexing_simp(("a" | "ab") ~ ("b" | ""), "ab")) |
6756b026c5fe
added partial derivatives to compare sizes
Christian Urban <urbanc@in.tum.de>
parents:
305
diff
changeset
|
228 |
|
6756b026c5fe
added partial derivatives to compare sizes
Christian Urban <urbanc@in.tum.de>
parents:
305
diff
changeset
|
229 |
|
6756b026c5fe
added partial derivatives to compare sizes
Christian Urban <urbanc@in.tum.de>
parents:
305
diff
changeset
|
230 |
// Lexes s with r and returns the recorded (name, string) pairs, both escaped.
def tokenise_simp(r: Rexp, s: String) = {
  val bindings = env(lexing_simp(r, s))
  bindings.map(esc2)
}
6756b026c5fe
added partial derivatives to compare sizes
Christian Urban <urbanc@in.tum.de>
parents:
305
diff
changeset
|
232 |
|
6756b026c5fe
added partial derivatives to compare sizes
Christian Urban <urbanc@in.tum.de>
parents:
305
diff
changeset
|
233 |
//-------------------------------------------------------------------- |
6756b026c5fe
added partial derivatives to compare sizes
Christian Urban <urbanc@in.tum.de>
parents:
305
diff
changeset
|
234 |
// Partial Derivatives |
298 | 235 |
|
236 |
||
306
6756b026c5fe
added partial derivatives to compare sizes
Christian Urban <urbanc@in.tum.de>
parents:
305
diff
changeset
|
237 |
// Partial derivative of r w.r.t. the character c, as a set of
// regular expressions.
def pder(c: Char, r: Rexp): Set[Rexp] = r match {
  case ZERO | ONE => Set.empty
  case CHAR(d) => if (c == d) Set(ONE) else Set.empty
  case ALTS(rs) => rs.toSet[Rexp].flatMap(alt => pder(c, alt))
  case SEQ(r1, r2) =>
    val viaFirst: Set[Rexp] = pder(c, r1).map(p => SEQ(p, r2): Rexp)
    // if r1 can match the empty string, c may also be consumed by r2
    val viaSecond: Set[Rexp] = if (nullable(r1)) pder(c, r2) else Set.empty
    viaFirst ++ viaSecond
  case STAR(r1) => pder(c, r1).map(p => SEQ(p, STAR(r1)): Rexp)
  case RECD(_, r1) => pder(c, r1)
}
298 | 249 |
|
306
6756b026c5fe
added partial derivatives to compare sizes
Christian Urban <urbanc@in.tum.de>
parents:
305
diff
changeset
|
250 |
// Partial derivatives w.r.t. a whole string: take the partial derivatives
// for the first character, then continue with the rest from each of them.
def pders(cs: List[Char], r: Rexp): Set[Rexp] =
  if (cs.isEmpty) Set(r)
  else pder(cs.head, r).flatMap(p => pders(cs.tail, p))
6756b026c5fe
added partial derivatives to compare sizes
Christian Urban <urbanc@in.tum.de>
parents:
305
diff
changeset
|
254 |
|
6756b026c5fe
added partial derivatives to compare sizes
Christian Urban <urbanc@in.tum.de>
parents:
305
diff
changeset
|
255 |
// Like pders, but the set obtained for each non-empty string is simplified
// element-wise with simp before it is returned.
def pders_simp(cs: List[Char], r: Rexp): Set[Rexp] =
  if (cs.isEmpty) Set(r)
  else {
    val unsimplified = pder(cs.head, r).flatMap(p => pders_simp(cs.tail, p))
    unsimplified.map(p => simp(p)._1)
  }
6756b026c5fe
added partial derivatives to compare sizes
Christian Urban <urbanc@in.tum.de>
parents:
305
diff
changeset
|
259 |
|
6756b026c5fe
added partial derivatives to compare sizes
Christian Urban <urbanc@in.tum.de>
parents:
305
diff
changeset
|
260 |
// Sum of the sizes of all regular expressions in the set.
// The sizes are mapped over an iterator, not over the Set itself: mapping a
// Set yields a Set[Int], which would merge expressions of equal size and
// make the sum too small.
def psize(rs: Set[Rexp]) : Int =
  rs.iterator.map(size).sum
6756b026c5fe
added partial derivatives to compare sizes
Christian Urban <urbanc@in.tum.de>
parents:
305
diff
changeset
|
262 |
|
298 | 263 |
|
312
8b0b414e71b0
added size bounds for partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
311
diff
changeset
|
264 |
// A simple recursive-descent parser for regexes.
//
//   Regex  ::= Term [ '|' Regex ]
//   Term   ::= Factor*                  (an empty Term is ONE)
//   Factor ::= Base ( '*' | '?' | '+' )*
//   Base   ::= '(' Regex ')' | any other character
//
// The parser is stateful: i is the index of the next unread character of s.
// Regex() does not check that the whole input was consumed (for example a
// stray ')' is left unread); callers can test more() afterwards.
case class Parser(s: String) {
  var i = 0

  // next unread character, without consuming it
  def peek() = s(i)

  // consumes c; throws an Exception if the next character is not c or the
  // input has ended
  def eat(c: Char) =
    if (!more()) throw new Exception("Expected " + c + " got end of input")
    else if (c == s(i)) i = i + 1
    else throw new Exception("Expected " + c + " got " + s(i))

  // consumes and returns the next character
  def next() = { i = i + 1; s(i - 1) }

  // true while there is unread input
  def more() = s.length - i > 0

  // true while the next character can start another Factor of the current Term
  private def inTerm = more() && peek() != ')' && peek() != '|'

  def Regex() : Rexp = {
    val t = Term()
    if (more() && peek() == '|') {
      eat('|')
      ALT(t, Regex())
    }
    else t
  }

  def Term() : Rexp = {
    var f : Rexp = if (inTerm) Factor() else ONE
    while (inTerm) {
      f = SEQ(f, Factor())
    }
    f
  }

  // Postfix operators may follow a Base in any order and any number:
  //   r*  =>  STAR(r),   r?  =>  ALT(r, ONE),   r+  =>  SEQ(r, STAR(r))
  def Factor() : Rexp = {
    var b = Base()
    var postfix = true
    while (postfix && more()) peek() match {
      case '*' =>
        eat('*')
        b = STAR(b)
      case '?' =>
        eat('?')
        b = ALT(b, ONE)
      case '+' =>
        eat('+')
        b = SEQ(b, STAR(b))
      case _ =>
        postfix = false
    }
    b
  }

  def Base() : Rexp = {
    if (!more()) throw new Exception("Unexpected end of input")
    peek() match {
      case '(' =>
        eat('(')
        val r = Regex()
        eat(')')
        r // if groups should be recorded, return RECD("", r) instead
      case _ => CHAR(next())
    }
  }
}
|
317 |
||
312
8b0b414e71b0
added size bounds for partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
311
diff
changeset
|
318 |
// two simple examples for the regex parser: parse each one and print it
println("two simple examples for the regex parser")

List(
  "a|(bc)*",
  "(a|b)*(babab(a|b)*bab|bba(a|b)*bab)(a|b)*"
).foreach(src => println(string(Parser(src).Regex())))
298 | 324 |
|
325 |
||
326 |
||
312
8b0b414e71b0
added size bounds for partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
311
diff
changeset
|
327 |
//System.exit(0) |
299
cae7eab03018
added some timing and size tests when doing the derivatives
Christian Urban <urbanc@in.tum.de>
parents:
298
diff
changeset
|
328 |
|
298 | 329 |
// Testing |
330 |
//============ |
|
331 |
||
332 |
// Runs `code` once and returns the elapsed wall-clock time in seconds,
// rendered as a string. The value computed by `code` is discarded.
def time[T](code: => T) = {
  val t0 = System.nanoTime()
  code // evaluated only for its running time
  val elapsedNanos = System.nanoTime() - t0
  (elapsedNanos / 1.0e9).toString
}
|
339 |
||
299
cae7eab03018
added some timing and size tests when doing the derivatives
Christian Urban <urbanc@in.tum.de>
parents:
298
diff
changeset
|
340 |
// Runs `code` eleven times (ten runs whose results are dropped, then one
// whose result is kept) and returns that result together with the total
// elapsed time of all eleven runs in nanoseconds.
def timeR[T](code: => T) = {
  val t0 = System.nanoTime()
  (1 to 10).foreach(_ => code)
  val result = code
  val t1 = System.nanoTime()
  (result, t1 - t0)
}
cae7eab03018
added some timing and size tests when doing the derivatives
Christian Urban <urbanc@in.tum.de>
parents:
298
diff
changeset
|
347 |
|
298 | 348 |
// Size of a regular expression, for testing purposes: the number of nodes,
// where a RECD wrapper itself is not counted.
def size(r: Rexp) : Int = r match {
  case ZERO | ONE | CHAR(_) => 1
  case SEQ(r1, r2) => size(r1) + size(r2) + 1
  case ALTS(rs) => rs.foldLeft(1)((acc, alt) => acc + size(alt))
  case STAR(body) => size(body) + 1
  case RECD(_, body) => size(body)
}
358 |
||
359 |
// Enumerates all strings of length at most n over the alphabet cs:
// the strings of length at most n - 1, plus each of them with one more
// character of cs put in front.
def strs(n: Int, cs: String) : Set[String] =
  if (n == 0) Set("")
  else {
    val shorter = strs(n - 1, cs)
    val extended = shorter.flatMap(s => cs.toList.map(c => c.toString + s))
    shorter ++ extended
  }
|
312
8b0b414e71b0
added size bounds for partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
311
diff
changeset
|
368 |
|
298 | 369 |
// Lazily enumerates regular expressions over the characters of s.
// Level 0 is ZERO, ONE and one CHAR per character; level n is everything of
// level n - 1 followed by all ALTs, all SEQs and all STARs built from it.
def enum(n: Int, s: String) : Stream[Rexp] =
  if (n == 0) ZERO #:: ONE #:: s.toStream.map(CHAR)
  else {
    val rs = enum(n - 1, s)
    rs #:::
    (rs.flatMap(r1 => rs.map(r2 => ALT(r1, r2)))) #:::
    (rs.flatMap(r1 => rs.map(r2 => SEQ(r1, r2)))) #:::
    (rs.map(r1 => STAR(r1)))
  }
379 |
||
312
8b0b414e71b0
added size bounds for partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
311
diff
changeset
|
380 |
|
298 | 381 |
|
382 |
||
312
8b0b414e71b0
added size bounds for partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
311
diff
changeset
|
383 |
// Prints the partial derivatives of the example regex w.r.t. all strings
// over {a, b} produced by strs(6, "ab"), first as computed and then after
// simplifying each of them.
println("Antimirov Example 5.5")

val antimirov = Parser("(a|b)*(babab(a|b)*bab|bba(a|b)*bab)(a|b)*").Regex()
val strings = strs(6, "ab")
val pds = strings.flatMap(str => pders(str.toList, antimirov))
val pds_simplified = pds.map(pd => simp(pd)._1)

println("Unsimplified set")
println(pds.map(string).mkString("\n"))
println(s"Number of pds ${pds.size}")
println("\nSimplified set")
println(pds_simplified.map(string).mkString("\n"))
println(s"Number of pds ${pds_simplified.size}")
299
cae7eab03018
added some timing and size tests when doing the derivatives
Christian Urban <urbanc@in.tum.de>
parents:
298
diff
changeset
|
397 |
|
cae7eab03018
added some timing and size tests when doing the derivatives
Christian Urban <urbanc@in.tum.de>
parents:
298
diff
changeset
|
398 |
|
cae7eab03018
added some timing and size tests when doing the derivatives
Christian Urban <urbanc@in.tum.de>
parents:
298
diff
changeset
|
399 |
|
305 | 400 |
|
312
8b0b414e71b0
added size bounds for partial derivatives
Christian Urban <urbanc@in.tum.de>
parents:
311
diff
changeset
|
401 |
// Factorial of n.
// Throws IllegalArgumentException for negative n (the plain recursion would
// never reach its base case). The result is an Int and wraps around
// silently for n > 12.
def fact(n: Int) : Int = {
  require(n >= 0, s"fact is undefined for negative argument $n")
  @scala.annotation.tailrec
  def go(k: Int, acc: Int): Int =
    if (k == 0) acc else go(k - 1, acc * k)
  go(n, 1)
}