11 case class STAR(r: Rexp) extends Rexp |
11 case class STAR(r: Rexp) extends Rexp |
12 case class RECD(x: String, r: Rexp) extends Rexp |
12 case class RECD(x: String, r: Rexp) extends Rexp |
13 case class CRANGE(cs: String) extends Rexp |
13 case class CRANGE(cs: String) extends Rexp |
14 case class PLUS(r: Rexp) extends Rexp |
14 case class PLUS(r: Rexp) extends Rexp |
15 case class OPT(r: Rexp) extends Rexp |
15 case class OPT(r: Rexp) extends Rexp |
|
16 case class NTIMES(r: Rexp, n: Int) extends Rexp |
16 |
17 |
17 abstract class Val |
18 abstract class Val |
18 case object Empty extends Val |
19 case object Empty extends Val |
19 case class Chr(c: Char) extends Val |
20 case class Chr(c: Char) extends Val |
20 case class Seq(v1: Val, v2: Val) extends Val |
21 case class Seq(v1: Val, v2: Val) extends Val |
57 case STAR(_) => true |
58 case STAR(_) => true |
58 case RECD(_, r) => nullable(r) |
59 case RECD(_, r) => nullable(r) |
59 case CRANGE(_) => false |
60 case CRANGE(_) => false |
60 case PLUS(r) => nullable(r) |
61 case PLUS(r) => nullable(r) |
61 case OPT(_) => true |
62 case OPT(_) => true |
|
63 case NTIMES(r, n) => if (n == 0) true else nullable(r) |
62 } |
64 } |
63 |
65 |
64 // derivative of a regular expression w.r.t. a character |
66 // derivative of a regular expression w.r.t. a character |
65 def der (c: Char, r: Rexp) : Rexp = r match { |
67 def der (c: Char, r: Rexp) : Rexp = r match { |
66 case NULL => NULL |
68 case NULL => NULL |
73 case STAR(r) => SEQ(der(c, r), STAR(r)) |
75 case STAR(r) => SEQ(der(c, r), STAR(r)) |
74 case RECD(_, r1) => der(c, r1) |
76 case RECD(_, r1) => der(c, r1) |
75 case CRANGE(cs) => if (cs.contains(c)) EMPTY else NULL |
77 case CRANGE(cs) => if (cs.contains(c)) EMPTY else NULL |
76 case PLUS(r) => SEQ(der(c, r), STAR(r)) |
78 case PLUS(r) => SEQ(der(c, r), STAR(r)) |
77 case OPT(r) => ALT(der(c, r), NULL) |
79 case OPT(r) => ALT(der(c, r), NULL) |
|
80 case NTIMES(r, n) => if (n == 0) NULL else der(c, SEQ(r, NTIMES(r, n - 1))) |
78 } |
81 } |
79 |
82 |
80 // derivative w.r.t. a string (iterates der) |
83 // derivative w.r.t. a string (iterates der) |
81 def ders (s: List[Char], r: Rexp) : Rexp = s match { |
84 def ders (s: List[Char], r: Rexp) : Rexp = s match { |
82 case Nil => r |
85 case Nil => r |
113 case SEQ(r1, r2) => Seq(mkeps(r1), mkeps(r2)) |
116 case SEQ(r1, r2) => Seq(mkeps(r1), mkeps(r2)) |
114 case STAR(r) => Stars(Nil) |
117 case STAR(r) => Stars(Nil) |
115 case RECD(x, r) => Rec(x, mkeps(r)) |
118 case RECD(x, r) => Rec(x, mkeps(r)) |
116 case PLUS(r) => Stars(List(mkeps(r))) |
119 case PLUS(r) => Stars(List(mkeps(r))) |
117 case OPT(_) => Right(Empty) |
120 case OPT(_) => Right(Empty) |
|
121 case NTIMES(r, n) => if (n == 0) Stars(Nil) else Stars(Nil.padTo(n, mkeps(r))) |
|
122 case _ => { println ("r : " + r.toString); |
|
123 throw new Exception("mkeps error")} |
118 } |
124 } |
119 |
125 |
120 |
126 |
121 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { |
127 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { |
122 case (STAR(r), Seq(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) |
128 case (STAR(r), Seq(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) |
128 case (CHAR(_), Empty) => Chr(c) |
134 case (CHAR(_), Empty) => Chr(c) |
129 case (CRANGE(_), Empty) => Chr(c) |
135 case (CRANGE(_), Empty) => Chr(c) |
130 case (RECD(x, r1), _) => Rec(x, inj(r1, c, v)) |
136 case (RECD(x, r1), _) => Rec(x, inj(r1, c, v)) |
131 case (PLUS(r), Seq(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) |
137 case (PLUS(r), Seq(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) |
132 case (OPT(r), Left(v1)) => Left(inj(r, c, v1)) |
138 case (OPT(r), Left(v1)) => Left(inj(r, c, v1)) |
|
139 case (NTIMES(r, n), Seq(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) |
|
140 case (NTIMES(r, n), Left(Seq(v1, Stars(vs)))) => Stars(inj(r, c, v1)::vs) |
|
141 case (NTIMES(r, n), Right(Stars(v::vs))) => Stars(mkeps(r)::inj(r, c, v)::vs) |
|
142 case _ => { println ("r : " + r.toString + " v: " + v.toString); |
|
143 throw new Exception("inj error")} |
133 } |
144 } |
134 |
145 |
135 // main lexing function (produces a value) |
146 // main lexing function (produces a value) |
136 def lex(r: Rexp, s: List[Char]) : Val = s match { |
147 def lex(r: Rexp, s: List[Char]) : Val = s match { |
137 case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched") |
148 case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched") |
238 |
249 |
239 // filters out all white spaces |
250 // filters out all white spaces |
240 def tokenise(r: Rexp, s: String) = |
251 def tokenise(r: Rexp, s: String) = |
241 env(lexing_simp(r, s)).filterNot { (s) => s._1 == "w"}.mkString("\n") |
252 env(lexing_simp(r, s)).filterNot { (s) => s._1 == "w"}.mkString("\n") |
242 |
253 |
243 |
|
244 // Testing |
254 // Testing |
245 //============ |
255 //============ |
246 |
256 |
247 def time[T](code: => T) = { |
257 def time[T](code: => T) = { |
248 val start = System.nanoTime() |
258 val start = System.nanoTime() |
249 val result = code |
259 val result = code |
250 val end = System.nanoTime() |
260 val end = System.nanoTime() |
251 println((end - start)/1.0e9) |
261 println((end - start)/1.0e9) |
252 result |
262 result |
253 } |
263 } |
|
264 |
|
265 println(lexing_simp(OPT("1"), "1")) |
|
266 println(lexing_simp(OPT("1"), "")) |
|
267 println(ders("111".toList, NTIMES("1",3))) |
|
268 println(lexing_simp(NTIMES("1",3), "111")) |
254 |
269 |
255 val r1 = ("a" | "ab") ~ ("bcd" | "c") |
270 val r1 = ("a" | "ab") ~ ("bcd" | "c") |
256 println(lexing(r1, "abcd")) |
271 println(lexing(r1, "abcd")) |
257 |
272 |
258 val r2 = ("" | "a") ~ ("ab" | "b") |
273 val r2 = ("" | "a") ~ ("ab" | "b") |