1 import scala.language.implicitConversions |
1 import scala.language.implicitConversions |
2 import scala.language.reflectiveCalls |
2 import scala.language.reflectiveCalls |
3 import scala.annotation.tailrec |
3 import scala.annotation.tailrec |
4 |
4 |
5 abstract class Rexp |
5 abstract class Rexp |
6 case object NULL extends Rexp |
6 case object ZERO extends Rexp |
7 case object EMPTY extends Rexp |
7 case object ONE extends Rexp |
8 case class CHAR(c: Char) extends Rexp |
8 case class CHAR(c: Char) extends Rexp |
9 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
9 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
10 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
10 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
11 case class STAR(r: Rexp) extends Rexp |
11 case class STAR(r: Rexp) extends Rexp |
12 case class RECD(x: String, r: Rexp) extends Rexp |
12 case class RECD(x: String, r: Rexp) extends Rexp |
13 |
13 |
14 abstract class Val |
14 abstract class Val |
15 case object Void extends Val |
15 case object Empty extends Val |
16 case class Chr(c: Char) extends Val |
16 case class Chr(c: Char) extends Val |
17 case class Sequ(v1: Val, v2: Val) extends Val |
17 case class Sequ(v1: Val, v2: Val) extends Val |
18 case class Left(v: Val) extends Val |
18 case class Left(v: Val) extends Val |
19 case class Right(v: Val) extends Val |
19 case class Right(v: Val) extends Val |
20 case class Stars(vs: List[Val]) extends Val |
20 case class Stars(vs: List[Val]) extends Val |
21 case class Rec(x: String, v: Val) extends Val |
21 case class Rec(x: String, v: Val) extends Val |
22 |
22 |
23 // some convenience for typing in regular expressions |
23 // some convenience for typing in regular expressions |
24 def charlist2rexp(s : List[Char]): Rexp = s match { |
24 def charlist2rexp(s : List[Char]): Rexp = s match { |
25 case Nil => EMPTY |
25 case Nil => ONE |
26 case c::Nil => CHAR(c) |
26 case c::Nil => CHAR(c) |
27 case c::s => SEQ(CHAR(c), charlist2rexp(s)) |
27 case c::s => SEQ(CHAR(c), charlist2rexp(s)) |
28 } |
28 } |
29 implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) |
29 implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) |
30 |
30 |
56 |
56 |
57 |
57 |
58 // nullable function: tests whether the regular |
58 // nullable function: tests whether the regular |
59 // expression can recognise the empty string |
59 // expression can recognise the empty string |
60 def nullable (r: Rexp) : Boolean = r match { |
60 def nullable (r: Rexp) : Boolean = r match { |
61 case NULL => false |
61 case ZERO => false |
62 case EMPTY => true |
62 case ONE => true |
63 case CHAR(_) => false |
63 case CHAR(_) => false |
64 case ALT(r1, r2) => nullable(r1) || nullable(r2) |
64 case ALT(r1, r2) => nullable(r1) || nullable(r2) |
65 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
65 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
66 case STAR(_) => true |
66 case STAR(_) => true |
67 case RECD(_, r1) => nullable(r1) |
67 case RECD(_, r1) => nullable(r1) |
68 } |
68 } |
69 |
69 |
70 // derivative of a regular expression w.r.t. a character |
70 // derivative of a regular expression w.r.t. a character |
71 def der (c: Char, r: Rexp) : Rexp = r match { |
71 def der (c: Char, r: Rexp) : Rexp = r match { |
72 case NULL => NULL |
72 case ZERO => ZERO |
73 case EMPTY => NULL |
73 case ONE => ZERO |
74 case CHAR(d) => if (c == d) EMPTY else NULL |
74 case CHAR(d) => if (c == d) ONE else ZERO |
75 case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) |
75 case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) |
76 case SEQ(r1, r2) => |
76 case SEQ(r1, r2) => |
77 if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) |
77 if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) |
78 else SEQ(der(c, r1), r2) |
78 else SEQ(der(c, r1), r2) |
79 case STAR(r) => SEQ(der(c, r), STAR(r)) |
79 case STAR(r) => SEQ(der(c, r), STAR(r)) |
86 case c::s => ders(s, der(c, r)) |
86 case c::s => ders(s, der(c, r)) |
87 } |
87 } |
88 |
88 |
89 // extracts a string from value |
89 // extracts a string from value |
90 def flatten(v: Val) : String = v match { |
90 def flatten(v: Val) : String = v match { |
91 case Void => "" |
91 case Empty => "" |
92 case Chr(c) => c.toString |
92 case Chr(c) => c.toString |
93 case Left(v) => flatten(v) |
93 case Left(v) => flatten(v) |
94 case Right(v) => flatten(v) |
94 case Right(v) => flatten(v) |
95 case Sequ(v1, v2) => flatten(v1) + flatten(v2) |
95 case Sequ(v1, v2) => flatten(v1) + flatten(v2) |
96 case Stars(vs) => vs.map(flatten).mkString |
96 case Stars(vs) => vs.map(flatten).mkString |
97 case Rec(_, v) => flatten(v) |
97 case Rec(_, v) => flatten(v) |
98 } |
98 } |
99 |
99 |
100 // extracts an environment from a value |
100 // extracts an environment from a value |
101 def env(v: Val) : List[(String, String)] = v match { |
101 def env(v: Val) : List[(String, String)] = v match { |
102 case Void => Nil |
102 case Empty => Nil |
103 case Chr(c) => Nil |
103 case Chr(c) => Nil |
104 case Left(v) => env(v) |
104 case Left(v) => env(v) |
105 case Right(v) => env(v) |
105 case Right(v) => env(v) |
106 case Sequ(v1, v2) => env(v1) ::: env(v2) |
106 case Sequ(v1, v2) => env(v1) ::: env(v2) |
107 case Stars(vs) => vs.flatMap(env) |
107 case Stars(vs) => vs.flatMap(env) |
108 case Rec(x, v) => (x, flatten(v))::env(v) |
108 case Rec(x, v) => (x, flatten(v))::env(v) |
109 } |
|
110 |
|
111 def mkeps_all(r: Rexp) : Set[Val] = r match { |
|
112 case EMPTY => Set(Void) |
|
113 case ALT(r1, r2) => (nullable(r1), nullable(r2)) match { |
|
114 case (true, true) => mkeps_all(r1).map(Left) ++ mkeps_all(r2).map(Right) |
|
115 case (true, false) => mkeps_all(r1).map(Left) |
|
116 case (false, true) => mkeps_all(r2).map(Right) |
|
117 } |
|
118 case SEQ(r1, r2) => for (v1 <- mkeps_all(r1); |
|
119 v2 <- mkeps_all(r2)) yield Sequ(v1, v2) |
|
120 case STAR(r) => Set(Stars(Nil), Stars(List(mkeps(r)))) |
|
121 case RECD(x, r) => for (v <- mkeps_all(r)) yield Rec(x, v) |
|
122 } |
109 } |
123 |
110 |
124 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { |
111 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { |
125 case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) |
112 case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) |
126 case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) |
113 case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) |
127 case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) |
114 case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) |
128 case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) |
115 case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) |
129 case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1)) |
116 case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1)) |
130 case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2)) |
117 case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2)) |
131 case (CHAR(d), Void) => Chr(c) |
118 case (CHAR(d), Empty) => Chr(c) |
132 case (RECD(x, r1), _) => Rec(x, inj(r1, c, v)) |
119 case (RECD(x, r1), _) => Rec(x, inj(r1, c, v)) |
133 } |
120 } |
134 |
121 |
135 def inj_all(r: Rexp, c: Char, vs: Set[Val]) : Set[Val] = |
|
136 for (v <- vs) yield inj(r, c, v) |
|
137 |
122 |
138 // main lexing function (produces a value) |
123 // main lexing function (produces a value) |
139 def lex(r: Rexp, s: List[Char]) : Val = s match { |
124 def lex(r: Rexp, s: List[Char]) : Val = s match { |
140 case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched") |
125 case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched") |
141 case c::cs => inj(r, c, lex(der(c, r), cs)) |
126 case c::cs => inj(r, c, lex(der(c, r), cs)) |