8 case object ZERO extends Rexp |
8 case object ZERO extends Rexp |
9 case object ONE extends Rexp |
9 case object ONE extends Rexp |
10 case class CHAR(c: Char) extends Rexp { |
10 case class CHAR(c: Char) extends Rexp { |
11 override def toString = c.toString |
11 override def toString = c.toString |
12 } |
12 } |
|
13 case object ANYCHAR extends Rexp { |
|
14 override def toString = "." |
|
15 } |
13 case class ALT(r1: Rexp, r2: Rexp) extends Rexp { |
16 case class ALT(r1: Rexp, r2: Rexp) extends Rexp { |
14 override def toString = "(" + r1.toString + "|" + r2.toString + ")" |
17 override def toString = "(" + r1.toString + "|" + r2.toString + ")" |
15 } |
18 } |
16 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp { |
19 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp { |
17 override def toString = "(" + r1.toString + r2.toString +")" |
20 override def toString = "(" + r1.toString + r2.toString +")" |
18 } |
21 } |
19 case class STAR(r: Rexp) extends Rexp |
22 case class STAR(r: Rexp) extends Rexp |
20 case class RECD(x: String, r: Rexp) extends Rexp |
23 case class RECD(x: String, r: Rexp) extends Rexp { |
|
24 override def toString = "[" + r.toString +"]" |
|
25 } |
|
26 |
|
27 |
|
28 abstract class Val |
|
29 case object Empty extends Val |
|
30 case class Chr(c: Char) extends Val |
|
31 case class Sequ(v1: Val, v2: Val) extends Val |
|
32 case class Left(v: Val) extends Val |
|
33 case class Right(v: Val) extends Val |
|
34 case class Stars(vs: List[Val]) extends Val |
|
35 |
|
36 // nullable function: tests whether the regular |
|
37 // expression can recognise the empty string |
|
38 def nullable (r: Rexp) : Boolean = r match { |
|
39 case ZERO => false |
|
40 case ONE => true |
|
41 case CHAR(_) => false |
|
42 case ANYCHAR => false |
|
43 case ALT(r1, r2) => nullable(r1) || nullable(r2) |
|
44 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
|
45 case STAR(_) => true |
|
46 case RECD(_, r1) => nullable(r1) |
|
47 } |
|
48 |
|
49 // derivative of a regular expression w.r.t. a character |
|
50 def der (c: Char, r: Rexp) : Rexp = r match { |
|
51 case ZERO => ZERO |
|
52 case ONE => ZERO |
|
53 case CHAR(d) => if (c == d) ONE else ZERO |
|
54 case ANYCHAR => ONE |
|
55 case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) |
|
56 case SEQ(r1, r2) => |
|
57 if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) |
|
58 else SEQ(der(c, r1), r2) |
|
59 case STAR(r) => SEQ(der(c, r), STAR(r)) |
|
60 case RECD(_, r1) => der(c, r1) |
|
61 } |
|
62 |
|
63 // derivative w.r.t. a string (iterates der) |
|
64 def ders (s: List[Char], r: Rexp) : Rexp = s match { |
|
65 case Nil => r |
|
66 case c::s => ders(s, der(c, r)) |
|
67 } |
|
68 |
|
69 // extracts a string from value |
|
70 def flatten(v: Val) : String = v match { |
|
71 case Empty => "" |
|
72 case Chr(c) => c.toString |
|
73 case Left(v) => flatten(v) |
|
74 case Right(v) => flatten(v) |
|
75 case Sequ(v1, v2) => flatten(v1) + flatten(v2) |
|
76 case Stars(vs) => vs.map(flatten).mkString |
|
77 } |
|
78 |
|
79 // extracts an environment from a value |
|
80 def env(v: Val, r: Rexp) : List[(String, String)] = (v, r) match { |
|
81 case (Empty, ONE) => Nil |
|
82 case (Chr(c), CHAR(_)) => Nil |
|
83 case (Chr(c), ANYCHAR) => Nil |
|
84 case (Left(v), ALT(r1, r2)) => env(v, r1) |
|
85 case (Right(v), ALT(r1, r2)) => env(v, r2) |
|
86 case (Sequ(v1, v2), SEQ(r1, r2)) => env(v1, r1) ::: env(v2, r2) |
|
87 case (Stars(vs), STAR(r)) => vs.flatMap(env(_, r)) |
|
88 case (v, RECD(x, r)) => (x, flatten(v))::env(v, r) |
|
89 } |
|
90 |
|
91 // extracts indices for the underlying strings |
|
92 def env2(v: Val, r: Rexp, n: Int) : (List[(Int, Int)], Int) = (v, r) match { |
|
93 case (Empty, ONE) => (Nil, n) |
|
94 case (Chr(c), CHAR(_)) => (Nil, n + 1) |
|
95 case (Chr(c), ANYCHAR) => (Nil, n + 1) |
|
96 case (Left(v), ALT(r1, r2)) => env2(v, r1, n) |
|
97 case (Right(v), ALT(r1, r2)) => env2(v, r2, n) |
|
98 case (Sequ(v1, v2), SEQ(r1, r2)) => { |
|
99 val (e1, n1) = env2(v1, r1, n) |
|
100 val (e2, n2) = env2(v2, r2, n1) |
|
101 (e1 ::: e2, n2) |
|
102 } |
|
103 case (Stars(Nil), STAR(r)) => (Nil, n) |
|
104 case (Stars(v :: vs), STAR(r)) => { |
|
105 val (e1, n1) = env2(v, r, n) |
|
106 val (e2, n2) = env2(Stars(vs), STAR(r), n1) |
|
107 (e1 ::: e2, n2) |
|
108 } |
|
109 case (v, RECD(x, r)) => { |
|
110 val (e1, n1) = env2(v, r, n) |
|
111 ((n, n + flatten(v).length) :: e1, n1) |
|
112 } |
|
113 } |
|
114 |
|
115 // injection part |
|
116 def mkeps(r: Rexp) : Val = r match { |
|
117 case ONE => Empty |
|
118 case ALT(r1, r2) => |
|
119 if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2)) |
|
120 case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) |
|
121 case STAR(r) => Stars(Nil) |
|
122 case RECD(x, r) => mkeps(r) |
|
123 } |
|
124 |
|
125 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { |
|
126 case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) |
|
127 case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) |
|
128 case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) |
|
129 case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) |
|
130 case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1)) |
|
131 case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2)) |
|
132 case (CHAR(d), Empty) => Chr(c) |
|
133 case (ANYCHAR, Empty) => Chr(c) |
|
134 case (RECD(x, r1), _) => inj(r1, c, v) |
|
135 } |
|
136 |
|
137 |
|
138 // main lexing function (produces a value) |
|
139 def lex(r: Rexp, s: List[Char]) : Val = s match { |
|
140 case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched") |
|
141 case c::cs => inj(r, c, lex(der(c, r), cs)) |
|
142 } |
|
143 |
|
144 def lexing(r: Rexp, s: String) : Val = lex(r, s.toList) |
|
145 |
|
146 |
|
147 |
|
148 // Regular expression parser |
21 |
149 |
22 case class Parser(s: String) { |
150 case class Parser(s: String) { |
23 var i = 0 |
151 var i = 0 |
24 |
152 |
25 def peek() = s(i) |
153 def peek() = s(i) |
37 else t |
165 else t |
38 } |
166 } |
39 |
167 |
40 def Term() : Rexp = { |
168 def Term() : Rexp = { |
41 var f : Rexp = |
169 var f : Rexp = |
42 if (more() && peek() != ')' && peek() != '|') Factor() else ZERO; |
170 if (more() && peek() != ')' && peek() != '|') Factor() else ONE; |
43 while (more() && peek() != ')' && peek() != '|') { |
171 while (more() && peek() != ')' && peek() != '|') { |
44 f = SEQ(f, Factor()) ; |
172 f = SEQ(f, Factor()) ; |
45 } |
173 } |
46 |
|
47 f |
174 f |
48 } |
175 } |
49 |
176 |
50 def Factor() : Rexp = { |
177 def Factor() : Rexp = { |
51 var b = Base(); |
178 var b = Base(); |
52 while (more() && peek() == '*') { |
179 while (more() && peek() == '*') { |
53 eat('*') ; |
180 eat('*') ; |
54 b = STAR(b) ; |
181 b = STAR(b) ; |
55 } |
182 } |
56 |
183 while (more() && peek() == '?') { |
|
184 eat('?') ; |
|
185 b = ALT(b, ONE) ; |
|
186 } |
|
187 while (more() && peek() == '+') { |
|
188 eat('+') ; |
|
189 b = SEQ(b, STAR(b)) ; |
|
190 } |
57 b |
191 b |
58 } |
192 } |
59 |
193 |
60 def Base() : Rexp = { |
194 def Base() : Rexp = { |
61 peek() match { |
195 peek() match { |
62 case '(' => { eat('(') ; val r = Regex(); eat(')') ; r } |
196 case '(' => { eat('(') ; val r = Regex(); eat(')') ; RECD("",r) } |
|
197 case '.' => { eat('.'); ANYCHAR } |
63 case _ => CHAR(next()) |
198 case _ => CHAR(next()) |
64 } |
199 } |
65 } |
200 } |
66 } |
201 } |
67 |
202 |
68 println(Parser("a|(bc)*").Regex()) |
203 println(Parser("a|(bc)*").Regex()) |
69 |
204 |
70 |
205 |
71 def process(line: String) : String = { |
206 def process(line: String) : String = { |
72 val s = line.split("\\t+")(1) |
207 if (line.head == '#') "#" else |
73 s + ": " + Parser(s).Regex().toString |
208 { |
74 } |
209 val line_split = line.split("\\t+") |
75 |
210 val reg_str = line_split(1) |
76 |
211 val reg = RECD("", Parser(reg_str).Regex()) |
77 val filename = "../tests/forced-assoc.txt" |
212 val in_str = if (line_split(2) == "-") "" else line_split(2) |
|
213 val res_str = line_split(3) |
|
214 val our_val = lexing(reg, in_str) |
|
215 val our_result = env2(our_val, reg, 0)._1.mkString("") |
|
216 if (our_result != res_str) |
|
217 { |
|
218 reg_str + ": " + |
|
219 reg.toString + ": " + |
|
220 in_str + " \n " + |
|
221 our_result + |
|
222 " => \n" + res_str + " ! " + |
|
223 our_val + ":" + reg + "\n" |
|
224 } |
|
225 else "*" |
|
226 } |
|
227 } |
|
228 |
|
229 |
|
230 //val filename = "../tests/forced-assoc.txt" |
|
231 //val filename = "../tests/left-assoc.txt" |
|
232 //val filename = "../tests/right-assoc.txt" |
|
233 //val filename = "../tests/class.txt" |
|
234 //val filename = "../tests/basic3.txt" |
|
235 //val filename = "../tests/totest.txt" |
|
236 //val filename = "../tests/repetition2.txt" |
|
237 //val filename = "../tests/osx-bsd-critical.txt" |
|
238 val filename = "../tests/nullsub3.txt" |
78 val filelines : List[String] = Source.fromFile(filename).getLines.toList |
239 val filelines : List[String] = Source.fromFile(filename).getLines.toList |
79 |
240 |
80 |
241 |
81 filelines.foreach((s: String) => println(process(s))) |
242 filelines.foreach((s: String) => print(process(s))) |
82 |
243 |