author | Christian Urban <christian dot urban at kcl dot ac dot uk> |
Sat, 11 Jun 2016 13:28:45 +0100 | |
changeset 196 | 5fa8344a5176 |
parent 195 | c2d36c3cf8ad |
child 197 | a35041d5707c |
permissions | -rw-r--r-- |
168
6b0a1976f89a
added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
1 |
import scala.language.implicitConversions |
6b0a1976f89a
added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
2 |
import scala.language.reflectiveCalls |
6b0a1976f89a
added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
3 |
import scala.annotation.tailrec |
6b0a1976f89a
added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
4 |
import scala.io.Source |
6b0a1976f89a
added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
5 |
import scala.util.parsing.combinator._ |
6b0a1976f89a
added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
6 |
|
6b0a1976f89a
added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
7 |
/** Abstract syntax of regular expressions.
  *
  * The hierarchy is sealed so that the compiler can check the pattern
  * matches over regular expressions in this file for exhaustiveness.
  *
  *  - ZERO        matches no string
  *  - ONE         matches only the empty string
  *  - CHAR(c)     matches exactly the character c
  *  - ANYCHAR     matches any single character
  *  - ALT(r1, r2) alternative
  *  - SEQ(r1, r2) sequence
  *  - STAR(r)     zero or more repetitions
  *  - RECD(x, r)  record: r labelled with the name x
  */
sealed abstract class Rexp
case object ZERO extends Rexp
case object ONE extends Rexp
case class CHAR(c: Char) extends Rexp {
  override def toString = c.toString
}
case object ANYCHAR extends Rexp {
  override def toString = "."
}
case class ALT(r1: Rexp, r2: Rexp) extends Rexp {
  override def toString = s"($r1|$r2)"
}
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp {
  override def toString = s"($r1$r2)"
}
// STAR deliberately keeps the default case-class rendering, e.g. STAR(a)
case class STAR(r: Rexp) extends Rexp
case class RECD(x: String, r: Rexp) extends Rexp {
  // the label x is not part of the printed form
  override def toString = s"[$r]"
}
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
26 |
|
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
27 |
|
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
28 |
/** Values describing how a regular expression matched a string.
  *
  * The hierarchy is sealed so that the compiler can check pattern
  * matches over values for exhaustiveness.
  *
  * Note: Left and Right declared here shadow scala.util.Left/Right
  * for the remainder of the file.
  */
sealed abstract class Val
// value for the empty string
case object Empty extends Val
// value for a single character
case class Chr(c: Char) extends Val
// value for a sequence: first part followed by second part
case class Sequ(v1: Val, v2: Val) extends Val
// value for the left / right branch of an alternative
case class Left(v: Val) extends Val
case class Right(v: Val) extends Val
// value for a star: one entry per iteration
case class Stars(vs: List[Val]) extends Val
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
35 |
|
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
36 |
/** Tests whether the regular expression `r` can recognise the
  * empty string.
  */
def nullable (r: Rexp) : Boolean = r match {
  // ONE and every star accept the empty string outright
  case ONE | STAR(_) => true
  // ZERO and the single-character expressions never do
  case ZERO | CHAR(_) | ANYCHAR => false
  // a record is nullable exactly when its body is
  case RECD(_, body) => nullable(body)
  case SEQ(left, right) => nullable(left) && nullable(right)
  case ALT(left, right) => nullable(left) || nullable(right)
}
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
48 |
|
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
49 |
/** Derivative of the regular expression `r` with respect to the
  * character `c`.
  */
def der (c: Char, r: Rexp) : Rexp = r match {
  case ZERO | ONE => ZERO
  case ANYCHAR => ONE
  case CHAR(d) if c == d => ONE
  case CHAR(_) => ZERO
  // records are transparent: the label is dropped in the derivative
  case RECD(_, body) => der(c, body)
  case STAR(body) => SEQ(der(c, body), STAR(body))
  case ALT(left, right) => ALT(der(c, left), der(c, right))
  // if the first part can match the empty string, c may also be
  // consumed by the second part
  case SEQ(left, right) if nullable(left) =>
    ALT(SEQ(der(c, left), right), der(c, right))
  case SEQ(left, right) => SEQ(der(c, left), right)
}
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
62 |
|
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
63 |
/** Derivative of `r` with respect to the string `s`: applies `der`
  * for each character of `s`, from left to right.
  */
def ders (s: List[Char], r: Rexp) : Rexp =
  s.foldLeft(r) { (acc, c) => der(c, acc) }
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
68 |
|
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
69 |
/** Extracts the underlying string from the value `v` by
  * concatenating the characters it contains, left to right.
  */
def flatten(v: Val) : String = v match {
  case Chr(ch) => ch.toString
  case Empty => ""
  case Sequ(first, second) => flatten(first) + flatten(second)
  case Stars(items) => (for (item <- items) yield flatten(item)).mkString
  // the branch markers of an alternative contribute no characters
  case Left(inner) => flatten(inner)
  case Right(inner) => flatten(inner)
}
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
78 |
|
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
79 |
/** Extracts an environment from the value `v` matched against `r`:
  * one (label, matched string) pair per record encountered, outer
  * records before the records nested inside them.
  *
  * A (value, regex) pair of incompatible shapes is not covered and
  * results in a MatchError.
  */
def env(v: Val, r: Rexp) : List[(String, String)] = (v, r) match {
  case (value, RECD(label, body)) =>
    val binding = (label, flatten(value))
    binding :: env(value, body)
  case (Stars(items), STAR(body)) =>
    for (item <- items; binding <- env(item, body)) yield binding
  case (Sequ(v1, v2), SEQ(r1, r2)) => env(v1, r1) ::: env(v2, r2)
  case (Left(inner), ALT(left, _)) => env(inner, left)
  case (Right(inner), ALT(_, right)) => env(inner, right)
  // leaves contain no records
  case (Chr(_), CHAR(_)) | (Chr(_), ANYCHAR) | (Empty, ONE) => Nil
}
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
90 |
|
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
91 |
/** Extracts index ranges for the strings underlying the records.
  *
  * `n` is the position in the input at which the value `v` starts.
  * The result is the list of (start, end) pairs, one per record, with
  * `end` being start plus the length of the record's string, together
  * with the position reached after `v`.
  *
  * A (value, regex) pair of incompatible shapes is not covered and
  * results in a MatchError.
  */
def env2(v: Val, r: Rexp, n: Int) : (List[(Int, Int)], Int) = (v, r) match {
  case (value, RECD(_, body)) =>
    val (inner, after) = env2(value, body, n)
    ((n, n + flatten(value).length) :: inner, after)
  case (Stars(first :: rest), STAR(body)) =>
    // first iteration, then the remaining ones starting where it ended
    val (fromFirst, afterFirst) = env2(first, body, n)
    val (fromRest, afterRest) = env2(Stars(rest), STAR(body), afterFirst)
    (fromFirst ::: fromRest, afterRest)
  case (Stars(Nil), STAR(_)) => (Nil, n)
  case (Sequ(v1, v2), SEQ(r1, r2)) =>
    val (fromLeft, afterLeft) = env2(v1, r1, n)
    val (fromRight, afterRight) = env2(v2, r2, afterLeft)
    (fromLeft ::: fromRight, afterRight)
  case (Left(inner), ALT(left, _)) => env2(inner, left, n)
  case (Right(inner), ALT(_, right)) => env2(inner, right, n)
  // a single character advances the position by one
  case (Chr(_), CHAR(_)) | (Chr(_), ANYCHAR) => (Nil, n + 1)
  case (Empty, ONE) => (Nil, n)
}
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
114 |
|
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
115 |
// injection part |
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
116 |
/** Builds a value for how `r` matches the empty string.
  *
  * For an alternative the left branch is preferred whenever it is
  * nullable. ZERO, CHAR and ANYCHAR are not covered and result in a
  * MatchError.
  */
def mkeps(r: Rexp) : Val = r match {
  case ONE => Empty
  case STAR(_) => Stars(Nil)
  case RECD(_, body) => mkeps(body)
  case SEQ(left, right) => Sequ(mkeps(left), mkeps(right))
  case ALT(left, _) if nullable(left) => Left(mkeps(left))
  case ALT(_, right) => Right(mkeps(right))
}
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
124 |
|
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
125 |
/** Injects the character `c` back into the value `v`.
  *
  * `v` is a value for the derivative of `r` with respect to `c` (this
  * is how `lex` calls it); the result is a value for `r` itself.
  * Combinations of `r` and `v` not listed result in a MatchError.
  */
def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
  // records are transparent
  case (RECD(_, body), _) => inj(body, c, v)
  case (CHAR(_), Empty) | (ANYCHAR, Empty) => Chr(c)
  case (ALT(left, _), Left(lv)) => Left(inj(left, c, lv))
  case (ALT(_, right), Right(rv)) => Right(inj(right, c, rv))
  // derivative of a sequence whose first part is not nullable
  case (SEQ(first, _), Sequ(v1, v2)) => Sequ(inj(first, c, v1), v2)
  // derivative of a sequence with nullable first part: c went into the
  // first part (Left) or directly into the second part (Right)
  case (SEQ(first, _), Left(Sequ(v1, v2))) => Sequ(inj(first, c, v1), v2)
  case (SEQ(first, second), Right(v2)) => Sequ(mkeps(first), inj(second, c, v2))
  case (STAR(body), Sequ(v1, Stars(rest))) => Stars(inj(body, c, v1) :: rest)
}
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
136 |
|
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
137 |
|
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
138 |
/** Main lexing function: produces a value describing how `r` matches
  * the whole of `s`.
  *
  * Throws an Exception("Not matched") if `r` does not match `s`.
  */
def lex(r: Rexp, s: List[Char]) : Val =
  if (s.isEmpty) {
    // input exhausted: r must accept the empty string
    if (!nullable(r)) throw new Exception("Not matched")
    mkeps(r)
  } else {
    val c = s.head
    // lex the rest against the derivative, then put c back into the value
    inj(r, c, lex(der(c, r), s.tail))
  }
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
143 |
|
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
144 |
/** Convenience wrapper around `lex` taking the input as a String. */
def lexing(r: Rexp, s: String) : Val = {
  val chars: List[Char] = s.toList
  lex(r, chars)
}
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
145 |
|
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
146 |
|
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
147 |
|
c2d36c3cf8ad
run all posix tests
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
169
diff
changeset
|
148 |
// Regular expression parser |
168
6b0a1976f89a
added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
149 |
|
6b0a1976f89a
added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
150 |
/** Recursive-descent parser turning the string `s` into a Rexp.
  *
  * Grammar:
  * {{{
  *   Regex  ::= Term [ '|' Regex ]
  *   Term   ::= { Factor }                  (empty term is ONE)
  *   Factor ::= Base { '*' | '?' | '+' }
  *   Base   ::= '(' Regex ')' | '.' | any other character
  * }}}
  *
  * The parser is stateful: `i` is the index of the next unread
  * character, so each instance should be used for a single parse.
  * `Regex()` does not check that the whole input was consumed.
  */
case class Parser(s: String) {
  // index of the next unread character of s
  var i = 0

  // the next unread character; callers must check more() first
  def peek() = s(i)

  // consumes the expected character c, failing with an Exception if the
  // next character differs or the input is exhausted (e.g. an unclosed
  // parenthesis)
  def eat(c: Char) =
    if (!more()) throw new Exception("Expected " + c + " got end of input")
    else if (c == s(i)) i = i + 1
    else throw new Exception("Expected " + c + " got " + s(i))

  // consumes and returns the next character
  def next() = { i = i + 1; s(i - 1) }

  // is there unread input left?
  def more() = s.length - i > 0

  // the characters treated as postfix operators by Factor
  private def isPostfix(c: Char) : Boolean = c == '*' || c == '?' || c == '+'

  def Regex() : Rexp = {
    val t = Term()
    if (more() && peek() == '|') {
      eat('|')
      // alternatives nest to the right
      ALT(t, Regex())
    }
    else t
  }

  def Term() : Rexp = {
    // a term ends at the end of input, at ')' or at '|'
    var f : Rexp =
      if (more() && peek() != ')' && peek() != '|') Factor() else ONE
    while (more() && peek() != ')' && peek() != '|') {
      // sequences nest to the left
      f = SEQ(f, Factor())
    }
    f
  }

  def Factor() : Rexp = {
    var b = Base()
    // One loop for all postfix operators, so that they may follow each
    // other in any order (e.g. a?* or a+?); handling them in separate
    // consecutive loops would leave a trailing operator unconsumed, to
    // be parsed as a literal character afterwards.
    while (more() && isPostfix(peek())) {
      b = next() match {
        case '*' => STAR(b)
        case '?' => ALT(b, ONE)
        case _   => SEQ(b, STAR(b))   // '+': one or more
      }
    }
    b
  }

  def Base() : Rexp = {
    peek() match {
      // every parenthesised group becomes a record with an empty label
      case '(' => { eat('(') ; val r = Regex(); eat(')') ; RECD("", r) }
      case '.' => { eat('.'); ANYCHAR }
      case _ => CHAR(next())
    }
  }
}
6b0a1976f89a
added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
202 |
|
6b0a1976f89a
added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
203 |
// smoke test: parse a sample regular expression and print its rendering
println(Parser("a|(bc)*").Regex().toString)
6b0a1976f89a
added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
204 |
|
6b0a1976f89a
added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
205 |
|
196
5fa8344a5176
added test processing
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
195
diff
changeset
|
206 |
/** Runs one line of a regex test file and returns a short status string.
  *
  * The line is split on runs of tab characters. Field 1 is the regular
  * expression, field 2 the input string (a lone "-" stands for the empty
  * input) and field 3 the expected result; field 0 is not used here.
  *
  * @param line a single line of a test file
  * @return
  *   - "#" if the line starts with '#';
  *   - ""  if the line is empty or consists only of whitespace;
  *   - a one-line diagnostic if the line has fewer than four tab-separated
  *     fields;
  *   - "*" if the result computed via `lexing`/`env2` equals the expected
  *     result;
  *   - otherwise a multi-line report containing the regex, the input, the
  *     computed result, the expected result and the computed value.
  */
def process_line(line: String) : String = {
  // startsWith is safe on the empty string, whereas line.head would throw
  if (line.startsWith("#")) "#"
  else if (line.trim.isEmpty) ""
  else {
    val line_split = line.split("\\t+")
    if (line_split.length < 4) {
      // guard the indexed accesses below against short lines
      "malformed test line (expected at least 4 tab-separated fields): " + line + "\n"
    }
    else {
      val reg_str = line_split(1)
      // the whole expression is wrapped in an unnamed record
      val reg = RECD("", Parser(reg_str).Regex())
      val in_str = if (line_split(2) == "-") "" else line_split(2)
      val res_str = line_split(3)
      val our_val = lexing(reg, in_str)
      val our_result = env2(our_val, reg, 0)._1.mkString("")
      if (our_result == res_str) "*"
      else {
        reg_str + ": " +
        reg.toString + ": " +
        in_str + " \n " +
        our_result +
        " => \n" + res_str + " ! " +
        our_val + ":" + reg + "\n"
      }
    }
  }
}
6b0a1976f89a
added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
228 |
|
196
5fa8344a5176
added test processing
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
195
diff
changeset
|
229 |
/** Reads the file `name`, passes every line to `process_line` and prints
  * each returned status string to standard output without separators.
  *
  * All lines are read into memory first, and the underlying file handle is
  * closed before any line is processed (and also if reading fails).
  *
  * @param name path of the test file to process
  */
def process_file(name : String) : Unit = {
  println("\nProcessing " + name)
  val source = Source.fromFile(name)
  val filelines : List[String] =
    try source.getLines().toList
    finally source.close() // release the file handle even when reading throws
  filelines.foreach((s: String) => print(process_line(s)))
}
168
6b0a1976f89a
added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
234 |
|
6b0a1976f89a
added parser for regexes
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
235 |
|
196
5fa8344a5176
added test processing
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
195
diff
changeset
|
236 |
// Test files to run, in order. The right-assoc suite is currently disabled.
val files = List(
  "../tests/forced-assoc.txt",
  "../tests/left-assoc.txt",
  //"../tests/right-assoc.txt",
  "../tests/class.txt",
  "../tests/basic3.txt",
  "../tests/totest.txt",
  "../tests/repetition2.txt",
  "../tests/osx-bsd-critical.txt"
)

// Process every listed test file in turn.
for (file_name <- files) process_file(file_name)
5fa8344a5176
added test processing
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
195
diff
changeset
|
246 |