| author | Christian Urban <christian.urban@kcl.ac.uk> | 
| Thu, 19 Sep 2024 19:25:13 +0100 | |
| changeset 963 | 4e3f7b3574a9 | 
| parent 742 | 155426396b5f | 
| permissions | -rw-r--r-- | 
| 
92
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
91 
diff
changeset
 | 
1  | 
import scala.annotation.tailrec  | 
| 490 | 2  | 
import scala.language.implicitConversions  | 
| 
92
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
91 
diff
changeset
 | 
3  | 
|
| 
91
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
4  | 
abstract class Rexp  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
5  | 
|
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
6  | 
case object NULL extends Rexp  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
7  | 
case object EMPTY extends Rexp  | 
| 
92
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
91 
diff
changeset
 | 
8  | 
case object ALLCHAR extends Rexp  | 
| 
91
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
9  | 
case class CHAR(c: Char) extends Rexp  | 
| 
92
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
91 
diff
changeset
 | 
10  | 
case class STR(s: String) extends Rexp  | 
| 
91
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
11  | 
case class ALT(r1: Rexp, r2: Rexp) extends Rexp  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
12  | 
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
13  | 
case class STAR(r: Rexp) extends Rexp  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
14  | 
case class NOT(r: Rexp) extends Rexp  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
15  | 
case class REP(r: Rexp, n: Int) extends Rexp  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
16  | 
|
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
17  | 
// some convenience for typing in regular expressions  | 
| 
92
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
91 
diff
changeset
 | 
18  | 
implicit def string2rexp(s : String) : Rexp = STR(s)  | 
| 
91
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
19  | 
|
| 499 | 20  | 
implicit def RexpOps(r: Rexp) : Rexp = new {
 | 
| 
91
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
21  | 
def | (s: Rexp) = ALT(r, s)  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
22  | 
def % = STAR(r)  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
23  | 
def %(n: Int) = REP(r, n)  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
24  | 
def %%(n: Int) = SEQ(REP(r, n), STAR(r))  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
25  | 
def ? = ALT(EMPTY, r)  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
26  | 
def unary_! = NOT(r)  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
27  | 
def ~ (s: Rexp) = SEQ(r, s)  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
28  | 
}  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
29  | 
|
| 499 | 30  | 
implicit def stringOps(s: String) : Rexp = new {
 | 
| 
91
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
31  | 
def | (r: Rexp) = ALT(s, r)  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
32  | 
def | (r: String) = ALT(s, r)  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
33  | 
def % = STAR(s)  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
34  | 
def %(n: Int) = REP(s, n)  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
35  | 
def %%(n: Int) = SEQ(REP(s, n), STAR(s))  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
36  | 
def ? = ALT(EMPTY, s)  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
37  | 
def unary_! = NOT(s)  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
38  | 
def ~ (r: Rexp) = SEQ(s, r)  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
39  | 
def ~ (r: String) = SEQ(s, r)  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
40  | 
}  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
41  | 
|
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
42  | 
|
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
43  | 
// nullable function: tests whether the regular  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
44  | 
// expression can recognise the empty string  | 
| 
92
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
91 
diff
changeset
 | 
45  | 
|
| 
91
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
46  | 
def nullable (r: Rexp) : Boolean = r match {
 | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
47  | 
case NULL => false  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
48  | 
case EMPTY => true  | 
| 
92
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
91 
diff
changeset
 | 
49  | 
case ALLCHAR => false  | 
| 
91
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
50  | 
case CHAR(_) => false  | 
| 
92
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
91 
diff
changeset
 | 
51  | 
case STR(s) => s.isEmpty  | 
| 
91
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
52  | 
case ALT(r1, r2) => nullable(r1) || nullable(r2)  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
53  | 
case SEQ(r1, r2) => nullable(r1) && nullable(r2)  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
54  | 
case STAR(_) => true  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
55  | 
case NOT(r) => !(nullable(r))  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
56  | 
case REP(r, i) => if (i == 0) true else nullable(r)  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
57  | 
}  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
58  | 
|
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
59  | 
// derivative of a regular expression w.r.t. a character  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
60  | 
def der (c: Char, r: Rexp) : Rexp = r match {
 | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
61  | 
case NULL => NULL  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
62  | 
case EMPTY => NULL  | 
| 
92
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
91 
diff
changeset
 | 
63  | 
case ALLCHAR => EMPTY  | 
| 
91
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
64  | 
case CHAR(d) => if (c == d) EMPTY else NULL  | 
| 
92
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
91 
diff
changeset
 | 
65  | 
case STR(s) => if (s.isEmpty || s.head != c) NULL else STR(s.tail)  | 
| 
91
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
66  | 
case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
67  | 
case SEQ(r1, r2) =>  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
68  | 
if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
69  | 
else SEQ(der(c, r1), r2)  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
70  | 
case STAR(r) => SEQ(der(c, r), STAR(r))  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
71  | 
case NOT(r) => NOT(der (c, r))  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
72  | 
case REP(r, i) =>  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
73  | 
if (i == 0) NULL else SEQ(der(c, r), REP(r, i - 1))  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
74  | 
}  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
75  | 
|
| 490 | 76  | 
|
| 
91
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
77  | 
// derivative w.r.t. a string (iterates der)  | 
| 
92
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
91 
diff
changeset
 | 
78  | 
@tailrec  | 
| 
91
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
79  | 
def ders (s: List[Char], r: Rexp) : Rexp = s match {
 | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
80  | 
case Nil => r  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
81  | 
case c::s => ders(s, der(c, r))  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
82  | 
}  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
83  | 
|
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
84  | 
// main matcher function  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
85  | 
def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
86  | 
|
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
87  | 
//examples  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
88  | 
val digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
89  | 
val int = ("+" | "-").? ~ digit.%%(1)
 | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
90  | 
val real = ("+" | "-").? ~ digit.%%(1) ~ ("." ~ digit.%%(1)).? ~ (("e" | "E") ~ ("+" | "-").? ~ digit.%%(1)).?
 | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
91  | 
|
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
92  | 
val ints = List("0", "-4534", "+049", "99")
 | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
93  | 
val reals = List("0.9", "-12.8", "+91.0", "9e12", "+9.21E-12", "-512E+01")
 | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
94  | 
val errs = List("", "-", "+", "+-1", "-+2", "2-")
 | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
95  | 
|
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
96  | 
ints.map(s => matcher(int, s))  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
97  | 
reals.map(s => matcher(int, s))  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
98  | 
errs.map(s => matcher(int, s))  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
99  | 
|
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
100  | 
ints.map(s => matcher(real, s))  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
101  | 
reals.map(s => matcher(real, s))  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
102  | 
errs.map(s => matcher(real, s))  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
103  | 
|
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
104  | 
|
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
105  | 
|
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
106  | 
def RTEST(n: Int) = ("a".? %(n)) ~ ("a" %(n))
 | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
107  | 
|
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
108  | 
def time_needed[T](i: Int, code: => T) = {
 | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
109  | 
val start = System.nanoTime()  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
110  | 
for (j <- 1 to i) code  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
111  | 
val end = System.nanoTime()  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
112  | 
(end - start)/(i * 1.0e9)  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
113  | 
}  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
114  | 
|
| 
92
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
91 
diff
changeset
 | 
115  | 
for (i <- 1 to 12000 by 500) {
 | 
| 
91
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
116  | 
println(i + ": " + "%.5f".format(time_needed(1, matcher(RTEST(i), "a" * i))))  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
117  | 
}  | 
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
118  | 
|
| 
 
47f86885d481
added
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
119  |