|
1 import scala.language.implicitConversions |
|
2 import scala.language.reflectiveCalls |
|
3 import scala.annotation.tailrec |
|
4 |
|
5 abstract class Rexp |
|
6 case object ZERO extends Rexp |
|
7 case object ONE extends Rexp |
|
8 case class CHAR(c: Char) extends Rexp |
|
9 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
|
10 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
|
11 case class STAR(r: Rexp) extends Rexp |
|
12 |
|
13 abstract class ARexp |
|
14 case object AZERO extends ARexp |
|
15 case class AONE(bs: List[Boolean]) extends ARexp |
|
16 case class ACHAR(bs: List[Boolean], c: Char) extends ARexp |
|
17 case class AALT(bs: List[Boolean], r1: ARexp, r2: ARexp) extends ARexp |
|
18 case class ASEQ(bs: List[Boolean], r1: ARexp, r2: ARexp) extends ARexp |
|
19 case class ASTAR(bs: List[Boolean], r: ARexp) extends ARexp |
|
20 |
|
21 abstract class Val |
|
22 case object Empty extends Val |
|
23 case class Chr(c: Char) extends Val |
|
24 case class Sequ(v1: Val, v2: Val) extends Val |
|
25 case class Left(v: Val) extends Val |
|
26 case class Right(v: Val) extends Val |
|
27 case class Stars(vs: List[Val]) extends Val |
|
28 |
|
29 // some convenience for typing in regular expressions |
|
30 def charlist2rexp(s : List[Char]): Rexp = s match { |
|
31 case Nil => ONE |
|
32 case c::Nil => CHAR(c) |
|
33 case c::s => SEQ(CHAR(c), charlist2rexp(s)) |
|
34 } |
|
35 implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) |
|
36 |
|
37 implicit def RexpOps(r: Rexp) = new { |
|
38 def | (s: Rexp) = ALT(r, s) |
|
39 def % = STAR(r) |
|
40 def ~ (s: Rexp) = SEQ(r, s) |
|
41 } |
|
42 |
|
43 implicit def stringOps(s: String) = new { |
|
44 def | (r: Rexp) = ALT(s, r) |
|
45 def | (r: String) = ALT(s, r) |
|
46 def % = STAR(s) |
|
47 def ~ (r: Rexp) = SEQ(s, r) |
|
48 def ~ (r: String) = SEQ(s, r) |
|
49 } |
|
50 |
|
51 // translation into ARexps |
|
52 def fuse(bs: List[Boolean], r: ARexp) : ARexp = r match { |
|
53 case AZERO => AZERO |
|
54 case AONE(cs) => AONE(bs ++ cs) |
|
55 case ACHAR(cs, c) => ACHAR(bs ++ cs, c) |
|
56 case AALT(cs, r1, r2) => AALT(bs ++ cs, r1, r2) |
|
57 case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2) |
|
58 case ASTAR(cs, r) => ASTAR(bs ++ cs, r) |
|
59 } |
|
60 |
|
61 def internalise(r: Rexp) : ARexp = r match { |
|
62 case ZERO => AZERO |
|
63 case ONE => AONE(Nil) |
|
64 case CHAR(c) => ACHAR(Nil, c) |
|
65 case ALT(r1, r2) => AALT(Nil, fuse(List(false), internalise(r1)), fuse(List(true), internalise(r2))) |
|
66 case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2)) |
|
67 case STAR(r) => ASTAR(Nil, internalise(r)) |
|
68 } |
|
69 |
|
70 internalise(("a" | "ab") ~ ("b" | "")) |
|
71 |
|
72 |
|
73 def decode_aux(r: Rexp, bs: List[Boolean]) : (Val, List[Boolean]) = (r, bs) match { |
|
74 case (ONE, bs) => (Empty, bs) |
|
75 case (CHAR(c), bs) => (Chr(c), bs) |
|
76 case (ALT(r1, r2), false::bs) => { |
|
77 val (v, bs1) = decode_aux(r1, bs) |
|
78 (Left(v), bs1) |
|
79 } |
|
80 case (ALT(r1, r2), true::bs) => { |
|
81 val (v, bs1) = decode_aux(r2, bs) |
|
82 (Right(v), bs1) |
|
83 } |
|
84 case (SEQ(r1, r2), bs) => { |
|
85 val (v1, bs1) = decode_aux(r1, bs) |
|
86 val (v2, bs2) = decode_aux(r2, bs1) |
|
87 (Sequ(v1, v2), bs2) |
|
88 } |
|
89 case (STAR(r), false::bs) => { |
|
90 val (v, bs1) = decode_aux(r, bs) |
|
91 val (Stars(vs), bs2) = decode_aux(STAR(r), bs1) |
|
92 (Stars(v::vs), bs2) |
|
93 } |
|
94 case (STAR(r), true::bs) => (Stars(Nil), bs) |
|
95 } |
|
96 |
|
97 def decode(r: Rexp, bs: List[Boolean]) = decode_aux(r, bs) match { |
|
98 case (v, Nil) => v |
|
99 case _ => throw new Exception("Not decodable") |
|
100 } |
|
101 |
|
102 // nullable function: tests whether the aregular |
|
103 // expression can recognise the empty string |
|
104 def nullable (r: ARexp) : Boolean = r match { |
|
105 case AZERO => false |
|
106 case AONE(_) => true |
|
107 case ACHAR(_,_) => false |
|
108 case AALT(_, r1, r2) => nullable(r1) || nullable(r2) |
|
109 case ASEQ(_, r1, r2) => nullable(r1) && nullable(r2) |
|
110 case ASTAR(_, _) => true |
|
111 } |
|
112 |
|
113 def mkepsBC(r: ARexp) : List[Boolean] = r match { |
|
114 case AONE(bs) => bs |
|
115 case AALT(bs, r1, r2) => |
|
116 if (nullable(r1)) bs ++ mkepsBC(r1) else bs ++ mkepsBC(r2) |
|
117 case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2) |
|
118 case ASTAR(bs, r) => bs ++ List(true) |
|
119 } |
|
120 |
|
121 // derivative of a regular expression w.r.t. a character |
|
122 def der (c: Char, r: ARexp) : ARexp = r match { |
|
123 case AZERO => AZERO |
|
124 case AONE(_) => AZERO |
|
125 case ACHAR(bs, d) => if (c == d) AONE(bs) else AZERO |
|
126 case AALT(bs, r1, r2) => AALT(bs, der(c, r1), der(c, r2)) |
|
127 case ASEQ(bs, r1, r2) => |
|
128 if (nullable(r1)) AALT(bs, ASEQ(Nil, der(c, r1), r2), fuse(mkepsBC(r1), der(c, r2))) |
|
129 else ASEQ(bs, der(c, r1), r2) |
|
130 case ASTAR(bs, r) => ASEQ(bs, fuse(List(false), der(c, r)), ASTAR(Nil, r)) |
|
131 } |
|
132 |
|
133 // derivative w.r.t. a string (iterates der) |
|
134 @tailrec |
|
135 def ders (s: List[Char], r: ARexp) : ARexp = s match { |
|
136 case Nil => r |
|
137 case c::s => ders(s, der(c, r)) |
|
138 } |
|
139 |
|
140 // main unsimplified lexing function (produces a value) |
|
141 def lex(r: ARexp, s: List[Char]) : List[Boolean] = s match { |
|
142 case Nil => if (nullable(r)) mkepsBC(r) else throw new Exception("Not matched") |
|
143 case c::cs => lex(der(c, r), cs) |
|
144 } |
|
145 |
|
146 def lexing(r: Rexp, s: String) : Val = decode(r, lex(internalise(r), s.toList)) |
|
147 |
|
148 // Some Tests |
|
149 //============ |
|
150 |
|
151 def time_needed[T](i: Int, code: => T) = { |
|
152 val start = System.nanoTime() |
|
153 for (j <- 1 to i) code |
|
154 val end = System.nanoTime() |
|
155 (end - start)/(i * 1.0e9) |
|
156 } |
|
157 |
|
158 |
|
159 |
|
160 |
|
161 val r0 = ("a" | "ab") ~ ("b" | "") |
|
162 println(lexing(r0, "ab")) |
|
163 |
|
164 val r1 = ("a" | "ab") ~ ("bcd" | "cd") |
|
165 println(lexing(r1, "abcd")) |
|
166 |
|
167 println(lexing((("" | "a") ~ ("ab" | "b")), "ab")) |
|
168 println(lexing((("" | "a") ~ ("b" | "ab")), "ab")) |
|
169 println(lexing((("" | "a") ~ ("c" | "ab")), "ab")) |
|
170 |
|
171 |
|
172 |
|
173 // Two Simple Tests for the While Language |
|
174 //======================================== |
|
175 |
|
176 // Lexing Rules |
|
177 |
|
178 def PLUS(r: Rexp) = r ~ r.% |
|
179 val SYM = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z" |
|
180 val DIGIT = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" |
|
181 val ID = SYM ~ (SYM | DIGIT).% |
|
182 val NUM = PLUS(DIGIT) |
|
183 val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" | "true" | "false" |
|
184 val SEMI: Rexp = ";" |
|
185 val OP: Rexp = ":=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/" |
|
186 val WHITESPACE = PLUS(" " | "\n" | "\t") |
|
187 val RPAREN: Rexp = ")" |
|
188 val LPAREN: Rexp = "(" |
|
189 val BEGIN: Rexp = "{" |
|
190 val END: Rexp = "}" |
|
191 val STRING: Rexp = "\"" ~ SYM.% ~ "\"" |
|
192 |
|
193 val WHILE_REGS = (KEYWORD | |
|
194 ID | |
|
195 OP | |
|
196 NUM | |
|
197 SEMI | |
|
198 LPAREN | RPAREN | |
|
199 BEGIN | END | |
|
200 WHITESPACE).% |
|
201 |
|
202 |
|
203 println("prog0 test") |
|
204 |
|
205 val prog0 = """read n""" |
|
206 println(lexing(WHILE_REGS, prog0)) |
|
207 |
|
208 println("prog1 test") |
|
209 |
|
210 val prog1 = """read n; write (n)""" |
|
211 println(lexing(WHILE_REGS, prog1)) |
|
212 |
|
213 |