390
|
1 |
// package zre
|
|
2 |
//Zre5: eliminated mems table
|
|
3 |
|
|
4 |
|
|
5 |
|
|
6 |
import scala.collection.mutable.{Map => MMap}
|
|
7 |
import scala.collection.mutable.{ArrayBuffer => MList}
|
|
8 |
//import pprint._
|
|
9 |
|
|
10 |
import scala.util.Try
|
|
11 |
|
|
12 |
|
|
13 |
|
|
14 |
abstract class Val
|
|
15 |
case object Empty extends Val
|
|
16 |
case class Chr(c: Char) extends Val
|
|
17 |
case class Sequ(v1: Val, v2: Val) extends Val
|
|
18 |
case class Left(v: Val) extends Val
|
|
19 |
case class Right(v: Val) extends Val
|
|
20 |
case class Stars(vs: List[Val]) extends Val
|
|
21 |
case object DummyFilling extends Val
|
|
22 |
|
|
23 |
|
|
24 |
// abstract class Rexp {
|
|
25 |
// def equals(other: Rexp) : Boolean = this.eq(other)
|
|
26 |
// }
|
|
27 |
abstract class Rexp
|
|
28 |
case object ZERO extends Rexp // matches nothing
|
|
29 |
case object ONE extends Rexp // matches an empty string
|
|
30 |
case class CHAR(c: Char) extends Rexp // matches a character c
|
|
31 |
case class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternative
|
|
32 |
case class AL1(r1: Rexp) extends Rexp
|
|
33 |
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence
|
|
34 |
case class STAR(r: Rexp) extends Rexp
|
|
35 |
case object RTOP extends Rexp
|
|
36 |
|
|
37 |
|
|
38 |
//Seq a b --> Seq Seqa Seqb
|
|
39 |
//Seq a b --> Sequ chra chrb
|
|
40 |
//ALT r1 r2 --> mALT
|
|
41 |
// AltC L AltC R
|
|
42 |
var cyclicPreventionList : Set[Int]= Set()
|
|
43 |
abstract class Ctx
|
|
44 |
case object RootC extends Ctx
|
|
45 |
case class SeqC( mForMyself: Mem, processedSibling: List[Val], unpSibling: List[Rexp]) extends Ctx
|
|
46 |
case class AltC( mForMyself: Mem, wrapper: Val => Val) extends Ctx
|
|
47 |
case class StarC(mForMyself: Mem, vs: List[Val], inside: Rexp) extends Ctx
|
|
48 |
|
|
49 |
case class Mem(var parents: List[Ctx], var result : MList[(Int, Val)])
|
|
50 |
|
|
51 |
//AltC(Mem(RootC::Nil, Map()))
|
|
52 |
|
|
53 |
|
|
54 |
|
|
55 |
type Zipper = (Val, Mem)
|
|
56 |
|
|
57 |
var mems : MMap[(Int, Rexp), Mem] = MMap()
|
|
58 |
//start pos, original regex --> result entry
|
|
59 |
|
|
60 |
|
|
61 |
var pos : Int = 0
|
|
62 |
|
|
63 |
|
|
64 |
|
|
65 |
//input ..................
|
|
66 |
// ^ ^
|
|
67 |
// p q
|
|
68 |
// r
|
|
69 |
|
|
70 |
//parse r[p...q] --> v
|
|
71 |
|
391
|
72 |
//(a+aa)*
|
|
73 |
//aaa
|
|
74 |
//[R(Sequ(a, a)), vs]
|
|
75 |
//[L(a), L(a), vs]
|
390
|
76 |
def check_before_down(c: Ctx, r: Rexp, d: Int = 0) : List[Zipper] = {
|
|
77 |
mems.get((pos, r)) match {
|
|
78 |
case Some(m) =>
|
391
|
79 |
//m.parents = c::m.parents
|
390
|
80 |
m.result.find(tup2 => tup2._1 == pos) match {
|
|
81 |
// case Some((i, v)) =>
|
|
82 |
// original_up(v, c, d)
|
|
83 |
case None =>
|
|
84 |
List()
|
|
85 |
}
|
|
86 |
case None =>
|
|
87 |
val m = Mem(c::Nil, MList.empty[(Int, Val)])
|
|
88 |
mems = mems + ((pos, r) -> m)
|
|
89 |
original_down(r, m, d)
|
|
90 |
}
|
|
91 |
}
|
|
92 |
|
391
|
93 |
//mems pstart r --> m parents [(pend, vres), ...]
|
|
94 |
//aaa
|
|
95 |
//012
|
|
96 |
//seq a a
|
|
97 |
//0 a~a --> m ... [(2, Sequ a a)]
|
390
|
98 |
|
|
99 |
|
|
100 |
def mem_up(vres: Val, m: Mem, rec_depth : Int = 0) : List[Zipper] = {
|
|
101 |
m.result += (pos -> vres)
|
|
102 |
m.parents.flatMap((c: Ctx) =>
|
|
103 |
original_up(vres, c, rec_depth)
|
|
104 |
)
|
|
105 |
}
|
|
106 |
|
|
107 |
def original_down(r: Rexp, m: Mem, d: Int = 0) : List[Zipper] = (r, m) match {
|
|
108 |
case (CHAR(b), m) => {
|
|
109 |
if (input(pos) == b) {
|
|
110 |
List((Chr(b), m))
|
|
111 |
}
|
|
112 |
else
|
|
113 |
Nil
|
|
114 |
}
|
|
115 |
case (ONE, m) => mem_up(Empty, m, d + 1)
|
|
116 |
case (SEQ(r1, r2), m) =>
|
|
117 |
|
|
118 |
val mprime = Mem(AltC(m, x => x )::Nil, MList.empty[(Int, Val)])
|
|
119 |
|
|
120 |
check_before_down(SeqC(mprime, Nil, List(r2)), r1, d)
|
|
121 |
case (ALT(r1, r2), m) =>
|
|
122 |
|
|
123 |
check_before_down(AltC(m, Left(_)), r1, d) :::
|
|
124 |
check_before_down(AltC(m, Right(_)), r2, d)
|
|
125 |
case (STAR(r0), m) =>
|
|
126 |
|
|
127 |
check_before_down(StarC(m, Nil, r0), r0, d)
|
|
128 |
case (_, _) => throw new Exception("original down unexpected r or m")
|
|
129 |
}
|
|
130 |
|
|
131 |
def original_up(v: Val, c: Ctx, d: Int = 0) : List[Zipper] =
|
|
132 |
{
|
|
133 |
|
|
134 |
(v, c) match {
|
|
135 |
case (v, SeqC(m, v1::Nil, Nil)) =>
|
|
136 |
mem_up(Sequ(v1, v), m, d + 1)
|
|
137 |
case (v, SeqC(m, Nil, u1::Nil)) =>
|
|
138 |
check_before_down(SeqC(m, v::Nil, Nil), u1, d)
|
|
139 |
case (v, AltC(m, wrap)) => m.result.find(tup2 => tup2._1 == pos) match {
|
|
140 |
case Some( (i, vPrime) ) =>
|
|
141 |
m.result += (i -> wrap(v))
|
|
142 |
Nil
|
|
143 |
case None =>
|
|
144 |
mem_up(wrap(v), m, d + 1)
|
|
145 |
} //mem_up(AL1(r), par)
|
|
146 |
//case (v, StarC(m, vs, r0)) => throw new Exception("why not hit starC")
|
|
147 |
|
|
148 |
case (v, RootC) =>
|
|
149 |
Nil
|
|
150 |
case (v, StarC(m, vs, r0) ) => mem_up(Stars(v::vs), m, d + 1) :::
|
|
151 |
check_before_down(StarC(m, v::vs, r0), r0, d)
|
|
152 |
case (_, _) => throw new Exception("hit unexpected context")
|
|
153 |
}
|
|
154 |
|
|
155 |
}
|
|
156 |
|
|
157 |
|
|
158 |
def derive(p: Int, z: Zipper) : List[Zipper] = {
|
|
159 |
pos = p
|
|
160 |
z match {
|
|
161 |
case (v, m) => mem_up(v, m)
|
|
162 |
case _ => throw new Exception("error")
|
|
163 |
}
|
|
164 |
}
|
|
165 |
//let e' = Seq([]) in
|
|
166 |
//
|
|
167 |
def init_zipper(r: Rexp) : Zipper = {
|
|
168 |
val m_top = Mem(RootC::Nil, MList.empty[(Int, Val)])
|
|
169 |
val c_top = SeqC(m_top, Nil, r::Nil)
|
|
170 |
val m_r = Mem(c_top::Nil, MList.empty[(Int, Val)])
|
|
171 |
println(s"initial zipper is (ZERO, $m_r)")
|
|
172 |
(Empty, m_r)//TODO: which val should we start with? Maybe Empty, maybe doesn't matter
|
|
173 |
// val dummyRexp = ONE
|
|
174 |
// val dummyMem = Mem()
|
|
175 |
|
|
176 |
}
|
|
177 |
|
|
178 |
|
|
179 |
def plug_convert(v: Val, c: Ctx) : List[Val] =
|
|
180 |
{
|
|
181 |
|
|
182 |
c match {
|
|
183 |
case RootC => List(v)
|
|
184 |
//TODO: non-binary Seq requires ps.rev
|
|
185 |
case SeqC(m, ps::Nil, Nil) =>
|
|
186 |
plug_mem(Sequ(ps, v), m)
|
|
187 |
|
|
188 |
//TODO: un not nullable--partial values?
|
|
189 |
case SeqC(m, Nil, un::Nil) =>
|
|
190 |
if(nullable(un))
|
|
191 |
plug_mem(Sequ(v, mkeps(un)), m)
|
|
192 |
else
|
|
193 |
Nil
|
|
194 |
|
|
195 |
//TODO: when multiple results stored in m, which one to choose?
|
|
196 |
case AltC(m, wrap) =>
|
|
197 |
plug_mem(wrap(v), m)
|
|
198 |
case StarC(m, vs, r0) => plug_mem(Stars(v::vs), m)
|
|
199 |
}
|
|
200 |
|
|
201 |
}
|
|
202 |
|
|
203 |
|
|
204 |
var cnt = 0;
|
|
205 |
def plug_mem(v: Val, m: Mem) : List[Val] = {
|
|
206 |
m.result += (pos -> v)
|
|
207 |
m.parents.flatMap({c =>
|
|
208 |
plug_convert(v, c)
|
|
209 |
}
|
|
210 |
)
|
|
211 |
}
|
|
212 |
|
|
213 |
def plug_all(zs: List[Zipper]) : List[Val] = {
|
|
214 |
zs.flatMap(z => plug_mem(z._1, z._2))
|
|
215 |
}
|
|
216 |
|
|
217 |
|
|
218 |
def mkeps(r: Rexp) : Val = r match {
|
|
219 |
case ONE => Empty
|
|
220 |
case ALT(r1, r2) =>
|
|
221 |
if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
|
|
222 |
case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
|
|
223 |
case _ => DummyFilling
|
|
224 |
}
|
|
225 |
|
|
226 |
def nullable(r: Rexp) : Boolean = r match {
|
|
227 |
case ZERO => false
|
|
228 |
case ONE => true
|
|
229 |
case CHAR(_) => false
|
|
230 |
case ALT(r1, r2) => nullable(r1) || nullable(r2)
|
|
231 |
case SEQ(r1, r2) => nullable(r1) && nullable(r2)
|
|
232 |
case _ => false
|
|
233 |
}
|
|
234 |
|
|
235 |
|
|
236 |
val tokList : List[Char] = "aab".toList
|
|
237 |
var input : List[Char] = tokList
|
|
238 |
|
|
239 |
|
|
240 |
|
|
241 |
|
|
242 |
|
|
243 |
|
|
244 |
|
|
245 |
def lexRecurse(zs: List[Zipper], index: Int) : List[Zipper] = {
|
|
246 |
if(index < input.length )
|
|
247 |
lexRecurse(zs.flatMap(z => derive(index, z) ), index + 1)
|
|
248 |
else
|
|
249 |
zs
|
|
250 |
}
|
|
251 |
|
|
252 |
def lex(r: Rexp, s: String) : List[Zipper] = {
|
|
253 |
input = s.toList
|
|
254 |
|
|
255 |
lexRecurse(init_zipper(r)::Nil, 0)
|
|
256 |
}
|
|
257 |
|
|
258 |
|
|
259 |
|
|
260 |
implicit def charlist2rexp(s: List[Char]): Rexp = s match {
|
|
261 |
case Nil => ONE
|
|
262 |
case c::Nil => CHAR(c)
|
|
263 |
case c::cs => SEQ(CHAR(c), charlist2rexp(cs))
|
|
264 |
}
|
|
265 |
implicit def string2Rexp(s: String) : Rexp = charlist2rexp(s.toList)
|
|
266 |
|
|
267 |
implicit def RexpOps(r: Rexp) = new {
|
|
268 |
def | (s: Rexp) = ALT(r, s)
|
|
269 |
def ~ (s: Rexp) = SEQ(r, s)
|
|
270 |
def % = STAR(r)
|
|
271 |
}
|
|
272 |
|
|
273 |
implicit def stringOps(s: String) = new {
|
|
274 |
def | (r: Rexp) = ALT(s, r)
|
|
275 |
def | (r: String) = ALT(s, r)
|
|
276 |
def ~ (r: Rexp) = SEQ(s, r)
|
|
277 |
def ~ (r: String) = SEQ(s, r)
|
|
278 |
def % = STAR(s)
|
|
279 |
|
|
280 |
}
|
|
281 |
|
|
282 |
//derive(0, init_zipper(re0))
|
|
283 |
|
|
284 |
// println(re1s.length)
|
|
285 |
// mems.foreach(a => println(a))
|
|
286 |
// val re1sPlugged = plug_all(re1s)
|
|
287 |
// re1sPlugged.foreach(zipper => {
|
|
288 |
// println(zipper);
|
|
289 |
// println("delimit")
|
|
290 |
// })
|
|
291 |
|
|
292 |
// mems.clear()
|
|
293 |
// println(mems)
|
|
294 |
// println(re0)
|
|
295 |
// val re2s = lex(re0, "aab")
|
|
296 |
// val re2sPlugged = plug_all(re2s)
|
|
297 |
// re2sPlugged.foreach(v => {
|
|
298 |
// val Sequ(Empty, vp) = v
|
|
299 |
// println(vp)
|
|
300 |
// }
|
|
301 |
// )
|
|
302 |
// val re0 = SEQ(ALT(CHAR('a'), SEQ(CHAR('a'),CHAR('a'))),
|
|
303 |
// ALT(SEQ(CHAR('a'), CHAR('b')), SEQ(CHAR('b'), CHAR('c')) )
|
|
304 |
// )
|
|
305 |
|
|
306 |
// val (rgraph, re0root) = makeGraphFromObject(re0)
|
|
307 |
// val asciir = GraphLayout.renderGraph(rgraph)
|
|
308 |
// println("printing out re0")
|
|
309 |
// println(asciir)
|
|
310 |
// val re1s = lex(re0, "aabc")
|
|
311 |
|
|
312 |
def actualZipperSize(zs: List[Zipper]) : Int = zs match {
|
|
313 |
case Nil => 0
|
|
314 |
case z::zs1 => countParents(z._2) + actualZipperSize(zs1)
|
|
315 |
}
|
|
316 |
|
|
317 |
def countParents(m: Mem) : Int = {
|
|
318 |
m.parents.map(c => countGrandParents(c)).sum
|
|
319 |
}
|
|
320 |
|
|
321 |
def countGrandParents(c: Ctx) : Int = {
|
|
322 |
c match {
|
|
323 |
case RootC => 1
|
|
324 |
case SeqC(m, pr, unp) => countParents(m)
|
|
325 |
case AltC(m, w) => countParents(m)
|
|
326 |
case StarC(m, _, _) => countParents(m)
|
|
327 |
}
|
|
328 |
}
|
|
329 |
|
|
330 |
def zipperSimp(zs: List[Zipper]) : List[Zipper] = {
|
|
331 |
zs.distinctBy(z => zipBackMem(z._2))
|
|
332 |
}
|
|
333 |
|
|
334 |
def zipBackToRegex(c: Ctx, r: Rexp = ONE) : List[Rexp] = {
|
|
335 |
c match {
|
|
336 |
case RootC => r::Nil
|
|
337 |
case SeqC(m, pr, Nil) => zipBackMem(m, r)
|
|
338 |
case SeqC(m, pr, unp::Nil) => zipBackMem(m, SEQ(r, unp))
|
|
339 |
case AltC(m, w) => zipBackMem(m, r)
|
|
340 |
case StarC(m, vs, r0) => zipBackMem(m, SEQ(r, STAR(r0)))
|
|
341 |
}
|
|
342 |
}
|
|
343 |
|
|
344 |
def zipBackMem(m: Mem, r: Rexp = ONE) : List[Rexp] = {
|
|
345 |
m.parents.flatMap(c => zipBackToRegex(c, r))
|
|
346 |
}
|
|
347 |
|
|
348 |
//def crystalizeZipper
|
|
349 |
|
|
350 |
mems.clear()
|
|
351 |
val re1 = ("a" | "aa").%
|
391
|
352 |
val re1ss = lex(re1, "aaaaa")
|
390
|
353 |
|
391
|
354 |
//drawZippers(re1ss)
|
|
355 |
println(actualZipperSize(re1ss))
|
|
356 |
//println(re1ss)
|
|
357 |
val re1S = zipperSimp(re1ss)
|
390
|
358 |
//println(actualZipperSize(re1S))
|
|
359 |
|
|
360 |
|
393
|
361 |
mems.clear()
|
|
362 |
val re2 = ALT("a", "bc")
|
|
363 |
val re2res = lex(re2, "a")
|
391
|
364 |
// //lex(1~a, "a") --> lexRecurse((1v, m (SeqC(m (RootC, Nil), Nil, [1~a] ) )))
|
390
|
365 |
|
|
366 |
|
393
|
367 |
println(re2res)
|
390
|
368 |
|
393
|
369 |
val re2resPlugged = plug_all(re2res)
|
|
370 |
re2resPlugged.foreach(v => {
|
|
371 |
val Sequ(Empty, vp) = v
|
|
372 |
println(vp)
|
|
373 |
})
|
390
|
374 |
|
|
375 |
// println("remaining regex")
|
|
376 |
// println(re1ss.flatMap(z => zipBackMem(z._2)))
|
|
377 |
|
|
378 |
|
|
379 |
// val re1ssPlugged = plug_all(re1ss)
|
|
380 |
// println("each of the values")
|
|
381 |
// re1ssPlugged.foreach(v => {
|
|
382 |
// //val Sequ(Empty, vp) = v
|
|
383 |
// //println(vp)
|
|
384 |
// println(v)
|
|
385 |
// }
|
|
386 |
// )
|
|
387 |
// println(mems.size)
|
|
388 |
//println(mems)
|
|
389 |
//mems.map({case (ir, m) => if (ir._1 == 1 && ir._2 == CHAR('b')) println(printMem(m)) })
|
|
390 |
// println("Mkeps + inj:")
|
|
391 |
// println(
|
|
392 |
// mems.get((0, re1)) match {
|
|
393 |
// case Some(m) => printMem(m)
|
|
394 |
// case None => ""
|
|
395 |
// }
|
|
396 |
// )
|
|
397 |
|
|
398 |
// println(re1sPlugged)
|
|
399 |
//drawZippers(re1s, plugOrNot = false)
|
|
400 |
// re1s.foreach{
|
|
401 |
// re1 =>
|
|
402 |
// {
|
|
403 |
|
|
404 |
// drawZippers(derive(1, re1), plugOrNot = true)
|
|
405 |
|
|
406 |
// }
|
|
407 |
// }
|
|
408 |
|
|
409 |
|