|
1 |
|
2 // parser combinators with input type I and return type T |
|
3 // and memoisation |
|
4 |
|
5 case class SubString(s: String, l: Int, h: Int) { |
|
6 def low = l |
|
7 def high = h |
|
8 def length = h - l |
|
9 def substring(l: Int = l, h: Int = h) = s.slice(l, h) |
|
10 def set(low: Int = l, high: Int = h) = SubString(s, low, high) |
|
11 |
|
12 } |
|
13 |
|
14 type Ctxt = List[(String, SubString)] |
|
15 |
|
16 abstract class Parser[T] { |
|
17 |
|
18 def parse(ts: SubString, ctxt: Ctxt): Set[(T, SubString)] |
|
19 |
|
20 def parse_all(s: String) : Set[T] = |
|
21 for ((head, tail) <- parse(SubString(s, 0, s.length), Nil); if (tail.substring() == "")) yield head |
|
22 |
|
23 def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right) |
|
24 def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f) |
|
25 def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right) |
|
26 def ~>[S] (right : => Parser[S]) : Parser[S] = this ~ right ==> (_._2) |
|
27 def <~[S] (right : => Parser[S]) : Parser[T] = this ~ right ==> (_._1) |
|
28 } |
|
29 |
|
30 class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] { |
|
31 def parse(sb: SubString, ctxt: Ctxt) = |
|
32 for ((head1, tail1) <- p.parse(sb, ctxt); |
|
33 (head2, tail2) <- q.parse(tail1, ctxt)) yield ((head1, head2), tail2) |
|
34 } |
|
35 |
|
36 class AltParser[T](p: => Parser[T], q: => Parser[T]) extends Parser[T] { |
|
37 def parse(sb: SubString, ctxt: Ctxt) = p.parse(sb, ctxt) ++ q.parse(sb, ctxt) |
|
38 } |
|
39 |
|
40 class FunParser[T, S](p: => Parser[T], f: T => S) extends Parser[S] { |
|
41 def parse(sb: SubString, ctxt: Ctxt) = |
|
42 for ((head, tail) <- p.parse(sb, ctxt)) yield (f(head), tail) |
|
43 } |
|
44 |
|
45 case class SubStringParser(s: String) extends Parser[SubString] { |
|
46 val n = s.length |
|
47 def parse(sb: SubString, ctxt: Ctxt) = { |
|
48 if (n <= sb.length && sb.substring(sb.low, sb.low + n) == s) |
|
49 Set((sb.set(high = sb.low + n), sb.set(low = sb.low + n))) |
|
50 else Set() |
|
51 } |
|
52 } |
|
53 |
|
54 implicit def string2parser(s: String) = SubStringParser(s) ==> (_.substring()) |
|
55 |
|
56 class IgnLst[T](p: => Parser[T]) extends Parser[T] { |
|
57 def parse(sb: SubString, ctxt: Ctxt) = { |
|
58 if (sb.length == 0) Set() |
|
59 else for ((head, tail) <- p.parse(sb.set(high = sb.high - 1), ctxt)) |
|
60 yield (head, tail.set(high = tail.high + 1)) |
|
61 } |
|
62 } |
|
63 |
|
64 class CHECK[T](nt: String, p: => Parser[T]) extends Parser[T] { |
|
65 def parse(sb: SubString, ctxt: Ctxt) = { |
|
66 val should_trim = ctxt.contains (nt, sb) |
|
67 if (should_trim && sb.length == 0) Set() |
|
68 else if (should_trim) new IgnLst(p).parse(sb, (nt, sb)::ctxt) |
|
69 else p.parse(sb, (nt, sb)::ctxt) |
|
70 } |
|
71 } |
|
72 |
|
73 lazy val E: Parser[Int] = |
|
74 new CHECK("E", (E ~ "+" ~ E) ==> { case ((x, y), z) => x + z} || |
|
75 (E ~ "*" ~ E) ==> { case ((x, y), z) => x * z} || |
|
76 ("(" ~ E ~ ")") ==> { case ((x, y), z) => y} || |
|
77 "0" ==> { (s) => 0 } || |
|
78 "1" ==> { (s) => 1 } || |
|
79 "2" ==> { (s) => 2 } || |
|
80 "3" ==> { (s) => 3 }) |
|
81 |
|
82 println("foo " + E.parse_all("1+2*3")) |
|
83 |
|
84 |
|
85 // ambiguous grammar |
|
86 |
|
87 lazy val S: Parser[String] = |
|
88 new CHECK("S", ("1" ~ S ~ S) ==> { case ((x, y), z) => "1" + y + z} || "") |
|
89 |
|
90 lazy val S2: Parser[String] = |
|
91 new CHECK("S2", (S2 ~ S2 ~ S2) ==> { case ((x, y), z) => x + y + z} || "1" || "") |
|
92 |
|
93 def test2(i: Int) = { |
|
94 val result = E.parse_all("1" * i) |
|
95 print(result.size + " " + result + " ") |
|
96 } |
|
97 |
|
98 def time_needed[T](i: Int, code: => T) = { |
|
99 val start = System.nanoTime() |
|
100 for (j <- 1 to i) code |
|
101 val end = System.nanoTime() |
|
102 (end - start)/(i * 1.0e9) |
|
103 } |
|
104 |
|
105 |
|
106 for (i <- 1 to 10) { |
|
107 print(i + " ") |
|
108 print("%.5f".format(time_needed(1, test2(i)))) |
|
109 print("\n") |
|
110 } |
|
111 |