|
1 |
|
2 // parser combinators with input type I and return type T |
|
3 |
|
4 case class SubString(s: String, l: Int, h: Int) { |
|
5 def low = l |
|
6 def high = h |
|
7 def length = h - l |
|
8 def substring(l: Int = l, h: Int = h) = s.slice(l, h) |
|
9 def set(low: Int = l, high: Int = h) = SubString(s, low, high) |
|
10 |
|
11 } |
|
12 |
|
13 type Ctxt = List[(String, SubString)] |
|
14 |
|
15 abstract class Parser[T] { |
|
16 |
|
17 def parse(ts: SubString, ctxt: Ctxt): Set[(T, SubString)] |
|
18 |
|
19 def parse_all(s: String) : Set[T] = |
|
20 for ((head, tail) <- parse(SubString(s, 0, s.length), Nil); if (tail.substring() == "")) yield head |
|
21 |
|
22 def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right) |
|
23 def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f) |
|
24 def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right) |
|
25 } |
|
26 |
|
27 class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] { |
|
28 def parse(sb: SubString, ctxt: Ctxt) = |
|
29 for ((head1, tail1) <- p.parse(sb, ctxt); |
|
30 (head2, tail2) <- q.parse(tail1, ctxt)) yield ((head1, head2), tail2) |
|
31 } |
|
32 |
|
33 class AltParser[T](p: => Parser[T], q: => Parser[T]) extends Parser[T] { |
|
34 def parse(sb: SubString, ctxt: Ctxt) = p.parse(sb, ctxt) ++ q.parse(sb, ctxt) |
|
35 } |
|
36 |
|
37 class FunParser[T, S](p: => Parser[T], f: T => S) extends Parser[S] { |
|
38 def parse(sb: SubString, ctxt: Ctxt) = |
|
39 for ((head, tail) <- p.parse(sb, ctxt)) yield (f(head), tail) |
|
40 } |
|
41 |
|
42 case class SubStringParser(s: String) extends Parser[SubString] { |
|
43 val n = s.length |
|
44 def parse(sb: SubString, ctxt: Ctxt) = { |
|
45 if (n <= sb.length && sb.substring(sb.low, sb.low + n) == s) |
|
46 Set((sb.set(high = sb.low + n), sb.set(low = sb.low + n))) |
|
47 else Set() |
|
48 } |
|
49 } |
|
50 |
|
51 implicit def string2parser(s: String) = SubStringParser(s) ==> (_.substring()) |
|
52 |
|
53 class IgnLst[T](p: => Parser[T]) extends Parser[T] { |
|
54 def parse(sb: SubString, ctxt: Ctxt) = { |
|
55 if (sb.length == 0) Set() |
|
56 else for ((head, tail) <- p.parse(sb.set(high = sb.high - 1), ctxt)) |
|
57 yield (head, tail.set(high = tail.high + 1)) |
|
58 } |
|
59 } |
|
60 |
|
61 class CHECK[T](nt: String, p: => Parser[T]) extends Parser[T] { |
|
62 def parse(sb: SubString, ctxt: Ctxt) = { |
|
63 val should_trim = ctxt.contains (nt, sb) |
|
64 if (should_trim && sb.length == 0) Set() |
|
65 else if (should_trim) new IgnLst(p).parse(sb, (nt, sb)::ctxt) |
|
66 else p.parse(sb, (nt, sb)::ctxt) |
|
67 } |
|
68 } |
|
69 |
|
70 // ambigous grammar |
|
71 lazy val E: Parser[Int] = |
|
72 new CHECK("E", (E ~ "+" ~ E) ==> { case ((x, y), z) => x + z} || |
|
73 (E ~ "*" ~ E) ==> { case ((x, y), z) => x * z} || |
|
74 ("(" ~ E ~ ")") ==> { case ((x, y), z) => y} || |
|
75 "0" ==> { (s) => 0 } || |
|
76 "1" ==> { (s) => 1 } || |
|
77 "2" ==> { (s) => 2 } || |
|
78 "3" ==> { (s) => 3 }) |
|
79 |
|
80 println(E.parse_all("1+2*3+3")) |
|
81 |
|
82 |