diff -r dff4b062a8a9 -r 2d625418c011 parser4.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/parser4.scala Mon Nov 19 14:18:42 2012 +0000 @@ -0,0 +1,111 @@ + +// parser combinators with input type I and return type T +// and memoisation + +case class SubString(s: String, l: Int, h: Int) { + def low = l + def high = h + def length = h - l + def substring(l: Int = l, h: Int = h) = s.slice(l, h) + def set(low: Int = l, high: Int = h) = SubString(s, low, high) + +} + +type Ctxt = List[(String, SubString)] + +abstract class Parser[T] { + + def parse(ts: SubString, ctxt: Ctxt): Set[(T, SubString)] + + def parse_all(s: String) : Set[T] = + for ((head, tail) <- parse(SubString(s, 0, s.length), Nil); if (tail.substring() == "")) yield head + + def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right) + def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f) + def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right) + def ~>[S] (right : => Parser[S]) : Parser[S] = this ~ right ==> (_._2) + def <~[S] (right : => Parser[S]) : Parser[T] = this ~ right ==> (_._1) +} + +class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] { + def parse(sb: SubString, ctxt: Ctxt) = + for ((head1, tail1) <- p.parse(sb, ctxt); + (head2, tail2) <- q.parse(tail1, ctxt)) yield ((head1, head2), tail2) +} + +class AltParser[T](p: => Parser[T], q: => Parser[T]) extends Parser[T] { + def parse(sb: SubString, ctxt: Ctxt) = p.parse(sb, ctxt) ++ q.parse(sb, ctxt) +} + +class FunParser[T, S](p: => Parser[T], f: T => S) extends Parser[S] { + def parse(sb: SubString, ctxt: Ctxt) = + for ((head, tail) <- p.parse(sb, ctxt)) yield (f(head), tail) +} + +case class SubStringParser(s: String) extends Parser[SubString] { + val n = s.length + def parse(sb: SubString, ctxt: Ctxt) = { + if (n <= sb.length && sb.substring(sb.low, sb.low + n) == s) + Set((sb.set(high = sb.low + n), sb.set(low = sb.low + n))) + else Set() + } +} + +implicit def string2parser(s: String) = SubStringParser(s) ==> (_.substring()) + +class IgnLst[T](p: => Parser[T]) extends Parser[T] { + def parse(sb: SubString, ctxt: Ctxt) = { + if (sb.length == 0) Set() + else for ((head, tail) <- p.parse(sb.set(high = sb.high - 1), ctxt)) + yield (head, tail.set(high = tail.high + 1)) + } +} + +class CHECK[T](nt: String, p: => Parser[T]) extends Parser[T] { + def parse(sb: SubString, ctxt: Ctxt) = { + val should_trim = ctxt.contains (nt, sb) + if (should_trim && sb.length == 0) Set() + else if (should_trim) new IgnLst(p).parse(sb, (nt, sb)::ctxt) + else p.parse(sb, (nt, sb)::ctxt) + } +} + +lazy val E: Parser[Int] = + new CHECK("E", (E ~ "+" ~ E) ==> { case ((x, y), z) => x + z} || + (E ~ "*" ~ E) ==> { case ((x, y), z) => x * z} || + ("(" ~ E ~ ")") ==> { case ((x, y), z) => y} || + "0" ==> { (s) => 0 } || + "1" ==> { (s) => 1 } || + "2" ==> { (s) => 2 } || + "3" ==> { (s) => 3 }) + +println("foo " + E.parse_all("1+2*3")) + + +// ambiguous grammar + +lazy val S: Parser[String] = + new CHECK("S", ("1" ~ S ~ S) ==> { case ((x, y), z) => "1" + y + z} || "") + +lazy val S2: Parser[String] = + new CHECK("S2", (S2 ~ S2 ~ S2) ==> { case ((x, y), z) => x + y + z} || "1" || "") + +def test2(i: Int) = { + val result = E.parse_all("1" * i) + print(result.size + " " + result + " ") +} + +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + + +for (i <- 1 to 10) { + print(i + " ") + print("%.5f".format(time_needed(1, test2(i)))) + print("\n") +} +