parser4.scala
changeset 64 2d625418c011
child 70 e6868bd2942b
equal deleted inserted replaced
63:dff4b062a8a9 64:2d625418c011
       
     1 
       
     2 // parser combinators with input type I and return type T
       
     3 // and memoisation
       
     4 
       
     5 case class SubString(s: String, l: Int, h: Int) {
       
     6   def low = l
       
     7   def high = h
       
     8   def length = h - l
       
     9   def substring(l: Int = l, h: Int = h) = s.slice(l, h)
       
    10   def set(low: Int = l, high: Int = h) = SubString(s, low, high)
       
    11   
       
    12 }
       
    13 
       
    14 type Ctxt = List[(String, SubString)]
       
    15 
       
    16 abstract class Parser[T] {
       
    17 
       
    18   def parse(ts: SubString, ctxt: Ctxt): Set[(T, SubString)]
       
    19 
       
    20   def parse_all(s: String) : Set[T] =
       
    21     for ((head, tail) <- parse(SubString(s, 0, s.length), Nil); if (tail.substring() == "")) yield head
       
    22 
       
    23   def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right)
       
    24   def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f)
       
    25   def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right)
       
    26   def ~>[S] (right : => Parser[S]) : Parser[S] = this ~ right ==> (_._2)
       
    27   def <~[S] (right : => Parser[S]) : Parser[T] = this ~ right ==> (_._1)
       
    28 }
       
    29 
       
    30 class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] {
       
    31   def parse(sb: SubString, ctxt: Ctxt) = 
       
    32     for ((head1, tail1) <- p.parse(sb, ctxt); 
       
    33          (head2, tail2) <- q.parse(tail1, ctxt)) yield ((head1, head2), tail2)
       
    34 }
       
    35 
       
    36 class AltParser[T](p: => Parser[T], q: => Parser[T]) extends Parser[T] {
       
    37   def parse(sb: SubString, ctxt: Ctxt) = p.parse(sb, ctxt) ++ q.parse(sb, ctxt)   
       
    38 }
       
    39 
       
    40 class FunParser[T, S](p: => Parser[T], f: T => S) extends Parser[S] {
       
    41   def parse(sb: SubString, ctxt: Ctxt) = 
       
    42     for ((head, tail) <- p.parse(sb, ctxt)) yield (f(head), tail)
       
    43 }
       
    44 
       
    45 case class SubStringParser(s: String) extends Parser[SubString] {
       
    46   val n = s.length
       
    47   def parse(sb: SubString, ctxt: Ctxt) = {
       
    48     if (n <= sb.length && sb.substring(sb.low, sb.low + n) == s) 
       
    49       Set((sb.set(high = sb.low + n), sb.set(low = sb.low + n))) 
       
    50     else Set()
       
    51   }
       
    52 }
       
    53 
       
    54 implicit def string2parser(s: String) = SubStringParser(s) ==> (_.substring())
       
    55 
       
    56 class IgnLst[T](p: => Parser[T]) extends Parser[T] {
       
    57   def parse(sb: SubString, ctxt: Ctxt) = {
       
    58     if (sb.length == 0) Set()
       
    59     else for ((head, tail) <- p.parse(sb.set(high = sb.high - 1), ctxt)) 
       
    60          yield (head, tail.set(high = tail.high + 1))
       
    61   }
       
    62 }
       
    63 
       
    64 class CHECK[T](nt: String, p: => Parser[T]) extends Parser[T] {
       
    65   def parse(sb: SubString, ctxt: Ctxt) = {
       
    66     val should_trim = ctxt.contains (nt, sb)
       
    67     if (should_trim && sb.length == 0) Set()
       
    68     else if (should_trim) new IgnLst(p).parse(sb, (nt, sb)::ctxt)
       
    69     else p.parse(sb, (nt, sb)::ctxt)
       
    70   }
       
    71 }
       
    72 
       
    73 lazy val E: Parser[Int] = 
       
    74   new CHECK("E", (E ~ "+" ~ E) ==> { case ((x, y), z) => x + z} || 
       
    75                  (E ~ "*" ~ E) ==> { case ((x, y), z) => x * z} ||
       
    76                  ("(" ~ E ~ ")") ==> { case ((x, y), z) => y} ||
       
    77                  "0" ==> { (s) => 0 } ||
       
    78                  "1" ==> { (s) => 1 } ||
       
    79                  "2" ==> { (s) => 2 } ||
       
    80                  "3" ==> { (s) => 3 })
       
    81 
       
    82 println("foo " + E.parse_all("1+2*3"))
       
    83 
       
    84 
       
    85 // ambiguous grammar
       
    86 
       
    87 lazy val S: Parser[String] = 
       
    88   new CHECK("S", ("1" ~ S ~ S) ==> { case ((x, y), z) => "1" + y + z} || "")
       
    89 
       
    90 lazy val S2: Parser[String] = 
       
    91   new CHECK("S2", (S2 ~ S2 ~ S2) ==> { case ((x, y), z) => x + y + z} || "1" || "")
       
    92 
       
    93 def test2(i: Int) = {
       
    94   val result = E.parse_all("1" * i)
       
    95   print(result.size + " " + result + " ")
       
    96 }
       
    97 
       
    98 def time_needed[T](i: Int, code: => T) = {
       
    99   val start = System.nanoTime()
       
   100   for (j <- 1 to i) code
       
   101   val end = System.nanoTime()
       
   102   (end - start)/(i * 1.0e9)
       
   103 }
       
   104 
       
   105 
       
   106 for (i <- 1 to 10) {
       
   107   print(i + " ")
       
   108   print("%.5f".format(time_needed(1, test2(i))))
       
   109   print("\n")
       
   110 }
       
   111