progs/parser5.scala
changeset 93 4794759139ea
parent 92 e85600529ca5
equal deleted inserted replaced
92:e85600529ca5 93:4794759139ea
       
     1 val DIGIT = RANGE("0123456789")
       
     2 val NONZERODIGIT = RANGE("123456789")
       
     3 
       
     4 val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
       
     5 val LPAREN = CHAR('(')
       
     6 val RPAREN = CHAR(')')
       
     7 val WHITESPACE = PLUS(RANGE(" \n"))
       
     8 val OPS = RANGE("+-*")
       
     9 
       
    10 // for classifying the strings that have been recognised
       
    11 abstract class Token
       
    12 case object T_WHITESPACE extends Token
       
    13 case class T_NUM(s: String) extends Token
       
    14 case class T_OP(s: String) extends Token
       
    15 case object T_LPAREN extends Token
       
    16 case object T_RPAREN extends Token
       
    17 
       
    18 val lexing_rules: List[Rule[Token]]= 
       
    19   List((NUMBER, (s) => T_NUM(s.mkString)),
       
    20        (WHITESPACE, (s) => T_WHITESPACE),
       
    21        (LPAREN, (s) => T_LPAREN),
       
    22        (RPAREN, (s) => T_RPAREN),
       
    23        (OPS, (s) => T_OP(s.mkString)))
       
    24 
       
    25 val Tk = Tokenizer(lexing_rules, List(T_WHITESPACE))
       
    26 
       
    27 
       
    28 // parser combinators with input type I and return type T
       
    29 // and memoisation
       
    30 
       
    31 case class SubList[T](s: List[T], l: Int, h: Int) {
       
    32   def low = l
       
    33   def high = h
       
    34   def length = h - l
       
    35   def sublist(l: Int = l, h: Int = h) = s.slice(l, h)
       
    36   def set(low: Int = l, high: Int = h) = SubList(s, low, high)
       
    37 }
       
    38 
       
    39 type Ctxt[T] = List[(String, SubList[T])]
       
    40 
       
    41 abstract class Parser[I, T] {
       
    42 
       
    43   def parse(ts: SubList[I], ctxt: Ctxt[I]): Set[(T, SubList[I])]
       
    44 
       
    45   def parse_all(s: List[I]) : Set[T] =
       
    46     for ((head, tail) <- parse(SubList(s, 0, s.length), Nil); if (tail.sublist() == Nil)) yield head
       
    47 
       
    48   def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right)
       
    49   def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f)
       
    50   def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right)
       
    51   def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2)
       
    52   def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1)
       
    53 }
       
    54 
       
    55 class SeqParser[I, T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] {
       
    56   def parse(sb: SubList[I], ctxt: Ctxt[I]) = 
       
    57     for ((head1, tail1) <- p.parse(sb, ctxt); 
       
    58          (head2, tail2) <- q.parse(tail1, ctxt)) yield ((head1, head2), tail2)
       
    59 }
       
    60 
       
    61 class AltParser[I, T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] {
       
    62   def parse(sb: SubList[I], ctxt: Ctxt[I]) = p.parse(sb, ctxt) ++ q.parse(sb, ctxt)   
       
    63 }
       
    64 
       
    65 class FunParser[I, T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] {
       
    66   def parse(sb: SubList[I], ctxt: Ctxt[I]) = 
       
    67     for ((head, tail) <- p.parse(sb, ctxt)) yield (f(head), tail)
       
    68 }
       
    69 
       
    70 case object NumParser extends Parser[Token, Int] {
       
    71   def parse(sb: SubList[Token], ctxt: Ctxt[Token]) = {
       
    72     if (0 < sb.length) sb.sublist(sb.low, sb.low + 1) match { 
       
    73       case T_NUM(i)::Nil => Set((i.toInt, sb.set(low = sb.low + 1))) 
       
    74       case _ => Set()
       
    75     }
       
    76     else Set()
       
    77   }
       
    78 }
       
    79 
       
    80 case class TokParser(t: Token) extends Parser[Token, Token] {
       
    81   def parse(sb: SubList[Token], ctxt: Ctxt[Token]) = {
       
    82     if (0 < sb.length && sb.sublist(sb.low, sb.low + 1) == List(t)) Set((t, sb.set(low = sb.low + 1))) 
       
    83     else Set()
       
    84   }
       
    85 }
       
    86 
       
    87 implicit def token2tparser(t: Token) = TokParser(t)
       
    88 
       
    89 class IgnLst[I, T](p: => Parser[I, T]) extends Parser[I, T] {
       
    90   def parse(sb: SubList[I], ctxt: Ctxt[I]) = {
       
    91     if (sb.length == 0) Set()
       
    92     else for ((head, tail) <- p.parse(sb.set(high = sb.high - 1), ctxt)) 
       
    93          yield (head, tail.set(high = tail.high + 1))
       
    94   }
       
    95 }
       
    96 
       
    97 class CHECK[I, T](nt: String, p: => Parser[I, T]) extends Parser[I, T] {
       
    98   def parse(sb: SubList[I], ctxt: Ctxt[I]) = {
       
    99     val should_trim = ctxt.contains (nt, sb)
       
   100     if (should_trim && sb.length == 0) Set()
       
   101     else if (should_trim) new IgnLst(p).parse(sb, (nt, sb)::ctxt)
       
   102     else p.parse(sb, (nt, sb)::ctxt)
       
   103   }
       
   104 }
       
   105 
       
   106 lazy val E: Parser[Token, Int] = 
       
   107   new CHECK("E", (E ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z} || 
       
   108                  (E ~ T_OP("*") ~ E) ==> { case ((x, y), z) => x * z} ||
       
   109                  (T_LPAREN ~ E ~ T_RPAREN) ==> { case ((x, y), z) => y} ||
       
   110                  NumParser)
       
   111 
       
   112 println(E.parse_all(Tk.fromString("1 + 2 * 3")))
       
   113 println(E.parse_all(Tk.fromString("(1 + 2) * 3")))