| 
629
 | 
     1  | 
import scala.language.implicitConversions
  | 
| 
 | 
     2  | 
import scala.language.reflectiveCalls
  | 
| 
 | 
     3  | 
  | 
| 
672
 | 
     4  | 
// more convenience for the semantic actions later on
  | 
| 
 | 
     5  | 
case class ~[+A, +B](_1: A, _2: B)
  | 
| 
 | 
     6  | 
  | 
| 
 | 
     7  | 
  | 
| 
629
 | 
     8  | 
/* Note, in the lectures I did not show the implicit type consraint
  | 
| 
 | 
     9  | 
 * I => Seq[_], which means that the input type 'I' needs to be
  | 
| 
 | 
    10  | 
 * a sequence. */
  | 
| 
 | 
    11  | 
  | 
| 
 | 
    12  | 
type IsSeq[A] = A => Seq[_]
  | 
| 
 | 
    13  | 
  | 
| 
 | 
    14  | 
abstract class Parser[I : IsSeq, T] {
 | 
| 
 | 
    15  | 
  def parse(ts: I): Set[(T, I)]
  | 
| 
 | 
    16  | 
  | 
| 
 | 
    17  | 
  def parse_all(ts: I) : Set[T] =
  | 
| 
 | 
    18  | 
    for ((head, tail) <- parse(ts); 
  | 
| 
683
 | 
    19  | 
        if tail.isEmpty) yield head
  | 
| 
629
 | 
    20  | 
}
  | 
| 
 | 
    21  | 
  | 
| 
 | 
    22  | 
  | 
| 
 | 
    23  | 
class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], 
  | 
| 
 | 
    24  | 
                                 q: => Parser[I, S]) extends Parser[I, ~[T, S]] {
 | 
| 
 | 
    25  | 
  def parse(sb: I) = 
  | 
| 
 | 
    26  | 
    for ((head1, tail1) <- p.parse(sb); 
  | 
| 
 | 
    27  | 
         (head2, tail2) <- q.parse(tail1)) yield (new ~(head1, head2), tail2)
  | 
| 
 | 
    28  | 
}
  | 
| 
 | 
    29  | 
  | 
| 
 | 
    30  | 
class AltParser[I : IsSeq, T](p: => Parser[I, T], 
  | 
| 
 | 
    31  | 
                              q: => Parser[I, T]) extends Parser[I, T] {
 | 
| 
 | 
    32  | 
  def parse(sb: I) = p.parse(sb) ++ q.parse(sb)   
  | 
| 
 | 
    33  | 
}
  | 
| 
 | 
    34  | 
  | 
| 
 | 
    35  | 
class FunParser[I : IsSeq, T, S](p: => Parser[I, T], 
  | 
| 
 | 
    36  | 
                                 f: T => S) extends Parser[I, S] {
 | 
| 
 | 
    37  | 
  def parse(sb: I) = 
  | 
| 
 | 
    38  | 
    for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
  | 
| 
 | 
    39  | 
}
  | 
| 
 | 
    40  | 
  | 
| 
 | 
    41  | 
// atomic parsers for characters, numbers and strings
  | 
| 
 | 
    42  | 
case class CharParser(c: Char) extends Parser[String, Char] {
 | 
| 
 | 
    43  | 
  def parse(sb: String) = 
  | 
| 
 | 
    44  | 
    if (sb != "" && sb.head == c) Set((c, sb.tail)) else Set()
  | 
| 
 | 
    45  | 
}
  | 
| 
 | 
    46  | 
  | 
| 
 | 
    47  | 
import scala.util.matching.Regex
  | 
| 
 | 
    48  | 
case class RegexParser(reg: Regex) extends Parser[String, String] {
 | 
| 
 | 
    49  | 
  def parse(sb: String) = reg.findPrefixMatchOf(sb) match {
 | 
| 
 | 
    50  | 
    case None => Set()
  | 
| 
 | 
    51  | 
    case Some(m) => Set((m.matched, m.after.toString))  
  | 
| 
 | 
    52  | 
  }
  | 
| 
 | 
    53  | 
}
  | 
| 
 | 
    54  | 
  | 
| 
 | 
    55  | 
val NumParser = RegexParser("[0-9]+".r)
 | 
| 
 | 
    56  | 
def StringParser(s: String) = RegexParser(Regex.quote(s).r)
  | 
| 
 | 
    57  | 
  | 
| 
 | 
    58  | 
// NumParserInt2 transforms a "string integer" into an Int;
  | 
| 
 | 
    59  | 
// needs new, because FunParser is not a case class
  | 
| 
 | 
    60  | 
  | 
| 
 | 
    61  | 
val NumParserInt2 = new FunParser(NumParser, (s: String) => s.toInt)
  | 
| 
 | 
    62  | 
  | 
| 
 | 
    63  | 
  | 
| 
 | 
    64  | 
// convenience
  | 
| 
 | 
    65  | 
implicit def string2parser(s: String) = StringParser(s)
  | 
| 
 | 
    66  | 
implicit def char2parser(c: Char) = CharParser(c)
  | 
| 
 | 
    67  | 
  | 
| 
673
 | 
    68  | 
implicit def ParserOps[I : IsSeq, T](p: Parser[I, T]) = new {
 | 
| 
629
 | 
    69  | 
  def || (q : => Parser[I, T]) = new AltParser[I, T](p, q)
  | 
| 
 | 
    70  | 
  def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f)
  | 
| 
 | 
    71  | 
  def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
  | 
| 
 | 
    72  | 
}
  | 
| 
 | 
    73  | 
  | 
| 
 | 
    74  | 
implicit def StringOps(s: String) = new {
 | 
| 
 | 
    75  | 
  def || (q : => Parser[String, String]) = new AltParser[String, String](s, q)
  | 
| 
 | 
    76  | 
  def || (r: String) = new AltParser[String, String](s, r)
  | 
| 
 | 
    77  | 
  def ==>[S] (f: => String => S) = new FunParser[String, String, S](s, f)
  | 
| 
 | 
    78  | 
  def ~[S] (q : => Parser[String, S]) = 
  | 
| 
 | 
    79  | 
    new SeqParser[String, String, S](s, q)
  | 
| 
 | 
    80  | 
  def ~ (r: String) = 
  | 
| 
 | 
    81  | 
    new SeqParser[String, String, String](s, r)
  | 
| 
 | 
    82  | 
}
  | 
| 
 | 
    83  | 
  | 
| 
 | 
    84  | 
// NumParserInt can now be written as
  | 
| 
 | 
    85  | 
val NumParserInt = NumParser ==> (s => s.toInt)
  | 
| 
 | 
    86  | 
  | 
| 
 | 
    87  | 
  | 
| 
 | 
    88  | 
lazy val Pal : Parser[String, String] = 
  | 
| 
 | 
    89  | 
  (("a" ~ Pal ~ "a") ==> { case x ~ y ~ z => x + y + z } ||
 | 
| 
 | 
    90  | 
   ("b" ~ Pal ~ "b") ==> { case x ~ y ~ z => x + y + z } || "a" || "b" || "")
 | 
| 
 | 
    91  | 
  | 
| 
 | 
    92  | 
Pal.parse_all("abaaaba")
 | 
| 
 | 
    93  | 
Pal.parse("abaaaba")
 | 
| 
 | 
    94  | 
  | 
| 
 | 
    95  | 
println("Palindrome: " + Pal.parse_all("abaaaba"))
 | 
| 
 | 
    96  | 
  | 
| 
 | 
    97  | 
// well-nested parentheses parser (transforms '(' -> '{' , ')' -> '}' )
 | 
| 
 | 
    98  | 
lazy val P : Parser[String, String] = 
  | 
| 
 | 
    99  | 
  "(" ~ P ~ ")" ~ P ==> { case _ ~ x ~ _ ~ y => "{" + x + "}" + y } || ""
 | 
| 
 | 
   100  | 
  | 
| 
 | 
   101  | 
P.parse_all("(((()()))())")
 | 
| 
 | 
   102  | 
P.parse_all("(((()()))()))")
 | 
| 
 | 
   103  | 
P.parse_all(")(")
 | 
| 
 | 
   104  | 
P.parse_all("()")
 | 
| 
 | 
   105  | 
  | 
| 
 | 
   106  | 
// Arithmetic Expressions (Terms and Factors)
  | 
| 
 | 
   107  | 
  | 
| 
 | 
   108  | 
lazy val E: Parser[String, Int] = 
  | 
| 
 | 
   109  | 
  (T ~ "+" ~ E) ==> { case x ~ y ~ z => x + z } ||
 | 
| 
 | 
   110  | 
  (T ~ "-" ~ E) ==> { case x ~ y ~ z => x - z } || T 
 | 
| 
 | 
   111  | 
lazy val T: Parser[String, Int] = 
  | 
| 
 | 
   112  | 
  (F ~ "*" ~ T) ==> { case x ~ y ~ z => x * z } || F
 | 
| 
 | 
   113  | 
lazy val F: Parser[String, Int] = 
  | 
| 
 | 
   114  | 
  ("(" ~ E ~ ")") ==> { case x ~ y ~ z => y } || NumParserInt
 | 
| 
 | 
   115  | 
  | 
| 
 | 
   116  | 
/* same parser but producing a string
  | 
| 
 | 
   117  | 
lazy val E: Parser[String, String] = 
  | 
| 
 | 
   118  | 
  (T ~ "+" ~ E) ==> { case ((x, y), z) => "(" + x + ")+(" + z + ")"} || T 
 | 
| 
 | 
   119  | 
lazy val T: Parser[String, String] = 
  | 
| 
 | 
   120  | 
  (F ~ "*" ~ T) ==> { case ((x, y), z) => "(" + x + ")*("+ z + ")"} || F
 | 
| 
 | 
   121  | 
lazy val F: Parser[String, String] = 
  | 
| 
 | 
   122  | 
  ("(" ~ E ~ ")") ==> { case ((x, y), z) => y } || NumParser
 | 
| 
 | 
   123  | 
*/
  | 
| 
 | 
   124  | 
  | 
| 
 | 
   125  | 
println(E.parse_all("1+3+4"))
 | 
| 
 | 
   126  | 
println(E.parse("1+3+4"))
 | 
| 
 | 
   127  | 
println(E.parse_all("4*2+3"))
 | 
| 
 | 
   128  | 
println(E.parse_all("4*(2+3)"))
 | 
| 
 | 
   129  | 
println(E.parse_all("(4)*((2+3))"))
 | 
| 
 | 
   130  | 
println(E.parse_all("4/2+3"))
 | 
| 
 | 
   131  | 
println(E.parse("1 + 2 * 3"))
 | 
| 
 | 
   132  | 
println(E.parse_all("(1+2)+3"))
 | 
| 
 | 
   133  | 
println(E.parse_all("1+2+3"))  
 | 
| 
 | 
   134  | 
  | 
| 
 | 
   135  | 
  | 
| 
 | 
   136  | 
  | 
| 
 | 
   137  | 
// no left-recursion allowed, otherwise will loop
  | 
| 
 | 
   138  | 
lazy val EL: Parser[String, Int] = 
  | 
| 
 | 
   139  | 
  (EL ~ "+" ~ EL ==> { case x ~ y ~ z => x + z} || 
 | 
| 
 | 
   140  | 
   EL ~ "*" ~ EL ==> { case x ~ y ~ z => x * z} ||
 | 
| 
 | 
   141  | 
   "(" ~ EL ~ ")" ==> { case x ~ y ~ z => y} ||
 | 
| 
 | 
   142  | 
   NumParserInt)
  | 
| 
 | 
   143  | 
  | 
| 
 | 
   144  | 
//println(EL.parse_all("1+2+3"))
 | 
| 
 | 
   145  | 
  | 
| 
 | 
   146  | 
  | 
| 
 | 
   147  | 
  | 
| 
 | 
   148  | 
  | 
| 
 | 
   149  | 
// non-ambiguous vs ambiguous grammars
  | 
| 
 | 
   150  | 
  | 
| 
 | 
   151  | 
// ambiguous
  | 
| 
 | 
   152  | 
lazy val S : Parser[String, String] =
  | 
| 
 | 
   153  | 
  ("1" ~ S ~ S) ==> { case x ~ y ~ z => x + y + z } || ""
 | 
| 
 | 
   154  | 
  | 
| 
 | 
   155  | 
S.parse("1" * 10)
 | 
| 
 | 
   156  | 
  | 
| 
 | 
   157  | 
// non-ambiguous
  | 
| 
 | 
   158  | 
lazy val U : Parser[String, String] =
  | 
| 
 | 
   159  | 
  ("1" ~ U) ==> { case x ~ y => x + y  } || ""
 | 
| 
 | 
   160  | 
  | 
| 
 | 
   161  | 
U.parse("1" * 25)
 | 
| 
 | 
   162  | 
  | 
| 
 | 
   163  | 
U.parse("11")
 | 
| 
 | 
   164  | 
U.parse("11111")
 | 
| 
 | 
   165  | 
U.parse("11011")
 | 
| 
 | 
   166  | 
  | 
| 
 | 
   167  | 
U.parse_all("1" * 100)
 | 
| 
 | 
   168  | 
U.parse_all("1" * 100 + "0")
 | 
| 
 | 
   169  | 
  | 
| 
 | 
   170  | 
lazy val UCount : Parser[String, Int] =
  | 
| 
 | 
   171  | 
  ("1" ~ UCount) ==> { case x ~ y => y + 1 } || "" ==> { x => 0 }
 | 
| 
 | 
   172  | 
  | 
| 
 | 
   173  | 
UCount.parse("11111")
 | 
| 
 | 
   174  | 
UCount.parse_all("11111")
 | 
| 
 | 
   175  | 
  | 
| 
 | 
   176  | 
  | 
| 
 | 
   177  | 
  | 
| 
 | 
   178  | 
// Single Character parser
  | 
| 
 | 
   179  | 
lazy val One : Parser[String, String] = "1"
  | 
| 
 | 
   180  | 
lazy val Two : Parser[String, String] = "2"
  | 
| 
 | 
   181  | 
  | 
| 
 | 
   182  | 
One.parse("1")
 | 
| 
 | 
   183  | 
One.parse("111")
 | 
| 
 | 
   184  | 
  | 
| 
 | 
   185  | 
(One ~ One).parse("111")
 | 
| 
 | 
   186  | 
(One ~ One ~ One).parse("111")
 | 
| 
 | 
   187  | 
(One ~ One ~ One ~ One).parse("1111")
 | 
| 
 | 
   188  | 
  | 
| 
 | 
   189  | 
(One || Two).parse("111")
 | 
| 
 | 
   190  | 
  | 
| 
 | 
   191  | 
  | 
| 
 | 
   192  | 
  | 
| 
672
 | 
   193  | 
// a problem with the arithmetic expression parser -> gets 
  | 
| 
 | 
   194  | 
// slow with deep nestedness
  | 
| 
 | 
   195  | 
println("Runtime problem")
 | 
| 
629
 | 
   196  | 
E.parse("1")
 | 
| 
 | 
   197  | 
E.parse("(1)")
 | 
| 
 | 
   198  | 
E.parse("((1))")
 | 
| 
 | 
   199  | 
E.parse("(((1)))")
 | 
| 
 | 
   200  | 
E.parse("((((1))))")
 | 
| 
 | 
   201  | 
E.parse("((((((1))))))")
 | 
| 
672
 | 
   202  | 
E.parse("(((((((1)))))))")
 | 
| 
 | 
   203  | 
E.parse("((((((((1)))))))")
 |