| 629 |      1 | import scala.language.implicitConversions
 | 
|  |      2 | import scala.language.reflectiveCalls
 | 
|  |      3 | 
 | 
| 672 |      4 | // more convenience for the semantic actions later on
 | 
|  |      5 | case class ~[+A, +B](_1: A, _2: B)
 | 
|  |      6 | 
 | 
|  |      7 | 
 | 
| 629 |      8 | /* Note, in the lectures I did not show the implicit type consraint
 | 
|  |      9 |  * I => Seq[_], which means that the input type 'I' needs to be
 | 
|  |     10 |  * a sequence. */
 | 
|  |     11 | 
 | 
|  |     12 | type IsSeq[A] = A => Seq[_]
 | 
|  |     13 | 
 | 
|  |     14 | abstract class Parser[I : IsSeq, T] {
 | 
|  |     15 |   def parse(ts: I): Set[(T, I)]
 | 
|  |     16 | 
 | 
|  |     17 |   def parse_all(ts: I) : Set[T] =
 | 
|  |     18 |     for ((head, tail) <- parse(ts); 
 | 
|  |     19 |         if (tail.isEmpty)) yield head
 | 
|  |     20 | }
 | 
|  |     21 | 
 | 
|  |     22 | 
 | 
|  |     23 | class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], 
 | 
|  |     24 |                                  q: => Parser[I, S]) extends Parser[I, ~[T, S]] {
 | 
|  |     25 |   def parse(sb: I) = 
 | 
|  |     26 |     for ((head1, tail1) <- p.parse(sb); 
 | 
|  |     27 |          (head2, tail2) <- q.parse(tail1)) yield (new ~(head1, head2), tail2)
 | 
|  |     28 | }
 | 
|  |     29 | 
 | 
|  |     30 | class AltParser[I : IsSeq, T](p: => Parser[I, T], 
 | 
|  |     31 |                               q: => Parser[I, T]) extends Parser[I, T] {
 | 
|  |     32 |   def parse(sb: I) = p.parse(sb) ++ q.parse(sb)   
 | 
|  |     33 | }
 | 
|  |     34 | 
 | 
|  |     35 | class FunParser[I : IsSeq, T, S](p: => Parser[I, T], 
 | 
|  |     36 |                                  f: T => S) extends Parser[I, S] {
 | 
|  |     37 |   def parse(sb: I) = 
 | 
|  |     38 |     for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
 | 
|  |     39 | }
 | 
|  |     40 | 
 | 
|  |     41 | // atomic parsers for characters, numbers and strings
 | 
|  |     42 | case class CharParser(c: Char) extends Parser[String, Char] {
 | 
|  |     43 |   def parse(sb: String) = 
 | 
|  |     44 |     if (sb != "" && sb.head == c) Set((c, sb.tail)) else Set()
 | 
|  |     45 | }
 | 
|  |     46 | 
 | 
|  |     47 | import scala.util.matching.Regex
 | 
|  |     48 | case class RegexParser(reg: Regex) extends Parser[String, String] {
 | 
|  |     49 |   def parse(sb: String) = reg.findPrefixMatchOf(sb) match {
 | 
|  |     50 |     case None => Set()
 | 
|  |     51 |     case Some(m) => Set((m.matched, m.after.toString))  
 | 
|  |     52 |   }
 | 
|  |     53 | }
 | 
|  |     54 | 
 | 
|  |     55 | val NumParser = RegexParser("[0-9]+".r)
 | 
|  |     56 | def StringParser(s: String) = RegexParser(Regex.quote(s).r)
 | 
|  |     57 | 
 | 
|  |     58 | // NumParserInt2 transforms a "string integer" into an Int;
 | 
|  |     59 | // needs new, because FunParser is not a case class
 | 
|  |     60 | 
 | 
|  |     61 | val NumParserInt2 = new FunParser(NumParser, (s: String) => s.toInt)
 | 
|  |     62 | 
 | 
|  |     63 | 
 | 
|  |     64 | // convenience
 | 
|  |     65 | implicit def string2parser(s: String) = StringParser(s)
 | 
|  |     66 | implicit def char2parser(c: Char) = CharParser(c)
 | 
|  |     67 | 
 | 
| 673 |     68 | implicit def ParserOps[I : IsSeq, T](p: Parser[I, T]) = new {
 | 
| 629 |     69 |   def || (q : => Parser[I, T]) = new AltParser[I, T](p, q)
 | 
|  |     70 |   def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f)
 | 
|  |     71 |   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
 | 
|  |     72 | }
 | 
|  |     73 | 
 | 
|  |     74 | implicit def StringOps(s: String) = new {
 | 
|  |     75 |   def || (q : => Parser[String, String]) = new AltParser[String, String](s, q)
 | 
|  |     76 |   def || (r: String) = new AltParser[String, String](s, r)
 | 
|  |     77 |   def ==>[S] (f: => String => S) = new FunParser[String, String, S](s, f)
 | 
|  |     78 |   def ~[S] (q : => Parser[String, S]) = 
 | 
|  |     79 |     new SeqParser[String, String, S](s, q)
 | 
|  |     80 |   def ~ (r: String) = 
 | 
|  |     81 |     new SeqParser[String, String, String](s, r)
 | 
|  |     82 | }
 | 
|  |     83 | 
 | 
|  |     84 | // NumParserInt can now be written as
 | 
|  |     85 | val NumParserInt = NumParser ==> (s => s.toInt)
 | 
|  |     86 | 
 | 
|  |     87 | 
 | 
|  |     88 | lazy val Pal : Parser[String, String] = 
 | 
|  |     89 |   (("a" ~ Pal ~ "a") ==> { case x ~ y ~ z => x + y + z } ||
 | 
|  |     90 |    ("b" ~ Pal ~ "b") ==> { case x ~ y ~ z => x + y + z } || "a" || "b" || "")
 | 
|  |     91 | 
 | 
|  |     92 | Pal.parse_all("abaaaba")
 | 
|  |     93 | Pal.parse("abaaaba")
 | 
|  |     94 | 
 | 
|  |     95 | println("Palindrome: " + Pal.parse_all("abaaaba"))
 | 
|  |     96 | 
 | 
|  |     97 | // well-nested parentheses parser (transforms '(' -> '{' , ')' -> '}' )
 | 
|  |     98 | lazy val P : Parser[String, String] = 
 | 
|  |     99 |   "(" ~ P ~ ")" ~ P ==> { case _ ~ x ~ _ ~ y => "{" + x + "}" + y } || ""
 | 
|  |    100 | 
 | 
|  |    101 | P.parse_all("(((()()))())")
 | 
|  |    102 | P.parse_all("(((()()))()))")
 | 
|  |    103 | P.parse_all(")(")
 | 
|  |    104 | P.parse_all("()")
 | 
|  |    105 | 
 | 
|  |    106 | // Arithmetic Expressions (Terms and Factors)
 | 
|  |    107 | 
 | 
|  |    108 | lazy val E: Parser[String, Int] = 
 | 
|  |    109 |   (T ~ "+" ~ E) ==> { case x ~ y ~ z => x + z } ||
 | 
|  |    110 |   (T ~ "-" ~ E) ==> { case x ~ y ~ z => x - z } || T 
 | 
|  |    111 | lazy val T: Parser[String, Int] = 
 | 
|  |    112 |   (F ~ "*" ~ T) ==> { case x ~ y ~ z => x * z } || F
 | 
|  |    113 | lazy val F: Parser[String, Int] = 
 | 
|  |    114 |   ("(" ~ E ~ ")") ==> { case x ~ y ~ z => y } || NumParserInt
 | 
|  |    115 | 
 | 
|  |    116 | /* same parser but producing a string
 | 
|  |    117 | lazy val E: Parser[String, String] = 
 | 
|  |    118 |   (T ~ "+" ~ E) ==> { case ((x, y), z) => "(" + x + ")+(" + z + ")"} || T 
 | 
|  |    119 | lazy val T: Parser[String, String] = 
 | 
|  |    120 |   (F ~ "*" ~ T) ==> { case ((x, y), z) => "(" + x + ")*("+ z + ")"} || F
 | 
|  |    121 | lazy val F: Parser[String, String] = 
 | 
|  |    122 |   ("(" ~ E ~ ")") ==> { case ((x, y), z) => y } || NumParser
 | 
|  |    123 | */
 | 
|  |    124 | 
 | 
|  |    125 | println(E.parse_all("1+3+4"))
 | 
|  |    126 | println(E.parse("1+3+4"))
 | 
|  |    127 | println(E.parse_all("4*2+3"))
 | 
|  |    128 | println(E.parse_all("4*(2+3)"))
 | 
|  |    129 | println(E.parse_all("(4)*((2+3))"))
 | 
|  |    130 | println(E.parse_all("4/2+3"))
 | 
|  |    131 | println(E.parse("1 + 2 * 3"))
 | 
|  |    132 | println(E.parse_all("(1+2)+3"))
 | 
|  |    133 | println(E.parse_all("1+2+3"))  
 | 
|  |    134 | 
 | 
|  |    135 | 
 | 
|  |    136 | 
 | 
|  |    137 | // no left-recursion allowed, otherwise will loop
 | 
|  |    138 | lazy val EL: Parser[String, Int] = 
 | 
|  |    139 |   (EL ~ "+" ~ EL ==> { case x ~ y ~ z => x + z} || 
 | 
|  |    140 |    EL ~ "*" ~ EL ==> { case x ~ y ~ z => x * z} ||
 | 
|  |    141 |    "(" ~ EL ~ ")" ==> { case x ~ y ~ z => y} ||
 | 
|  |    142 |    NumParserInt)
 | 
|  |    143 | 
 | 
|  |    144 | //println(EL.parse_all("1+2+3"))
 | 
|  |    145 | 
 | 
|  |    146 | 
 | 
|  |    147 | 
 | 
|  |    148 | 
 | 
|  |    149 | // non-ambiguous vs ambiguous grammars
 | 
|  |    150 | 
 | 
|  |    151 | // ambiguous
 | 
|  |    152 | lazy val S : Parser[String, String] =
 | 
|  |    153 |   ("1" ~ S ~ S) ==> { case x ~ y ~ z => x + y + z } || ""
 | 
|  |    154 | 
 | 
|  |    155 | S.parse("1" * 10)
 | 
|  |    156 | 
 | 
|  |    157 | // non-ambiguous
 | 
|  |    158 | lazy val U : Parser[String, String] =
 | 
|  |    159 |   ("1" ~ U) ==> { case x ~ y => x + y  } || ""
 | 
|  |    160 | 
 | 
|  |    161 | U.parse("1" * 25)
 | 
|  |    162 | 
 | 
|  |    163 | U.parse("11")
 | 
|  |    164 | U.parse("11111")
 | 
|  |    165 | U.parse("11011")
 | 
|  |    166 | 
 | 
|  |    167 | U.parse_all("1" * 100)
 | 
|  |    168 | U.parse_all("1" * 100 + "0")
 | 
|  |    169 | 
 | 
|  |    170 | lazy val UCount : Parser[String, Int] =
 | 
|  |    171 |   ("1" ~ UCount) ==> { case x ~ y => y + 1 } || "" ==> { x => 0 }
 | 
|  |    172 | 
 | 
|  |    173 | UCount.parse("11111")
 | 
|  |    174 | UCount.parse_all("11111")
 | 
|  |    175 | 
 | 
|  |    176 | 
 | 
|  |    177 | 
 | 
|  |    178 | // Single Character parser
 | 
|  |    179 | lazy val One : Parser[String, String] = "1"
 | 
|  |    180 | lazy val Two : Parser[String, String] = "2"
 | 
|  |    181 | 
 | 
|  |    182 | One.parse("1")
 | 
|  |    183 | One.parse("111")
 | 
|  |    184 | 
 | 
|  |    185 | (One ~ One).parse("111")
 | 
|  |    186 | (One ~ One ~ One).parse("111")
 | 
|  |    187 | (One ~ One ~ One ~ One).parse("1111")
 | 
|  |    188 | 
 | 
|  |    189 | (One || Two).parse("111")
 | 
|  |    190 | 
 | 
|  |    191 | 
 | 
|  |    192 | 
 | 
| 672 |    193 | // a problem with the arithmetic expression parser -> gets 
 | 
|  |    194 | // slow with deep nestedness
 | 
|  |    195 | println("Runtime problem")
 | 
| 629 |    196 | E.parse("1")
 | 
|  |    197 | E.parse("(1)")
 | 
|  |    198 | E.parse("((1))")
 | 
|  |    199 | E.parse("(((1)))")
 | 
|  |    200 | E.parse("((((1))))")
 | 
|  |    201 | E.parse("((((((1))))))")
 | 
| 672 |    202 | E.parse("(((((((1)))))))")
 | 
|  |    203 | E.parse("((((((((1)))))))")
 |