| 629 |      1 | import scala.language.implicitConversions
 | 
|  |      2 | import scala.language.reflectiveCalls
 | 
|  |      3 | 
 | 
|  |      4 | /* Note, in the lectures I did not show the implicit type consraint
 | 
|  |      5 |  * I => Seq[_], which means that the input type 'I' needs to be
 | 
|  |      6 |  * a sequence. */
 | 
|  |      7 | 
 | 
|  |      8 | type IsSeq[A] = A => Seq[_]
 | 
|  |      9 | 
 | 
|  |     10 | abstract class Parser[I : IsSeq, T] {
 | 
|  |     11 |   def parse(ts: I): Set[(T, I)]
 | 
|  |     12 | 
 | 
|  |     13 |   def parse_all(ts: I) : Set[T] =
 | 
|  |     14 |     for ((head, tail) <- parse(ts); 
 | 
|  |     15 |         if (tail.isEmpty)) yield head
 | 
|  |     16 | }
 | 
|  |     17 | 
 | 
|  |     18 | // convenience for matching later on
 | 
|  |     19 | case class ~[+A, +B](_1: A, _2: B)
 | 
|  |     20 | 
 | 
|  |     21 | class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], 
 | 
|  |     22 |                                  q: => Parser[I, S]) extends Parser[I, ~[T, S]] {
 | 
|  |     23 |   def parse(sb: I) = 
 | 
|  |     24 |     for ((head1, tail1) <- p.parse(sb); 
 | 
|  |     25 |          (head2, tail2) <- q.parse(tail1)) yield (new ~(head1, head2), tail2)
 | 
|  |     26 | }
 | 
|  |     27 | 
 | 
|  |     28 | class AltParser[I : IsSeq, T](p: => Parser[I, T], 
 | 
|  |     29 |                               q: => Parser[I, T]) extends Parser[I, T] {
 | 
|  |     30 |   def parse(sb: I) = p.parse(sb) ++ q.parse(sb)   
 | 
|  |     31 | }
 | 
|  |     32 | 
 | 
|  |     33 | class FunParser[I : IsSeq, T, S](p: => Parser[I, T], 
 | 
|  |     34 |                                  f: T => S) extends Parser[I, S] {
 | 
|  |     35 |   def parse(sb: I) = 
 | 
|  |     36 |     for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
 | 
|  |     37 | }
 | 
|  |     38 | 
 | 
|  |     39 | // atomic parsers for characters, numbers and strings
 | 
|  |     40 | case class CharParser(c: Char) extends Parser[String, Char] {
 | 
|  |     41 |   def parse(sb: String) = 
 | 
|  |     42 |     if (sb != "" && sb.head == c) Set((c, sb.tail)) else Set()
 | 
|  |     43 | }
 | 
|  |     44 | 
 | 
|  |     45 | import scala.util.matching.Regex
 | 
|  |     46 | case class RegexParser(reg: Regex) extends Parser[String, String] {
 | 
|  |     47 |   def parse(sb: String) = reg.findPrefixMatchOf(sb) match {
 | 
|  |     48 |     case None => Set()
 | 
|  |     49 |     case Some(m) => Set((m.matched, m.after.toString))  
 | 
|  |     50 |   }
 | 
|  |     51 | }
 | 
|  |     52 | 
 | 
|  |     53 | val NumParser = RegexParser("[0-9]+".r)
 | 
|  |     54 | def StringParser(s: String) = RegexParser(Regex.quote(s).r)
 | 
|  |     55 | 
 | 
|  |     56 | // NumParserInt2 transforms a "string integer" into an Int;
 | 
|  |     57 | // needs new, because FunParser is not a case class
 | 
|  |     58 | 
 | 
|  |     59 | val NumParserInt2 = new FunParser(NumParser, (s: String) => s.toInt)
 | 
|  |     60 | 
 | 
|  |     61 | 
 | 
|  |     62 | // convenience
 | 
|  |     63 | implicit def string2parser(s: String) = StringParser(s)
 | 
|  |     64 | implicit def char2parser(c: Char) = CharParser(c)
 | 
|  |     65 | 
 | 
|  |     66 | implicit def ParserOps[I, T](p: Parser[I, T])(implicit ev: I => Seq[_]) = new {
 | 
|  |     67 |   def || (q : => Parser[I, T]) = new AltParser[I, T](p, q)
 | 
|  |     68 |   def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f)
 | 
|  |     69 |   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
 | 
|  |     70 | }
 | 
|  |     71 | 
 | 
|  |     72 | implicit def StringOps(s: String) = new {
 | 
|  |     73 |   def || (q : => Parser[String, String]) = new AltParser[String, String](s, q)
 | 
|  |     74 |   def || (r: String) = new AltParser[String, String](s, r)
 | 
|  |     75 |   def ==>[S] (f: => String => S) = new FunParser[String, String, S](s, f)
 | 
|  |     76 |   def ~[S] (q : => Parser[String, S]) = 
 | 
|  |     77 |     new SeqParser[String, String, S](s, q)
 | 
|  |     78 |   def ~ (r: String) = 
 | 
|  |     79 |     new SeqParser[String, String, String](s, r)
 | 
|  |     80 | }
 | 
|  |     81 | 
 | 
|  |     82 | // NumParserInt can now be written as
 | 
|  |     83 | val NumParserInt = NumParser ==> (s => s.toInt)
 | 
|  |     84 | 
 | 
|  |     85 | 
 | 
|  |     86 | lazy val Pal : Parser[String, String] = 
 | 
|  |     87 |   (("a" ~ Pal ~ "a") ==> { case x ~ y ~ z => x + y + z } ||
 | 
|  |     88 |    ("b" ~ Pal ~ "b") ==> { case x ~ y ~ z => x + y + z } || "a" || "b" || "")
 | 
|  |     89 | 
 | 
|  |     90 | Pal.parse_all("abaaaba")
 | 
|  |     91 | Pal.parse("abaaaba")
 | 
|  |     92 | 
 | 
|  |     93 | println("Palindrome: " + Pal.parse_all("abaaaba"))
 | 
|  |     94 | 
 | 
|  |     95 | // well-nested parentheses parser (transforms '(' -> '{' , ')' -> '}' )
 | 
|  |     96 | lazy val P : Parser[String, String] = 
 | 
|  |     97 |   "(" ~ P ~ ")" ~ P ==> { case _ ~ x ~ _ ~ y => "{" + x + "}" + y } || ""
 | 
|  |     98 | 
 | 
|  |     99 | P.parse_all("(((()()))())")
 | 
|  |    100 | P.parse_all("(((()()))()))")
 | 
|  |    101 | P.parse_all(")(")
 | 
|  |    102 | P.parse_all("()")
 | 
|  |    103 | 
 | 
|  |    104 | // Arithmetic Expressions (Terms and Factors)
 | 
|  |    105 | 
 | 
|  |    106 | lazy val E: Parser[String, Int] = 
 | 
|  |    107 |   (T ~ "+" ~ E) ==> { case x ~ y ~ z => x + z } ||
 | 
|  |    108 |   (T ~ "-" ~ E) ==> { case x ~ y ~ z => x - z } || T 
 | 
|  |    109 | lazy val T: Parser[String, Int] = 
 | 
|  |    110 |   (F ~ "*" ~ T) ==> { case x ~ y ~ z => x * z } || F
 | 
|  |    111 | lazy val F: Parser[String, Int] = 
 | 
|  |    112 |   ("(" ~ E ~ ")") ==> { case x ~ y ~ z => y } || NumParserInt
 | 
|  |    113 | 
 | 
|  |    114 | /* same parser but producing a string
 | 
|  |    115 | lazy val E: Parser[String, String] = 
 | 
|  |    116 |   (T ~ "+" ~ E) ==> { case ((x, y), z) => "(" + x + ")+(" + z + ")"} || T 
 | 
|  |    117 | lazy val T: Parser[String, String] = 
 | 
|  |    118 |   (F ~ "*" ~ T) ==> { case ((x, y), z) => "(" + x + ")*("+ z + ")"} || F
 | 
|  |    119 | lazy val F: Parser[String, String] = 
 | 
|  |    120 |   ("(" ~ E ~ ")") ==> { case ((x, y), z) => y } || NumParser
 | 
|  |    121 | */
 | 
|  |    122 | 
 | 
|  |    123 | println(E.parse_all("1+3+4"))
 | 
|  |    124 | println(E.parse("1+3+4"))
 | 
|  |    125 | println(E.parse_all("4*2+3"))
 | 
|  |    126 | println(E.parse_all("4*(2+3)"))
 | 
|  |    127 | println(E.parse_all("(4)*((2+3))"))
 | 
|  |    128 | println(E.parse_all("4/2+3"))
 | 
|  |    129 | println(E.parse("1 + 2 * 3"))
 | 
|  |    130 | println(E.parse_all("(1+2)+3"))
 | 
|  |    131 | println(E.parse_all("1+2+3"))  
 | 
|  |    132 | 
 | 
|  |    133 | 
 | 
|  |    134 | 
 | 
|  |    135 | // no left-recursion allowed, otherwise will loop
 | 
|  |    136 | lazy val EL: Parser[String, Int] = 
 | 
|  |    137 |   (EL ~ "+" ~ EL ==> { case x ~ y ~ z => x + z} || 
 | 
|  |    138 |    EL ~ "*" ~ EL ==> { case x ~ y ~ z => x * z} ||
 | 
|  |    139 |    "(" ~ EL ~ ")" ==> { case x ~ y ~ z => y} ||
 | 
|  |    140 |    NumParserInt)
 | 
|  |    141 | 
 | 
|  |    142 | //println(EL.parse_all("1+2+3"))
 | 
|  |    143 | 
 | 
|  |    144 | 
 | 
|  |    145 | 
 | 
|  |    146 | 
 | 
|  |    147 | // non-ambiguous vs ambiguous grammars
 | 
|  |    148 | 
 | 
|  |    149 | // ambiguous
 | 
|  |    150 | lazy val S : Parser[String, String] =
 | 
|  |    151 |   ("1" ~ S ~ S) ==> { case x ~ y ~ z => x + y + z } || ""
 | 
|  |    152 | 
 | 
|  |    153 | S.parse("1" * 10)
 | 
|  |    154 | 
 | 
|  |    155 | // non-ambiguous
 | 
|  |    156 | lazy val U : Parser[String, String] =
 | 
|  |    157 |   ("1" ~ U) ==> { case x ~ y => x + y  } || ""
 | 
|  |    158 | 
 | 
|  |    159 | U.parse("1" * 25)
 | 
|  |    160 | 
 | 
|  |    161 | U.parse("11")
 | 
|  |    162 | U.parse("11111")
 | 
|  |    163 | U.parse("11011")
 | 
|  |    164 | 
 | 
|  |    165 | U.parse_all("1" * 100)
 | 
|  |    166 | U.parse_all("1" * 100 + "0")
 | 
|  |    167 | 
 | 
|  |    168 | lazy val UCount : Parser[String, Int] =
 | 
|  |    169 |   ("1" ~ UCount) ==> { case x ~ y => y + 1 } || "" ==> { x => 0 }
 | 
|  |    170 | 
 | 
|  |    171 | UCount.parse("11111")
 | 
|  |    172 | UCount.parse_all("11111")
 | 
|  |    173 | 
 | 
|  |    174 | 
 | 
|  |    175 | 
 | 
|  |    176 | // Single Character parser
 | 
|  |    177 | lazy val One : Parser[String, String] = "1"
 | 
|  |    178 | lazy val Two : Parser[String, String] = "2"
 | 
|  |    179 | 
 | 
|  |    180 | One.parse("1")
 | 
|  |    181 | One.parse("111")
 | 
|  |    182 | 
 | 
|  |    183 | (One ~ One).parse("111")
 | 
|  |    184 | (One ~ One ~ One).parse("111")
 | 
|  |    185 | (One ~ One ~ One ~ One).parse("1111")
 | 
|  |    186 | 
 | 
|  |    187 | (One || Two).parse("111")
 | 
|  |    188 | 
 | 
|  |    189 | 
 | 
|  |    190 | 
 | 
|  |    191 | 
 | 
|  |    192 | 
 | 
|  |    193 | // a problem with the parser -> gets slow with nestedness
 | 
|  |    194 | E.parse("1")
 | 
|  |    195 | E.parse("(1)")
 | 
|  |    196 | E.parse("((1))")
 | 
|  |    197 | E.parse("(((1)))")
 | 
|  |    198 | E.parse("((((1))))")
 | 
|  |    199 | E.parse("((((((1))))))")
 | 
|  |    200 | E.parse("(((((((1)))))))") |