| author | Christian Urban <urbanc@in.tum.de> | 
| Mon, 19 Nov 2018 22:44:56 +0000 | |
| changeset 603 | 754f7ced2cf1 | 
| parent 599 | 0b512541f7ce | 
| child 624 | e50096adda15 | 
| permissions | -rw-r--r-- | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 1 | import scala.language.implicitConversions | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 2 | import scala.language.reflectiveCalls | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 3 | |
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 4 | /* Note, in the lectures I did not show the type constraint | 
| 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 5 | * I <% Seq[_] , which means that the input type I can be | 
| 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 6 | * treated, or seen, as a sequence. */ | 
| 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 7 | |
/** Base class of all parser combinators.
  *
  * The implicit conversion `ev` replaces the deprecated view bound
  * `I <% Seq[_]`; both spellings ask for exactly the same evidence.
  *
  * @tparam I input type; it has to be viewable as a `Seq[_]` so that
  *           left-over input can be tested for emptiness
  * @tparam T type of the values produced by a successful parse
  */
abstract class Parser[I, T](implicit ev: I => Seq[_]) {

  /** Runs the parser on `ts`.
    *
    * @return every possible outcome as a pair of the parsed value and
    *         the part of the input that was not consumed
    */
  def parse(ts: I): Set[(T, I)]

  /** Runs the parser on `ts` and keeps only the values of those
    * outcomes whose left-over input is empty.
    */
  def parse_all(ts: I): Set[T] =
    parse(ts).collect { case (head, rest) if ev(rest).isEmpty => head }
}
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 15 | |
/** Sequential composition of two parsers.
  *
  * `p` is run on the input first; `q` is then run on every left-over
  * input that `p` produced. Both parsers are passed by name so that
  * grammars may refer to themselves recursively.
  *
  * The implicit conversion `ev` stands in for the deprecated view bound
  * `I <% Seq[_]` and is handed on to the `Parser` constructor.
  */
class SeqParser[I, T, S](p: => Parser[I, T],
                         q: => Parser[I, S])(implicit ev: I => Seq[_])
    extends Parser[I, (T, S)] {

  /** @return the paired values of `p` and `q` together with the input
    *         that `q` left over
    */
  def parse(sb: I): Set[((T, S), I)] =
    p.parse(sb).flatMap { case (head1, tail1) =>
      q.parse(tail1).map { case (head2, tail2) => ((head1, head2), tail2) }
    }
}
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 22 | |
/** Alternative of two parsers: both `p` and `q` are run on the same
  * input and their result sets are combined.
  *
  * The implicit conversion `ev` stands in for the deprecated view bound
  * `I <% Seq[_]` and is handed on to the `Parser` constructor.
  */
class AltParser[I, T](p: => Parser[I, T],
                      q: => Parser[I, T])(implicit ev: I => Seq[_])
    extends Parser[I, T] {

  /** @return the union of the outcomes of `p` and of `q` on `sb` */
  def parse(sb: I): Set[(T, I)] = p.parse(sb) union q.parse(sb)
}
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 27 | |
/** Semantic action: runs `p` and applies `f` to every parsed value,
  * leaving the unconsumed input untouched.
  *
  * The implicit conversion `ev` stands in for the deprecated view bound
  * `I <% Seq[_]` and is handed on to the `Parser` constructor.
  */
class FunParser[I, T, S](p: => Parser[I, T],
                         f: T => S)(implicit ev: I => Seq[_])
    extends Parser[I, S] {

  /** @return the outcomes of `p` with `f` applied to each value */
  def parse(sb: I): Set[(S, I)] =
    p.parse(sb).map { case (head, tail) => (f(head), tail) }
}
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 33 | |
| 183 
b17eff695c7f
added new stuff
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
178diff
changeset | 34 | // atomic parsers | 
/** Atomic parser that accepts exactly the character `c` at the start of
  * the input and returns it together with the rest of the string.
  */
case class CharParser(c: Char) extends Parser[String, Char] {
  def parse(sb: String): Set[(Char, String)] =
    sb.headOption match {
      case Some(first) if first == c => Set((c, sb.tail))
      case _                         => Set()
    }
}
| 
53def1fbf472
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
172diff
changeset | 39 | |
| 588 | 40 | import scala.util.matching.Regex | 
/** Atomic parser that matches `reg` against a prefix of the input.
  * On success it yields the matched text and the input after the match;
  * otherwise it yields no result.
  */
case class RegexParser(reg: Regex) extends Parser[String, String] {
  def parse(sb: String): Set[(String, String)] =
    reg.findPrefixMatchOf(sb).fold(Set.empty[(String, String)]) { m =>
      Set((m.matched, m.after.toString))
    }
}
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 47 | |
// parser for a non-empty run of decimal digits; yields the matched text
val NumParser = RegexParser(new Regex("[0-9]+"))

// parser that accepts exactly the literal string s
def StringParser(s: String) = RegexParser(new Regex(Regex.quote(s)))

// like NumParser, but the matched digits are converted to an Int
val NumParserInt = NumParser ==> ((digits: String) => digits.toInt)
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 52 | |
| 360 
c6c574d2ca0c
update
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
185diff
changeset | 53 | // convenience | 
// Strings and characters can be written wherever a parser is expected.
// The result types are spelled out (they are the types the compiler
// inferred before) because an implicit without an explicit result type
// is only eligible after its point of definition.
implicit def string2parser(s: String): RegexParser = StringParser(s)
implicit def char2parser(c: Char): CharParser = CharParser(c)
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 56 | |
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
/** Infix operators for building parsers out of other parsers.
  *
  * This is an implicit class rather than an implicit method returning an
  * anonymous structural type: the operators are then ordinary methods
  * (no reflective calls) and the conversion has a declared type, so it
  * can be used anywhere in the enclosing scope.
  *
  * The implicit conversion `ev` plays the role of the view bound
  * `I <% Seq[_]`.
  */
implicit class ParserOps[I, T](p: Parser[I, T])(implicit ev: I => Seq[_]) {

  /** Alternative: `p` or `q`. */
  def |(q: => Parser[I, T]): AltParser[I, T] = new AltParser[I, T](p, q)

  /** Semantic action: applies `f` to the values produced by `p`. */
  def ==>[S](f: => T => S): FunParser[I, T, S] = new FunParser[I, T, S](p, f)

  /** Sequence: `p` followed by `q`. */
  def ~[S](q: => Parser[I, S]): SeqParser[I, T, S] = new SeqParser[I, T, S](p, q)
}
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 62 | |
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
/** The parser operators for a plain string on the left-hand side; the
  * string `s` is turned into a parser by the implicit string-to-parser
  * conversion.
  *
  * This is an implicit class rather than an implicit method returning an
  * anonymous structural type, so the operators are ordinary methods and
  * no reflective calls are involved.
  */
implicit class StringOps(s: String) {

  /** Alternative: the string `s` or the parser `q`. */
  def |(q: => Parser[String, String]): AltParser[String, String] =
    new AltParser[String, String](s, q)

  /** Alternative: the string `s` or the string `r`. */
  def |(r: String): AltParser[String, String] =
    new AltParser[String, String](s, r)

  /** Semantic action: applies `f` to the text matched for `s`. */
  def ==>[S](f: => String => S): FunParser[String, String, S] =
    new FunParser[String, String, S](s, f)

  /** Sequence: the string `s` followed by the parser `q`. */
  def ~[S](q: => Parser[String, S]): SeqParser[String, String, S] =
    new SeqParser[String, String, S](s, q)

  /** Sequence: the string `s` followed by the string `r`. */
  def ~(r: String): SeqParser[String, String, String] =
    new SeqParser[String, String, String](s, r)
}
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 72 | |
| 588 | 73 | |
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
// palindromes over the letters a and b
lazy val Pal: Parser[String, String] = {
  // puts the three parts of a match "x Pal x" back together
  val join: (((String, String), String)) => String = { case ((x, y), z) => x + y + z }
  ("a" ~ Pal ~ "a") ==> join | ("b" ~ Pal ~ "b") ==> join | "a" | "b" | ""
}

Pal.parse_all("abaaaba")
Pal.parse("abaaaba")

println("Palindrome: " + Pal.parse_all("abaaaba"))
 | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 82 | |
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 83 | // well-nested parenthesis parser | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
// a parenthesised group followed by more groups, or nothing at all;
// the result rewrites every matched pair ( ) as { }
lazy val P: Parser[String, String] =
  (("(" ~ P ~ ")" ~ P) ==> { case (((_, inner), _), rest) => "{" + inner + "}" + rest }) | ""

P.parse_all("(((()()))())")
P.parse_all("(((()()))()))")
P.parse_all(")(")
P.parse_all("()")
 | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 91 | |
| 360 
c6c574d2ca0c
update
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
185diff
changeset | 92 | // arithmetic expressions | 
| 586 | 93 | |
| 367 
04127a5aad23
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
366diff
// evaluating grammar: every rule computes the Int value of what it parsed
lazy val E: Parser[String, Int] =
  ((T ~ "+" ~ E) ==> { case ((lhs, _), rhs) => lhs + rhs }) |
  ((T ~ "-" ~ E) ==> { case ((lhs, _), rhs) => lhs - rhs }) | T
lazy val T: Parser[String, Int] =
  ((F ~ "*" ~ T) ==> { case ((lhs, _), rhs) => lhs * rhs }) | F
lazy val F: Parser[String, Int] =
  (("(" ~ E ~ ")") ==> { case ((_, inner), _) => inner }) | NumParserInt

// bracketing grammar: every rule returns a fully parenthesised string.
// NOTE(review): the names E, T and F are defined a second time here;
// presumably the file is evaluated step by step in a REPL, where the
// later definitions replace the earlier ones -- confirm before compiling
// this file as a whole.
lazy val E: Parser[String, String] =
  ((T ~ "+" ~ E) ==> { case ((lhs, _), rhs) => "(" + lhs + ")+(" + rhs + ")" }) | T
lazy val T: Parser[String, String] =
  ((F ~ "*" ~ T) ==> { case ((lhs, _), rhs) => "(" + lhs + ")*(" + rhs + ")" }) | F
lazy val F: Parser[String, String] =
  (("(" ~ E ~ ")") ==> { case ((_, inner), _) => inner }) | NumParser

println(E.parse_all("1+3+4"))
println(E.parse("1+3+4"))
println(E.parse_all("4*2+3"))
println(E.parse_all("4*(2+3)"))
println(E.parse_all("(4)*((2+3))"))
println(E.parse_all("4/2+3"))
println(E.parse("1 + 2 * 3"))
println(E.parse_all("(1+2)+3"))
println(E.parse_all("1+2+3"))
 | 
| 118 | ||
| 586 | 119 | |
| 120 | ||
// no left-recursion allowed, otherwise will loop
// (EL starts with itself in its first two alternatives)
lazy val EL: Parser[String, Int] =
  ((EL ~ "+" ~ EL) ==> { case ((lhs, _), rhs) => lhs + rhs } |
   (EL ~ "*" ~ EL) ==> { case ((lhs, _), rhs) => lhs * rhs } |
   ("(" ~ EL ~ ")") ==> { case ((_, inner), _) => inner } |
   NumParserInt)

//println(EL.parse_all("1+2+3"))
 | 
| 531 | 129 | |
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 130 | |
| 462 | 131 | |
| 132 | ||
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 133 | // non-ambiguous vs ambiguous grammars | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
// a "1" followed by two further S, or the empty string
lazy val S: Parser[String, String] =
  (("1" ~ S ~ S) ==> { case ((one, left), right) => one + left + right }) | ""

S.parse("1" * 17)
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 138 | |
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
// a "1" followed by a single further U, or the empty string
lazy val U: Parser[String, String] =
  (("1" ~ U) ==> { case (one, rest) => one + rest }) | ""

U.parse("1" * 25)

U.parse("11")
U.parse("11111")
U.parse("11011")

U.parse_all("1" * 100)
U.parse_all("1" * 100 + "0")
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 150 | |
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
// same shape as a run of "1"s, but the result is the number of "1"s read
lazy val UCount: Parser[String, Int] =
  (("1" ~ UCount) ==> { case (_, count) => count + 1 }) |
  ("" ==> { _ => 0 })

UCount.parse("11111")
UCount.parse_all("11111")
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 157 | |
| 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 158 | |
| 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 159 | |
| 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 160 | |
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 161 | // Single Character parser | 
| 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
// parsers for the one-character strings "1" and "2"
lazy val One: Parser[String, String] = StringParser("1")
lazy val Two: Parser[String, String] = StringParser("2")

One.parse("1")
One.parse("111")

(One ~ One).parse("111")
(One ~ One ~ One).parse("111")
(One ~ One ~ One ~ One).parse("1111")

(One | Two).parse("111")
 | 
| 467 
3fc9b036321d
fixed bug
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
462diff
changeset | 173 | |
| 
3fc9b036321d
fixed bug
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
462diff
changeset | 174 | |
| 531 | 175 |