| author | Christian Urban <urbanc@in.tum.de> | 
| Sat, 22 Oct 2016 15:18:11 +0100 | |
| changeset 459 | 858f62bc930a | 
| parent 458 | d01568431081 | 
| child 461 | d1d489cd170d | 
| permissions | -rw-r--r-- | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 1 | import scala.language.implicitConversions | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 2 | import scala.language.reflectiveCalls | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 3 | |
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 4 | /* Note, in the lectures I did not show the type consraint | 
| 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 5 | * I <% Seq[_] , which means that the input type I can be | 
| 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 6 | * treated, or seen, as a sequence. */ | 
| 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 7 | |
| 458 | 8 | trait Input { 
 | 
| 9 | def isEmpty: Boolean | |
| 10 | } | |
| 11 | ||
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 12 | |
| 458 | 13 | abstract class Parser[T] {
 | 
| 14 | ||
| 15 | def parse(ts: Input): Set[(T, Input)] | |
| 16 | ||
| 17 | def parse_all(ts: Input) : Set[T] = | |
| 360 
c6c574d2ca0c
update
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
185diff
changeset | 18 | for ((head, tail) <- parse(ts); | 
| 
c6c574d2ca0c
update
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
185diff
changeset | 19 | if (tail.isEmpty)) yield head | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 20 | } | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 21 | |
| 458 | 22 | class SeqParser[T, S](p: => Parser[T], | 
| 23 |                       q: => Parser[S]) extends Parser[(T, S)] {
 | |
| 24 | def parse(sb: Input) = | |
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 25 | for ((head1, tail1) <- p.parse(sb); | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 26 | (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2) | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 27 | } | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 28 | |
| 458 | 29 | class AltParser[T](p: => Parser[T], | 
| 30 |                    q: => Parser[T]) extends Parser[T] {
 | |
| 31 | def parse(sb: Input) = p.parse(sb) ++ q.parse(sb) | |
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 32 | } | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 33 | |
| 458 | 34 | class FunParser[T, S](p: => Parser[T], | 
| 35 |                       f: T => S) extends Parser[S] {
 | |
| 36 | def parse(sb: Input) = | |
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 37 | for ((head, tail) <- p.parse(sb)) yield (f(head), tail) | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 38 | } | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 39 | |
| 183 
b17eff695c7f
added new stuff
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
178diff
changeset | 40 | // atomic parsers | 
| 458 | 41 | case class CharParser(c: Char) extends Parser[Char] {
 | 
| 42 | type Input = String | |
| 43 | ||
| 177 
53def1fbf472
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
172diff
changeset | 44 | def parse(sb: String) = | 
| 
53def1fbf472
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
172diff
changeset | 45 | if (sb.head == c) Set((c, sb.tail)) else Set() | 
| 
53def1fbf472
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
172diff
changeset | 46 | } | 
| 
53def1fbf472
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
172diff
changeset | 47 | |
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 48 | case class StringParser(s: String) extends Parser[String, String] {
 | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 49 |   def parse(sb: String) = {
 | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 50 | val (prefix, suffix) = sb.splitAt(s.length) | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 51 | if (prefix == s) Set((prefix, suffix)) else Set() | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 52 | } | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 53 | } | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 54 | |
| 367 
04127a5aad23
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
366diff
changeset | 55 | case object NumParser extends Parser[String, Int] {
 | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 56 | val reg = "[0-9]+".r | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 57 |   def parse(sb: String) = reg.findPrefixOf(sb) match {
 | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 58 | case None => Set() | 
| 367 
04127a5aad23
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
366diff
changeset | 59 |     case Some(s) => Set(sb.splitAt(s.length) match {
 | 
| 
04127a5aad23
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
366diff
changeset | 60 | case (x, y) => (x.toInt, y) | 
| 
04127a5aad23
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
366diff
changeset | 61 | }) | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 62 | } | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 63 | } | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 64 | |
| 360 
c6c574d2ca0c
update
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
185diff
changeset | 65 | // convenience | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 66 | implicit def string2parser(s : String) = StringParser(s) | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 67 | |
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 68 | implicit def ParserOps[I<% Seq[_], T](p: Parser[I, T]) = new {
 | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 69 | def || (q : => Parser[I, T]) = new AltParser[I, T](p, q) | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 70 | def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f) | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 71 | def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q) | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 72 | } | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 73 | |
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 74 | implicit def StringOps(s: String) = new {
 | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 75 | def || (q : => Parser[String, String]) = new AltParser[String, String](s, q) | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 76 | def || (r: String) = new AltParser[String, String](s, r) | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 77 | def ==>[S] (f: => String => S) = new FunParser[String, String, S](s, f) | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 78 | def ~[S] (q : => Parser[String, S]) = | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 79 | new SeqParser[String, String, S](s, q) | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 80 | def ~ (r: String) = | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 81 | new SeqParser[String, String, String](s, r) | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 82 | } | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 83 | |
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 84 | // a parse palindromes | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 85 | lazy val Pal : Parser[String, String] = | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 86 |   (("a" ~ Pal ~ "a") ==> { case ((x, y), z) => x + y + z } ||
 | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 87 |    ("b" ~ Pal ~ "b") ==> { case ((x, y), z) => x + y + z } || "")
 | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 88 | |
| 360 
c6c574d2ca0c
update
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
185diff
changeset | 89 | println("Palindrom: " + Pal.parse_all("ababbaba"))
 | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 90 | |
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 91 | // well-nested parenthesis parser | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 92 | lazy val P : Parser[String, String] = | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 93 |   "(" ~ P ~ ")" ~ P ==> { case (((u, x), y), z) => "{" + x + "}" + z } || ""
 | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 94 | |
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 95 | P.parse_all("(((()()))())")
 | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 96 | P.parse_all("(((()()))()))")
 | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 97 | P.parse_all(")(")
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 98 | P.parse_all("()")
 | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 99 | |
| 360 
c6c574d2ca0c
update
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
185diff
changeset | 100 | // arithmetic expressions | 
| 367 
04127a5aad23
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
366diff
changeset | 101 | lazy val E: Parser[String, Int] = | 
| 
04127a5aad23
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
366diff
changeset | 102 |   (F ~ "*" ~ F) ==> { case ((x, y), z) => x * z } || F 
 | 
| 
04127a5aad23
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
366diff
changeset | 103 | lazy val F: Parser[String, Int] = | 
| 
04127a5aad23
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
366diff
changeset | 104 |   ((T ~ "+" ~ T) ==> { case ((x, y), z) => x + z } ||
 | 
| 
04127a5aad23
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
366diff
changeset | 105 |    (T ~ "-" ~ T) ==> { case ((x, y), z) => x - z } || T)
 | 
| 
04127a5aad23
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
366diff
changeset | 106 | lazy val T: Parser[String, Int] = | 
| 
04127a5aad23
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
366diff
changeset | 107 |   ("(" ~ E ~ ")") ==> { case ((x, y), z) => y } || NumParser
 | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 108 | |
| 367 
04127a5aad23
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
366diff
changeset | 109 | println(E.parse("1*2+3"))
 | 
| 
04127a5aad23
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
366diff
changeset | 110 | println(E.parse("1 + 2 * 3"))
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 111 | println(E.parse_all("(1+2)+3"))
 | 
| 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 112 | println(E.parse_all("1+2+3"))  // this is not parsed, because of 
 | 
| 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 113 | // how the grammar is set up | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 114 | |
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 115 | // non-ambiguous vs ambiguous grammars | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 116 | lazy val S : Parser[String, String] = | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 117 |   ("1" ~ S ~ S) ==> { case ((x, y), z) => x + y + z } || ""
 | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 118 | |
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 119 | S.parse_all("1" * 15)
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 120 | |
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 121 | lazy val U : Parser[String, String] = | 
| 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 122 |   ("1" ~ U) ==> { case (x, y) => x + y  } || ""
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 123 | |
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 124 | U.parse("11")
 | 
| 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 125 | U.parse("11111")
 | 
| 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 126 | U.parse("11011")
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 127 | |
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 128 | U.parse_all("1" * 100 + "0")
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 129 | |
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 130 | lazy val UCount : Parser[String, Int] = | 
| 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 131 |   ("1" ~ UCount) ==> { case (x, y) => y + 1 } || 
 | 
| 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 132 |   "" ==> { (x) => 0 }
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 133 | |
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 134 | UCount.parse("11111")
 | 
| 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 135 | UCount.parse_all("11111")
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 136 | |
| 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 137 | |
| 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 138 | |
| 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 139 | |
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 140 | // Single Character parser | 
| 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 141 | lazy val One : Parser[String, String] = "1" | 
| 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 142 | lazy val Two : Parser[String, String] = "2" | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 143 | |
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 144 | One.parse("1")
 | 
| 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 145 | One.parse("111")
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 146 | |
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 147 | (One ~ One).parse("111")
 | 
| 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 148 | (One ~ One ~ One).parse("111")
 | 
| 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 149 | (One ~ One ~ One ~ One).parse("1111")
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 150 | |
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 151 | (One || Two).parse("111")
 |