| author | Christian Urban <urbanc@in.tum.de> | 
| Sun, 28 Jul 2019 14:24:46 +0100 | |
| changeset 624 | e50096adda15 | 
| parent 599 | 0b512541f7ce | 
| child 628 | eb227bc91a26 | 
| permissions | -rw-r--r-- | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 1 | import scala.language.implicitConversions | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 2 | import scala.language.reflectiveCalls | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 3 | |
| 624 | 4 | /* Note, in the lectures I did not show the implicit type consraint | 
| 5 | * I => Seq[_] , which means that the input type I needs to be | |
| 6 | * a sequence, subtype of Seq. */ | |
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 7 | |
| 624 | 8 | abstract class Parser[I, T](implicit ev: I => Seq[_]) {
 | 
| 461 | 9 | def parse(ts: I): Set[(T, I)] | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 10 | |
| 461 | 11 | def parse_all(ts: I) : Set[T] = | 
| 360 
c6c574d2ca0c
update
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
185diff
changeset | 12 | for ((head, tail) <- parse(ts); | 
| 
c6c574d2ca0c
update
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
185diff
changeset | 13 | if (tail.isEmpty)) yield head | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 14 | } | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 15 | |
| 624 | 16 | class SeqParser[I, T, S](p: => Parser[I, T], | 
| 17 |                          q: => Parser[I, S])(implicit ev: I => Seq[_]) extends Parser[I, (T, S)] {
 | |
| 461 | 18 | def parse(sb: I) = | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 19 | for ((head1, tail1) <- p.parse(sb); | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 20 | (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2) | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 21 | } | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 22 | |
| 624 | 23 | class AltParser[I, T](p: => Parser[I, T], | 
| 24 |                       q: => Parser[I, T])(implicit ev: I => Seq[_]) extends Parser[I, T] {
 | |
| 461 | 25 | def parse(sb: I) = p.parse(sb) ++ q.parse(sb) | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 26 | } | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 27 | |
| 624 | 28 | class FunParser[I, T, S](p: => Parser[I, T], | 
| 29 |                          f: T => S)(implicit ev: I => Seq[_]) extends Parser[I, S] {
 | |
| 461 | 30 | def parse(sb: I) = | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 31 | for ((head, tail) <- p.parse(sb)) yield (f(head), tail) | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 32 | } | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 33 | |
| 183 
b17eff695c7f
added new stuff
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
178diff
changeset | 34 | // atomic parsers | 
| 461 | 35 | case class CharParser(c: Char) extends Parser[String, Char] {
 | 
| 177 
53def1fbf472
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
172diff
changeset | 36 | def parse(sb: String) = | 
| 462 | 37 | if (sb != "" && sb.head == c) Set((c, sb.tail)) else Set() | 
| 177 
53def1fbf472
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
172diff
changeset | 38 | } | 
| 
53def1fbf472
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
172diff
changeset | 39 | |
| 588 | 40 | import scala.util.matching.Regex | 
| 41 | case class RegexParser(reg: Regex) extends Parser[String, String] {
 | |
| 42 |   def parse(sb: String) = reg.findPrefixMatchOf(sb) match {
 | |
| 43 | case None => Set() | |
| 44 | case Some(m) => Set((m.matched, m.after.toString)) | |
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 45 | } | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 46 | } | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 47 | |
| 588 | 48 | val NumParser = RegexParser("[0-9]+".r)
 | 
| 593 | 49 | def StringParser(s: String) = RegexParser(Regex.quote(s).r) | 
| 588 | 50 | |
| 624 | 51 | // NumParserInt transforms a "string integer" into an Int; | 
| 52 | // needs new, because FunParser is not a case class | |
| 53 | val NumParserInt = new FunParser(NumParser, (s: String) => s.toInt) | |
| 54 | ||
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 55 | |
| 360 
c6c574d2ca0c
update
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
185diff
changeset | 56 | // convenience | 
| 462 | 57 | implicit def string2parser(s: String) = StringParser(s) | 
| 593 | 58 | implicit def char2parser(c: Char) = CharParser(c) | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 59 | |
| 624 | 60 | implicit def ParserOps[I, T](p: Parser[I, T])(implicit ev: I => Seq[_]) = new {
 | 
| 590 | 61 | def | (q : => Parser[I, T]) = new AltParser[I, T](p, q) | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 62 | def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f) | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 63 | def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q) | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 64 | } | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 65 | |
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 66 | implicit def StringOps(s: String) = new {
 | 
| 590 | 67 | def | (q : => Parser[String, String]) = new AltParser[String, String](s, q) | 
| 68 | def | (r: String) = new AltParser[String, String](s, r) | |
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 69 | def ==>[S] (f: => String => S) = new FunParser[String, String, S](s, f) | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 70 | def ~[S] (q : => Parser[String, S]) = | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 71 | new SeqParser[String, String, S](s, q) | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 72 | def ~ (r: String) = | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 73 | new SeqParser[String, String, String](s, r) | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 74 | } | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 75 | |
| 624 | 76 | // NumParserInt can now be written as | 
| 77 | val NumParserInt = NumParser ==> (s => s.toInt) | |
| 78 | ||
| 588 | 79 | |
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 80 | lazy val Pal : Parser[String, String] = | 
| 593 | 81 |   (("a" ~ Pal ~ "a") ==> { case ((x, y), z) => x + y + z } |
 | 
| 82 |    ("b" ~ Pal ~ "b") ==> { case ((x, y), z) => x + y + z } | "a" | "b" | "")
 | |
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 83 | |
| 586 | 84 | Pal.parse_all("abaaaba")
 | 
| 593 | 85 | Pal.parse("abaaaba")
 | 
| 586 | 86 | |
| 531 | 87 | println("Palindrome: " + Pal.parse_all("abaaaba"))
 | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 88 | |
| 624 | 89 | // well-nested parentheses parser (transforms '(' -> '{' , ')' -> '}' )
 | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 90 | lazy val P : Parser[String, String] = | 
| 593 | 91 |   "(" ~ P ~ ")" ~ P ==> { case (((_, x), _), y) => "{" + x + "}" + y } | ""
 | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 92 | |
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 93 | P.parse_all("(((()()))())")
 | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 94 | P.parse_all("(((()()))()))")
 | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 95 | P.parse_all(")(")
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 96 | P.parse_all("()")
 | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 97 | |
| 624 | 98 | // Arithmetic Expressions | 
| 586 | 99 | |
| 367 
04127a5aad23
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
366diff
changeset | 100 | lazy val E: Parser[String, Int] = | 
| 593 | 101 |   (T ~ "+" ~ E) ==> { case ((x, y), z) => x + z } |
 | 
| 102 |   (T ~ "-" ~ E) ==> { case ((x, y), z) => x - z } | T 
 | |
| 470 
d6babe14a3a2
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
467diff
changeset | 103 | lazy val T: Parser[String, Int] = | 
| 593 | 104 |   (F ~ "*" ~ T) ==> { case ((x, y), z) => x * z } | F
 | 
| 367 
04127a5aad23
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
366diff
changeset | 105 | lazy val F: Parser[String, Int] = | 
| 593 | 106 |   ("(" ~ E ~ ")") ==> { case ((x, y), z) => y } | NumParserInt
 | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 107 | |
| 599 | 108 | lazy val E: Parser[String, String] = | 
| 109 |   (T ~ "+" ~ E) ==> { case ((x, y), z) => "(" + x + ")+(" + z + ")"} | T 
 | |
| 110 | lazy val T: Parser[String, String] = | |
| 111 |   (F ~ "*" ~ T) ==> { case ((x, y), z) => "(" + x + ")*("+ z + ")"} | F
 | |
| 112 | lazy val F: Parser[String, String] = | |
| 113 |   ("(" ~ E ~ ")") ==> { case ((x, y), z) => y } | NumParser
 | |
| 586 | 114 | |
| 599 | 115 | println(E.parse_all("1+3+4"))
 | 
| 116 | println(E.parse("1+3+4"))
 | |
| 593 | 117 | println(E.parse_all("4*2+3"))
 | 
| 118 | println(E.parse_all("4*(2+3)"))
 | |
| 594 | 119 | println(E.parse_all("(4)*((2+3))"))
 | 
| 586 | 120 | println(E.parse_all("4/2+3"))
 | 
| 367 
04127a5aad23
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
366diff
changeset | 121 | println(E.parse("1 + 2 * 3"))
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 122 | println(E.parse_all("(1+2)+3"))
 | 
| 531 | 123 | println(E.parse_all("1+2+3"))  
 | 
| 124 | ||
| 586 | 125 | |
| 126 | ||
| 531 | 127 | // no left-recursion allowed, otherwise will loop | 
| 128 | lazy val EL: Parser[String, Int] = | |
| 593 | 129 |   (EL ~ "+" ~ EL ==> { case ((x, y), z) => x + z} | 
 | 
| 130 |    EL ~ "*" ~ EL ==> { case ((x, y), z) => x * z} |
 | |
| 131 |    "(" ~ EL ~ ")" ==> { case ((x, y), z) => y} |
 | |
| 132 | NumParserInt) | |
| 531 | 133 | |
| 593 | 134 | //println(EL.parse_all("1+2+3"))
 | 
| 531 | 135 | |
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 136 | |
| 462 | 137 | |
| 138 | ||
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 139 | // non-ambiguous vs ambiguous grammars | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 140 | lazy val S : Parser[String, String] = | 
| 593 | 141 |   ("1" ~ S ~ S) ==> { case ((x, y), z) => x + y + z } | ""
 | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 142 | |
| 599 | 143 | S.parse("1" * 17)
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 144 | |
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 145 | lazy val U : Parser[String, String] = | 
| 593 | 146 |   ("1" ~ U) ==> { case (x, y) => x + y  } | ""
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 147 | |
| 599 | 148 | U.parse("1" * 25)
 | 
| 531 | 149 | |
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 150 | U.parse("11")
 | 
| 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 151 | U.parse("11111")
 | 
| 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 152 | U.parse("11011")
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 153 | |
| 531 | 154 | U.parse_all("1" * 100)
 | 
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 155 | U.parse_all("1" * 100 + "0")
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 156 | |
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 157 | lazy val UCount : Parser[String, Int] = | 
| 624 | 158 |   ("1" ~ UCount) ==> { case (x, y) => y + 1 } | "" ==> { x => 0 }
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 159 | |
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 160 | UCount.parse("11111")
 | 
| 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 161 | UCount.parse_all("11111")
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 162 | |
| 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 163 | |
| 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 164 | |
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 165 | // Single Character parser | 
| 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 166 | lazy val One : Parser[String, String] = "1" | 
| 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 167 | lazy val Two : Parser[String, String] = "2" | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 168 | |
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 169 | One.parse("1")
 | 
| 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 170 | One.parse("111")
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 171 | |
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 172 | (One ~ One).parse("111")
 | 
| 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 173 | (One ~ One ~ One).parse("111")
 | 
| 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 174 | (One ~ One ~ One ~ One).parse("1111")
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 175 | |
| 593 | 176 | (One | Two).parse("111")
 | 
| 467 
3fc9b036321d
fixed bug
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
462diff
changeset | 177 | |
| 
3fc9b036321d
fixed bug
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
462diff
changeset | 178 | |
| 531 | 179 |