| author | Christian Urban <christian.urban@kcl.ac.uk> | 
| Sat, 16 May 2020 17:18:43 +0100 | |
| changeset 724 | b53b6d61fcb6 | 
| parent 686 | 5fe95ea0bad0 | 
| permissions | -rw-r--r-- | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 1 | import scala.language.implicitConversions | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 2 | import scala.language.reflectiveCalls | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 3 | |
| 629 | 4 | /* Note, in the lectures I did not show the implicit type | 
| 5 | * constraint IsSeq, which means that the input type 'I' needs | |
| 6 | * to be a sequence. */ | |
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 7 | |
| 667 | 8 | type IsSeq[A] = A => Seq[_] | 
| 629 | 9 | |
| 10 | abstract class Parser[I : IsSeq, T] {
 | |
| 461 | 11 | def parse(ts: I): Set[(T, I)] | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 12 | |
| 461 | 13 | def parse_all(ts: I) : Set[T] = | 
| 360 
c6c574d2ca0c
update
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
185diff
changeset | 14 | for ((head, tail) <- parse(ts); | 
| 683 | 15 | if tail.isEmpty) yield head | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 16 | } | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 17 | |
| 629 | 18 | class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], | 
| 19 |                                  q: => Parser[I, S]) extends Parser[I, (T, S)] {
 | |
| 461 | 20 | def parse(sb: I) = | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 21 | for ((head1, tail1) <- p.parse(sb); | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 22 | (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2) | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 23 | } | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 24 | |
| 629 | 25 | class AltParser[I : IsSeq, T](p: => Parser[I, T], | 
| 26 |                               q: => Parser[I, T]) extends Parser[I, T] {
 | |
| 461 | 27 | def parse(sb: I) = p.parse(sb) ++ q.parse(sb) | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 28 | } | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 29 | |
| 629 | 30 | class FunParser[I : IsSeq, T, S](p: => Parser[I, T], | 
| 31 |                                  f: T => S) extends Parser[I, S] {
 | |
| 461 | 32 | def parse(sb: I) = | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 33 | for ((head, tail) <- p.parse(sb)) yield (f(head), tail) | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 34 | } | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 35 | |
| 628 | 36 | // atomic parsers for characters, numbers and strings | 
| 461 | 37 | case class CharParser(c: Char) extends Parser[String, Char] {
 | 
| 177 
53def1fbf472
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
172diff
changeset | 38 | def parse(sb: String) = | 
| 462 | 39 | if (sb != "" && sb.head == c) Set((c, sb.tail)) else Set() | 
| 177 
53def1fbf472
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
172diff
changeset | 40 | } | 
| 
53def1fbf472
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
172diff
changeset | 41 | |
| 588 | 42 | import scala.util.matching.Regex | 
| 43 | case class RegexParser(reg: Regex) extends Parser[String, String] {
 | |
| 44 |   def parse(sb: String) = reg.findPrefixMatchOf(sb) match {
 | |
| 45 | case None => Set() | |
| 46 | case Some(m) => Set((m.matched, m.after.toString)) | |
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 47 | } | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 48 | } | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 49 | |
| 588 | 50 | val NumParser = RegexParser("[0-9]+".r)
 | 
| 593 | 51 | def StringParser(s: String) = RegexParser(Regex.quote(s).r) | 
| 588 | 52 | |
| 672 | 53 | // NumParserInt2 transforms a "string integer" into an actual Int; | 
| 624 | 54 | // needs new, because FunParser is not a case class | 
| 628 | 55 | val NumParserInt2 = new FunParser(NumParser, (s: String) => s.toInt) | 
| 624 | 56 | |
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 57 | |
| 360 
c6c574d2ca0c
update
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
185diff
changeset | 58 | // convenience | 
| 462 | 59 | implicit def string2parser(s: String) = StringParser(s) | 
| 593 | 60 | implicit def char2parser(c: Char) = CharParser(c) | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 61 | |
| 624 | 62 | implicit def ParserOps[I, T](p: Parser[I, T])(implicit ev: I => Seq[_]) = new {
 | 
| 686 | 63 | def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q) | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 64 | def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f) | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 65 | def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q) | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 66 | } | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 67 | |
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 68 | implicit def StringOps(s: String) = new {
 | 
| 686 | 69 | def ||(q : => Parser[String, String]) = new AltParser[String, String](s, q) | 
| 70 | def ||(r: String) = new AltParser[String, String](s, r) | |
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 71 | def ==>[S] (f: => String => S) = new FunParser[String, String, S](s, f) | 
| 686 | 72 | def ~[S](q : => Parser[String, S]) = | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 73 | new SeqParser[String, String, S](s, q) | 
| 686 | 74 | def ~(r: String) = | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 75 | new SeqParser[String, String, String](s, r) | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 76 | } | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 77 | |
| 672 | 78 | // NumParserInt can now be written as _ ===> _ | 
| 79 | // the first part is the parser, and the second the | |
| 80 | // semantic action | |
| 624 | 81 | val NumParserInt = NumParser ==> (s => s.toInt) | 
| 82 | ||
| 588 | 83 | |
| 672 | 84 | // palindromes | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 85 | lazy val Pal : Parser[String, String] = | 
| 628 | 86 |   (("a" ~ Pal ~ "a") ==> { case ((x, y), z) => x + y + z } ||
 | 
| 87 |    ("b" ~ Pal ~ "b") ==> { case ((x, y), z) => x + y + z } || "a" || "b" || "")
 | |
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 88 | |
| 586 | 89 | Pal.parse_all("abaaaba")
 | 
| 667 | 90 | Pal.parse_all("abacba")
 | 
| 593 | 91 | Pal.parse("abaaaba")
 | 
| 586 | 92 | |
| 531 | 93 | println("Palindrome: " + Pal.parse_all("abaaaba"))
 | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 94 | |
| 672 | 95 | // parser for well-nested parentheses (transforms '(' -> '{' , ')' -> '}' )
 | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 96 | lazy val P : Parser[String, String] = | 
| 628 | 97 |   "(" ~ P ~ ")" ~ P ==> { case (((_, x), _), y) => "{" + x + "}" + y } || ""
 | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 98 | |
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 99 | P.parse_all("(((()()))())")
 | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 100 | P.parse_all("(((()()))()))")
 | 
| 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 101 | P.parse_all(")(")
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 102 | P.parse_all("()")
 | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 103 | |
| 672 | 104 | // just counts parentheses | 
| 673 | 105 | lazy val P2 : Parser[String, Int] = | 
| 106 |   ("(" ~ P2 ~ ")" ~ P2 ==> { case (((_, x), _), y) => x + y + 2 } || 
 | |
| 107 |    "" ==> { _ => 0 })
 | |
| 108 | ||
| 109 | P2.parse_all("(((()()))())")
 | |
| 110 | P2.parse_all("(((()()))()))")
 | |
| 667 | 111 | |
| 673 | 112 | // counts opening and closing parentheses | 
| 113 | lazy val P3 : Parser[String, Int] = | |
| 114 |   ("(" ~ P3 ==> { case (_, x) => x + 1 } ||
 | |
| 115 |    ")" ~ P3 ==> { case (_, x) => x - 1 } || 
 | |
| 116 |    "" ==> { _ => 0 })
 | |
| 117 | ||
| 118 | P3.parse_all("(((()()))())")
 | |
| 119 | P3.parse_all("(((()()))()))")
 | |
| 120 | P3.parse_all(")(")
 | |
| 667 | 121 | |
| 628 | 122 | // Arithmetic Expressions (Terms and Factors) | 
| 672 | 123 | // (because it is mutually recursive, you need :paste | 
| 124 | // for munching this definition in the REPL) | |
| 586 | 125 | |
| 367 
04127a5aad23
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
366diff
changeset | 126 | lazy val E: Parser[String, Int] = | 
| 628 | 127 |   (T ~ "+" ~ E) ==> { case ((x, y), z) => x + z } ||
 | 
| 128 |   (T ~ "-" ~ E) ==> { case ((x, y), z) => x - z } || T 
 | |
| 470 
d6babe14a3a2
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
467diff
changeset | 129 | lazy val T: Parser[String, Int] = | 
| 628 | 130 |   (F ~ "*" ~ T) ==> { case ((x, y), z) => x * z } || F
 | 
| 367 
04127a5aad23
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
366diff
changeset | 131 | lazy val F: Parser[String, Int] = | 
| 628 | 132 |   ("(" ~ E ~ ")") ==> { case ((x, y), z) => y } || NumParserInt
 | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 133 | |
| 599 | 134 | println(E.parse_all("1+3+4"))
 | 
| 135 | println(E.parse("1+3+4"))
 | |
| 593 | 136 | println(E.parse_all("4*2+3"))
 | 
| 137 | println(E.parse_all("4*(2+3)"))
 | |
| 594 | 138 | println(E.parse_all("(4)*((2+3))"))
 | 
| 586 | 139 | println(E.parse_all("4/2+3"))
 | 
| 367 
04127a5aad23
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
366diff
changeset | 140 | println(E.parse("1 + 2 * 3"))
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 141 | println(E.parse_all("(1+2)+3"))
 | 
| 531 | 142 | println(E.parse_all("1+2+3"))  
 | 
| 143 | ||
| 686 | 144 | /* same parser but producing a string | 
| 145 | lazy val E: Parser[String, String] = | |
| 146 |   (T ~ "+" ~ E) ==> { case ((x, y), z) => "(" + x + ")+(" + z + ")"} || T 
 | |
| 147 | lazy val T: Parser[String, String] = | |
| 148 |   (F ~ "*" ~ T) ==> { case ((x, y), z) => "(" + x + ")*("+ z + ")"} || F
 | |
| 149 | lazy val F: Parser[String, String] = | |
| 150 |   ("(" ~ E ~ ")") ==> { case ((x, y), z) => y } || NumParser
 | |
| 151 | */ | |
| 586 | 152 | |
| 531 | 153 | // no left-recursion allowed, otherwise will loop | 
| 154 | lazy val EL: Parser[String, Int] = | |
| 628 | 155 |   (EL ~ "+" ~ EL ==> { case ((x, y), z) => x + z} || 
 | 
| 156 |    EL ~ "*" ~ EL ==> { case ((x, y), z) => x * z} ||
 | |
| 157 |    "(" ~ EL ~ ")" ==> { case ((x, y), z) => y} ||
 | |
| 593 | 158 | NumParserInt) | 
| 531 | 159 | |
| 593 | 160 | //println(EL.parse_all("1+2+3"))
 | 
| 531 | 161 | |
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 162 | |
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 163 | // non-ambiguous vs ambiguous grammars | 
| 628 | 164 | |
| 165 | // ambiguous | |
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 166 | lazy val S : Parser[String, String] = | 
| 686 | 167 |   ("1" ~ S ~ S ~ S) ==> { case (((x, y), z), v) => x + y + z } || ""
 | 
| 172 
47b5c91eff47
added
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 168 | |
| 628 | 169 | S.parse("1" * 10)
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 170 | |
| 628 | 171 | // non-ambiguous | 
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 172 | lazy val U : Parser[String, String] = | 
| 628 | 173 |   ("1" ~ U) ==> { case (x, y) => x + y  } || ""
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 174 | |
| 599 | 175 | U.parse("1" * 25)
 | 
| 531 | 176 | |
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 177 | U.parse("11")
 | 
| 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 178 | U.parse("11111")
 | 
| 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 179 | U.parse("11011")
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 180 | |
| 531 | 181 | U.parse_all("1" * 100)
 | 
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 182 | U.parse_all("1" * 100 + "0")
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 183 | |
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 184 | lazy val UCount : Parser[String, Int] = | 
| 628 | 185 |   ("1" ~ UCount) ==> { case (x, y) => y + 1 } || "" ==> { x => 0 }
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 186 | |
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 187 | UCount.parse("11111")
 | 
| 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 188 | UCount.parse_all("11111")
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 189 | |
| 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 190 | |
| 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 191 | |
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 192 | // Single Character parser | 
| 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 193 | lazy val One : Parser[String, String] = "1" | 
| 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 194 | lazy val Two : Parser[String, String] = "2" | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 195 | |
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 196 | One.parse("1")
 | 
| 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 197 | One.parse("111")
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 198 | |
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 199 | (One ~ One).parse("111")
 | 
| 686 | 200 | (One ~ One ~ One).parse("1111")
 | 
| 366 
5a83336a9690
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
362diff
changeset | 201 | (One ~ One ~ One ~ One).parse("1111")
 | 
| 362 
57ea439feaff
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
360diff
changeset | 202 | |
| 628 | 203 | (One || Two).parse("111")
 | 
| 204 | ||
| 205 | ||
| 672 | 206 | // a problem with the arithmetic expression parser -> gets | 
| 686 | 207 | // slow with deeply nested parentheses | 
| 628 | 208 | E.parse("1")
 | 
| 209 | E.parse("(1)")
 | |
| 210 | E.parse("((1))")
 | |
| 211 | E.parse("(((1)))")
 | |
| 212 | E.parse("((((1))))")
 | |
| 213 | E.parse("((((((1))))))")
 | |
| 672 | 214 | E.parse("(((((((1)))))))")
 | 
| 686 | 215 | |
| 216 | ||
| 217 | ||
| 218 | ||
| 219 | /* | |
| 220 | ||
| 221 | starting symbols | |
| 222 | tokenise/detokenise | |
| 223 | :paste | |
| 224 | pairs in sequences | |
| 225 | ||
| 226 | */ |