3 // |
3 // |
4 // Call with |
4 // Call with |
5 // |
5 // |
6 // amm comb1.sc |
6 // amm comb1.sc |
7 |
7 |
8 |
8 |
9 // Note, in the lectures I did not show the type bound |
9 // Note, in the lectures I did not show the type bound |
10 // using is: I => Seq[_], which means that the input |
10 // using is: I => Seq[_], which means that the input |
11 // type 'I' needs to be a sequence. |
11 // type 'I' needs to be a sequence. |
12 |
12 |
13 type IsSeq[I] = I => Seq[_] |
13 type IsSeq[I] = I => Seq[?] |
14 |
14 |
15 abstract class Parser[I, T](using is: IsSeq[I]) { |
15 abstract class Parser[I: IsSeq, T](using is: IsSeq[I]) { |
16 def parse(in: I): Set[(T, I)] |
16 def parse(in: I): Set[(T, I)] |
17 |
17 |
18 def parse_all(in: I) : Set[T] = |
18 def parse_all(in: I) : Set[T] = |
19 for ((hd, tl) <- parse(in); |
19 for ((hd, tl) <- parse(in); |
20 if is(tl).isEmpty) yield hd |
20 if is(tl).isEmpty) yield hd |
21 } |
21 } |
22 |
22 |
23 |
23 |
24 // parser combinators |
24 // parser combinators |
25 |
25 |
26 |
26 |
27 // alternative parser |
27 // alternative parser |
28 class AltParser[I : IsSeq, T](p: => Parser[I, T], |
28 class AltParser[I : IsSeq, T](p: => Parser[I, T], |
29 q: => Parser[I, T]) extends Parser[I, T] { |
29 q: => Parser[I, T]) extends Parser[I, T] { |
30 def parse(in: I) = p.parse(in) ++ q.parse(in) |
30 def parse(in: I) = p.parse(in) ++ q.parse(in) |
31 } |
31 } |
32 |
32 |
33 // sequence parser |
33 // sequence parser |
34 class SeqParser[I: IsSeq, T, S](p: => Parser[I, T], |
34 class SeqParser[I: IsSeq, T, S](p: => Parser[I, T], |
35 q: => Parser[I, S]) extends Parser[I, (T, S)] { |
35 q: => Parser[I, S]) extends Parser[I, (T, S)] { |
36 def parse(in: I) = |
36 def parse(in: I) = |
37 for ((hd1, tl1) <- p.parse(in); |
37 for ((hd1, tl1) <- p.parse(in); |
38 (hd2, tl2) <- q.parse(tl1)) yield ((hd1, hd2), tl2) |
38 (hd2, tl2) <- q.parse(tl1)) yield ((hd1, hd2), tl2) |
39 } |
39 } |
40 |
40 |
41 // map parser |
41 // map parser |
42 class MapParser[I : IsSeq, T, S](p: => Parser[I, T], |
42 class MapParser[I : IsSeq, T, S](p: => Parser[I, T], |
43 f: T => S) extends Parser[I, S] { |
43 f: T => S) extends Parser[I, S] { |
44 def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl) |
44 def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl) |
45 } |
45 } |
46 |
46 |
47 |
47 |
48 |
48 |
49 // an example of an atomic parser for characters |
49 // an example of an atomic parser for characters |
50 case class CharParser(c: Char) extends Parser[String, Char] { |
50 case class CharParser(c: Char) extends Parser[String, Char] { |
51 def parse(in: String) = |
51 def parse(in: String) = |
52 if (in != "" && in.head == c) Set((c, in.tail)) else Set() |
52 if (in != "" && in.head == c) Set((c, in.tail)) else Set() |
53 } |
53 } |
54 |
54 |
55 val ap = CharParser('a') |
55 val ap = CharParser('a') |
56 val bp = CharParser('b') |
56 val bp = CharParser('b') |
62 import scala.util.matching.Regex |
62 import scala.util.matching.Regex |
63 |
63 |
64 case class RegexParser(reg: Regex) extends Parser[String, String] { |
64 case class RegexParser(reg: Regex) extends Parser[String, String] { |
65 def parse(in: String) = reg.findPrefixMatchOf(in) match { |
65 def parse(in: String) = reg.findPrefixMatchOf(in) match { |
66 case None => Set() |
66 case None => Set() |
67 case Some(m) => Set((m.matched, m.after.toString)) |
67 case Some(m) => Set((m.matched, m.after.toString)) |
68 } |
68 } |
69 } |
69 } |
70 |
70 |
71 // atomic parsers for numbers and "verbatim" strings |
71 // atomic parsers for numbers and "verbatim" strings |
72 val NumParser = RegexParser("[0-9]+".r) |
72 val NumParser = RegexParser("[0-9]+".r) |
73 def StrParser(s: String) = RegexParser(Regex.quote(s).r) |
73 def StrParser(s: String) = RegexParser(Regex.quote(s).r) |
74 |
74 |
75 NumParser.parse("123a123bc") |
75 NumParser.parse("123a123bc") |
76 StrParser("else").parse("elsethen") |
76 StrParser("else").parse("elsethen") |
77 |
77 |
78 |
78 |
79 // NumParserInt transforms a "string integer" into a propper Int |
79 // NumParserInt transforms a "string integer" into a propper Int |
80 // (needs "new" because MapParser is not a case class) |
80 // (needs "new" because MapParser is not a case class) |
81 |
81 |
82 val NumParserInt = MapParser(NumParser, (s: String) => s.toInt) |
82 val NumParserInt = MapParser(NumParser, (s: String) => s.toInt) |
83 NumParserInt.parse("123abc") |
83 NumParserInt.parse("123abc") |
84 |
84 |
85 // the following string interpolation allows us to write |
85 // the following string interpolation allows us to write |
86 // StrParser(_some_string_) more conveniently as |
86 // StrParser(_some_string_) more conveniently as |
87 // |
87 // |
88 // p"<_some_string_>" |
88 // p"<_some_string_>" |
89 |
89 |
90 extension (sc: StringContext) |
90 extension (sc: StringContext) |
91 def p(args: Any*) = StrParser(sc.s(args:_*)) |
91 def p(args: Any*) = StrParser(sc.s(args*)) |
92 |
92 |
93 |
93 |
94 (p"else").parse("elsethen") |
94 (p"else").parse("elsethen") |
95 |
95 |
96 // more convenient syntax for parser combinators |
96 // more convenient syntax for parser combinators |
97 extension [I: IsSeq, T](p: Parser[I, T]) { |
97 extension [I: IsSeq, T](p: Parser[I, T]) { |
98 def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q) |
98 def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q) |
99 def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q) |
99 def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q) |
100 def map[S](f: => T => S) = new MapParser[I, T, S](p, f) |
100 def map[S](f: => T => S) = new MapParser[I, T, S](p, f) |
101 } |
101 } |
102 |
102 |
103 // simple example of transforming the |
103 // simple example of transforming the |
104 // result into capital letters |
104 // result into capital letters |
105 def toU(s: String) = s.map(_.toUpper) |
105 def toU(s: String) = s.map(_.toUpper) |
106 |
106 |
107 (p"else").map(toU(_)).parse("elseifthen") |
107 (p"else").map(toU(_)).parse("elseifthen") |
108 |
108 |
109 // these implicits allow us to use an infix notation for |
109 // these implicits allow us to use an infix notation for |
110 // sequences and alternatives; we also can write the usual |
110 // sequences and alternatives; we also can write the usual |
111 // map for a MapParser |
111 // map for a MapParser |
112 |
112 |
119 val x = 1 + 3 |
119 val x = 1 + 3 |
120 |
120 |
121 // A parser for palindromes (just returns them as string) |
121 // A parser for palindromes (just returns them as string) |
122 // since the parser is recursive it needs to be lazy |
122 // since the parser is recursive it needs to be lazy |
123 lazy val Pal : Parser[String, String] = { |
123 lazy val Pal : Parser[String, String] = { |
124 (p"a" ~ Pal ~ p"a").map{ case ((x, y), z) => s"$x$y$z" } || |
124 (p"a" ~ Pal ~ p"a").map{ case ((x, y), z) => s"$x$y$z" } || |
125 (p"b" ~ Pal ~ p"b").map{ case ((x, y), z) => s"$x$y$z" } || |
125 (p"b" ~ Pal ~ p"b").map{ case ((x, y), z) => s"$x$y$z" } || |
126 p"a" || p"b" || p"" |
126 p"a" || p"b" || p"" |
127 } |
127 } |
128 |
128 |
129 // examples |
129 // examples |
130 Pal.parse_all("abacaba") |
130 Pal.parse_all("abacaba") |
131 Pal.parse("abacaaba") |
131 Pal.parse("abacaaba") |
132 |
132 |
133 println("Palindrome: " + Pal.parse_all("abaaaba")) |
133 println("Palindrome: " + Pal.parse_all("abaaaba")) |
134 |
134 |
135 // A parser for wellnested parentheses |
135 // A parser for wellnested parentheses |
136 // |
136 // |
137 // P ::= ( P ) P | epsilon |
137 // P ::= ( P ) P | epsilon |
138 // |
138 // |
139 // (transforms '(' -> '{' , ')' -> '}' ) |
139 // (transforms '(' -> '{' , ')' -> '}' ) |
140 lazy val P : Parser[String, String] = { |
140 lazy val P : Parser[String, String] = { |
141 (p"(" ~ P ~ p")" ~ P).map{ case (((_, x), _), y) => "{" + x + "}" + y } || |
141 (p"(" ~ P ~ p")" ~ P).map{ case (((_, x), _), y) => "{" + x + "}" + y } || |
142 p"" |
142 p"" |
143 } |
143 } |
144 |
144 |
145 println(P.parse_all("(((()()))())")) |
145 println(P.parse_all("(((()()))())")) |
146 println(P.parse_all("(((()()))()))")) |
146 println(P.parse_all("(((()()))()))")) |
147 println(P.parse_all(")(")) |
147 println(P.parse_all(")(")) |
148 println(P.parse_all("()")) |
148 println(P.parse_all("()")) |
170 |
170 |
171 |
171 |
172 // with parser combinators (and other parsing algorithms) |
172 // with parser combinators (and other parsing algorithms) |
173 // no left-recursion is allowed, otherwise the will loop |
173 // no left-recursion is allowed, otherwise the will loop |
174 |
174 |
175 lazy val EL: Parser[String, Int] = |
175 lazy val EL: Parser[String, Int] = |
176 ((EL ~ p"+" ~ EL).map{ case ((x, y), z) => x + z} || |
176 ((EL ~ p"+" ~ EL).map{ case ((x, y), z) => x + z} || |
177 (EL ~ p"*" ~ EL).map{ case ((x, y), z) => x * z} || |
177 (EL ~ p"*" ~ EL).map{ case ((x, y), z) => x * z} || |
178 (p"(" ~ EL ~ p")").map{ case ((x, y), z) => y} || |
178 (p"(" ~ EL ~ p")").map{ case ((x, y), z) => y} || |
179 NumParserInt) |
179 NumParserInt) |
180 |
180 |
181 // this will run forever: |
181 // this will run forever: |