progs/comb1a.scala
changeset 734 5d860ff01938
parent 733 022e2cb1668d
child 735 fc2e3609d5e5
equal deleted inserted replaced
733:022e2cb1668d 734:5d860ff01938
     1 import scala.language.implicitConversions
       
     2 import scala.language.reflectiveCalls
       
     3 
       
     4 // more convenience for the semantic actions later on
       
     5 case class ~[+A, +B](_1: A, _2: B)
       
     6 
       
     7 
       
     8 /* Note, in the lectures I did not show the implicit type consraint
       
     9  * I => Seq[_], which means that the input type 'I' needs to be
       
    10  * a sequence. */
       
    11 
       
    12 type IsSeq[A] = A => Seq[_]
       
    13 
       
    14 abstract class Parser[I : IsSeq, T] {
       
    15   def parse(ts: I): Set[(T, I)]
       
    16 
       
    17   def parse_all(ts: I) : Set[T] =
       
    18     for ((head, tail) <- parse(ts); 
       
    19         if tail.isEmpty) yield head
       
    20 }
       
    21 
       
    22 
       
    23 class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], 
       
    24                                  q: => Parser[I, S]) extends Parser[I, ~[T, S]] {
       
    25   def parse(sb: I) = 
       
    26     for ((head1, tail1) <- p.parse(sb); 
       
    27          (head2, tail2) <- q.parse(tail1)) yield (new ~(head1, head2), tail2)
       
    28 }
       
    29 
       
    30 class AltParser[I : IsSeq, T](p: => Parser[I, T], 
       
    31                               q: => Parser[I, T]) extends Parser[I, T] {
       
    32   def parse(sb: I) = p.parse(sb) ++ q.parse(sb)   
       
    33 }
       
    34 
       
    35 class FunParser[I : IsSeq, T, S](p: => Parser[I, T], 
       
    36                                  f: T => S) extends Parser[I, S] {
       
    37   def parse(sb: I) = 
       
    38     for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
       
    39 }
       
    40 
       
    41 // atomic parsers for characters, numbers and strings
       
    42 case class CharParser(c: Char) extends Parser[String, Char] {
       
    43   def parse(sb: String) = 
       
    44     if (sb != "" && sb.head == c) Set((c, sb.tail)) else Set()
       
    45 }
       
    46 
       
    47 import scala.util.matching.Regex
       
    48 case class RegexParser(reg: Regex) extends Parser[String, String] {
       
    49   def parse(sb: String) = reg.findPrefixMatchOf(sb) match {
       
    50     case None => Set()
       
    51     case Some(m) => Set((m.matched, m.after.toString))  
       
    52   }
       
    53 }
       
    54 
       
    55 val NumParser = RegexParser("[0-9]+".r)
       
    56 def StringParser(s: String) = RegexParser(Regex.quote(s).r)
       
    57 
       
    58 // NumParserInt2 transforms a "string integer" into an Int;
       
    59 // needs new, because FunParser is not a case class
       
    60 
       
    61 val NumParserInt2 = new FunParser(NumParser, (s: String) => s.toInt)
       
    62 
       
    63 
       
    64 // convenience
       
    65 implicit def string2parser(s: String) = StringParser(s)
       
    66 implicit def char2parser(c: Char) = CharParser(c)
       
    67 
       
    68 implicit def ParserOps[I : IsSeq, T](p: Parser[I, T]) = new {
       
    69   def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)
       
    70   def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f)
       
    71   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
       
    72 }
       
    73 
       
    74 implicit def StringOps(s: String) = new {
       
    75   def ||(q : => Parser[String, String]) = new AltParser[String, String](s, q)
       
    76   def ||(r: String) = new AltParser[String, String](s, r)
       
    77   def ==>[S] (f: => String => S) = new FunParser[String, String, S](s, f)
       
    78   def ~[S](q : => Parser[String, S]) = 
       
    79     new SeqParser[String, String, S](s, q)
       
    80   def ~(r: String) = 
       
    81     new SeqParser[String, String, String](s, r)
       
    82 }
       
    83 
       
    84 // NumParserInt can now be written as
       
    85 val NumParserInt = NumParser ==> (s => s.toInt)
       
    86 
       
    87 
       
    88 lazy val Pal : Parser[String, String] = 
       
    89   (("a" ~ Pal ~ "a") ==> { case x ~ y ~ z => x + y + z } ||
       
    90    ("b" ~ Pal ~ "b") ==> { case x ~ y ~ z => x + y + z } || "a" || "b" || "")
       
    91 
       
    92 Pal.parse_all("abaaaba")
       
    93 Pal.parse("abaaaba")
       
    94 
       
    95 println("Palindrome: " + Pal.parse_all("abaaaba"))
       
    96 
       
    97 // well-nested parentheses parser (transforms '(' -> '{' , ')' -> '}' )
       
    98 lazy val P : Parser[String, String] = 
       
    99   "(" ~ P ~ ")" ~ P ==> { case _ ~ x ~ _ ~ y => "{" + x + "}" + y } || ""
       
   100 
       
   101 P.parse_all("(((()()))())")
       
   102 P.parse_all("(((()()))()))")
       
   103 P.parse_all(")(")
       
   104 P.parse_all("()")
       
   105 
       
   106 // Arithmetic Expressions (Terms and Factors)
       
   107 
       
   108 lazy val E: Parser[String, Int] = 
       
   109   (T ~ "+" ~ E) ==> { case x ~ _ ~ z => x + z } ||
       
   110   (T ~ "-" ~ E) ==> { case x ~ _ ~ z => x - z } || T 
       
   111 lazy val T: Parser[String, Int] = 
       
   112   (F ~ "*" ~ T) ==> { case x ~ _ ~ z => x * z } || F
       
   113 lazy val F: Parser[String, Int] = 
       
   114   ("(" ~ E ~ ")") ==> { case _ ~ y ~ _ => y } || NumParserInt
       
   115 
       
   116 /* same parser but producing a string
       
   117 lazy val E: Parser[String, String] = 
       
   118   (T ~ "+" ~ E) ==> { case x ~ y ~ z => "(" + x + ")+(" + z + ")"} || T 
       
   119 lazy val T: Parser[String, String] = 
       
   120   (F ~ "*" ~ T) ==> { case x ~ y ~ z => "(" + x + ")*("+ z + ")"} || F
       
   121 lazy val F: Parser[String, String] = 
       
   122   ("(" ~ E ~ ")") ==> { case x ~ y ~ z => y } || NumParser
       
   123 */
       
   124 
       
   125 println(E.parse_all("1+3+4"))
       
   126 println(E.parse("1+3+4"))
       
   127 println(E.parse_all("4*2+3"))
       
   128 println(E.parse_all("4*(2+3)"))
       
   129 println(E.parse_all("(4)*((2+3))"))
       
   130 println(E.parse_all("4/2+3"))
       
   131 println(E.parse("1 + 2 * 3"))
       
   132 println(E.parse_all("(1+2)+3"))
       
   133 println(E.parse_all("1+2+3"))  
       
   134 
       
   135 
       
   136 
       
   137 // no left-recursion allowed, otherwise will loop
       
   138 lazy val EL: Parser[String, Int] = 
       
   139   (EL ~ "+" ~ EL ==> { case x ~ y ~ z => x + z} || 
       
   140    EL ~ "*" ~ EL ==> { case x ~ y ~ z => x * z} ||
       
   141    "(" ~ EL ~ ")" ==> { case x ~ y ~ z => y} ||
       
   142    NumParserInt)
       
   143 
       
   144 //println(EL.parse_all("1+2+3"))
       
   145 
       
   146 
       
   147 
       
   148 
       
   149 // non-ambiguous vs ambiguous grammars
       
   150 
       
   151 // ambiguous
       
   152 lazy val S : Parser[String, String] =
       
   153   ("1" ~ S ~ S) ==> { case x ~ y ~ z => x + y + z } || ""
       
   154 
       
   155 S.parse("1" * 10)
       
   156 
       
   157 // non-ambiguous
       
   158 lazy val U : Parser[String, String] =
       
   159   ("1" ~ U) ==> { case x ~ y => x + y  } || ""
       
   160 
       
   161 U.parse("1" * 25)
       
   162 
       
   163 U.parse("11")
       
   164 U.parse("11111")
       
   165 U.parse("11011")
       
   166 
       
   167 U.parse_all("1" * 100)
       
   168 U.parse_all("1" * 100 + "0")
       
   169 
       
   170 lazy val UCount : Parser[String, Int] =
       
   171   ("1" ~ UCount) ==> { case x ~ y => y + 1 } || "" ==> { x => 0 }
       
   172 
       
   173 UCount.parse("11111")
       
   174 UCount.parse_all("11111")
       
   175 
       
   176 
       
   177 
       
   178 // Single Character parser
       
   179 lazy val One : Parser[String, String] = "1"
       
   180 lazy val Two : Parser[String, String] = "2"
       
   181 
       
   182 One.parse("1")
       
   183 One.parse("111")
       
   184 
       
   185 (One ~ One).parse("111")
       
   186 (One ~ One ~ One).parse("111")
       
   187 (One ~ One ~ One ~ One).parse("1111")
       
   188 
       
   189 (One || Two).parse("111")
       
   190 
       
   191 
       
   192 
       
   193 // a problem with the arithmetic expression parser -> gets 
       
   194 // slow with deeply nested parentheses
       
   195 println("Runtime problem")
       
   196 E.parse("1")
       
   197 E.parse("(1)")
       
   198 E.parse("((1))")
       
   199 E.parse("(((1)))")
       
   200 E.parse("((((1))))")
       
   201 E.parse("((((((1))))))")
       
   202 E.parse("(((((((1)))))))")
       
   203 E.parse("((((((((1)))))))")