progs/comb1-test.scala
changeset 629 1b718d6065c2
equal deleted inserted replaced
628:8067d0a8ba04 629:1b718d6065c2
       
     1 import scala.language.implicitConversions
       
     2 import scala.language.reflectiveCalls
       
     3 
       
     4 /* Note, in the lectures I did not show the implicit type consraint
       
     5  * I => Seq[_], which means that the input type 'I' needs to be
       
     6  * a sequence. */
       
     7 
       
     8 type IsSeq[A] = A => Seq[_]
       
     9 
       
    10 abstract class Parser[I : IsSeq, T] {
       
    11   def parse(ts: I): Set[(T, I)]
       
    12 
       
    13   def parse_all(ts: I) : Set[T] =
       
    14     for ((head, tail) <- parse(ts); 
       
    15         if (tail.isEmpty)) yield head
       
    16 }
       
    17 
       
    18 // convenience for matching later on
       
    19 case class ~[+A, +B](_1: A, _2: B)
       
    20 
       
    21 class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], 
       
    22                                  q: => Parser[I, S]) extends Parser[I, ~[T, S]] {
       
    23   def parse(sb: I) = 
       
    24     for ((head1, tail1) <- p.parse(sb); 
       
    25          (head2, tail2) <- q.parse(tail1)) yield (new ~(head1, head2), tail2)
       
    26 }
       
    27 
       
    28 class AltParser[I : IsSeq, T](p: => Parser[I, T], 
       
    29                               q: => Parser[I, T]) extends Parser[I, T] {
       
    30   def parse(sb: I) = p.parse(sb) ++ q.parse(sb)   
       
    31 }
       
    32 
       
    33 class FunParser[I : IsSeq, T, S](p: => Parser[I, T], 
       
    34                                  f: T => S) extends Parser[I, S] {
       
    35   def parse(sb: I) = 
       
    36     for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
       
    37 }
       
    38 
       
    39 // atomic parsers for characters, numbers and strings
       
    40 case class CharParser(c: Char) extends Parser[String, Char] {
       
    41   def parse(sb: String) = 
       
    42     if (sb != "" && sb.head == c) Set((c, sb.tail)) else Set()
       
    43 }
       
    44 
       
    45 import scala.util.matching.Regex
       
    46 case class RegexParser(reg: Regex) extends Parser[String, String] {
       
    47   def parse(sb: String) = reg.findPrefixMatchOf(sb) match {
       
    48     case None => Set()
       
    49     case Some(m) => Set((m.matched, m.after.toString))  
       
    50   }
       
    51 }
       
    52 
       
    53 val NumParser = RegexParser("[0-9]+".r)
       
    54 def StringParser(s: String) = RegexParser(Regex.quote(s).r)
       
    55 
       
    56 // NumParserInt2 transforms a "string integer" into an Int;
       
    57 // needs new, because FunParser is not a case class
       
    58 
       
    59 val NumParserInt2 = new FunParser(NumParser, (s: String) => s.toInt)
       
    60 
       
    61 
       
    62 // convenience
       
    63 implicit def string2parser(s: String) = StringParser(s)
       
    64 implicit def char2parser(c: Char) = CharParser(c)
       
    65 
       
    66 implicit def ParserOps[I, T](p: Parser[I, T])(implicit ev: I => Seq[_]) = new {
       
    67   def || (q : => Parser[I, T]) = new AltParser[I, T](p, q)
       
    68   def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f)
       
    69   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
       
    70 }
       
    71 
       
    72 implicit def StringOps(s: String) = new {
       
    73   def || (q : => Parser[String, String]) = new AltParser[String, String](s, q)
       
    74   def || (r: String) = new AltParser[String, String](s, r)
       
    75   def ==>[S] (f: => String => S) = new FunParser[String, String, S](s, f)
       
    76   def ~[S] (q : => Parser[String, S]) = 
       
    77     new SeqParser[String, String, S](s, q)
       
    78   def ~ (r: String) = 
       
    79     new SeqParser[String, String, String](s, r)
       
    80 }
       
    81 
       
    82 // NumParserInt can now be written as
       
    83 val NumParserInt = NumParser ==> (s => s.toInt)
       
    84 
       
    85 
       
    86 lazy val Pal : Parser[String, String] = 
       
    87   (("a" ~ Pal ~ "a") ==> { case x ~ y ~ z => x + y + z } ||
       
    88    ("b" ~ Pal ~ "b") ==> { case x ~ y ~ z => x + y + z } || "a" || "b" || "")
       
    89 
       
    90 Pal.parse_all("abaaaba")
       
    91 Pal.parse("abaaaba")
       
    92 
       
    93 println("Palindrome: " + Pal.parse_all("abaaaba"))
       
    94 
       
    95 // well-nested parentheses parser (transforms '(' -> '{' , ')' -> '}' )
       
    96 lazy val P : Parser[String, String] = 
       
    97   "(" ~ P ~ ")" ~ P ==> { case _ ~ x ~ _ ~ y => "{" + x + "}" + y } || ""
       
    98 
       
    99 P.parse_all("(((()()))())")
       
   100 P.parse_all("(((()()))()))")
       
   101 P.parse_all(")(")
       
   102 P.parse_all("()")
       
   103 
       
   104 // Arithmetic Expressions (Terms and Factors)
       
   105 
       
   106 lazy val E: Parser[String, Int] = 
       
   107   (T ~ "+" ~ E) ==> { case x ~ y ~ z => x + z } ||
       
   108   (T ~ "-" ~ E) ==> { case x ~ y ~ z => x - z } || T 
       
   109 lazy val T: Parser[String, Int] = 
       
   110   (F ~ "*" ~ T) ==> { case x ~ y ~ z => x * z } || F
       
   111 lazy val F: Parser[String, Int] = 
       
   112   ("(" ~ E ~ ")") ==> { case x ~ y ~ z => y } || NumParserInt
       
   113 
       
   114 /* same parser but producing a string
       
   115 lazy val E: Parser[String, String] = 
       
   116   (T ~ "+" ~ E) ==> { case ((x, y), z) => "(" + x + ")+(" + z + ")"} || T 
       
   117 lazy val T: Parser[String, String] = 
       
   118   (F ~ "*" ~ T) ==> { case ((x, y), z) => "(" + x + ")*("+ z + ")"} || F
       
   119 lazy val F: Parser[String, String] = 
       
   120   ("(" ~ E ~ ")") ==> { case ((x, y), z) => y } || NumParser
       
   121 */
       
   122 
       
   123 println(E.parse_all("1+3+4"))
       
   124 println(E.parse("1+3+4"))
       
   125 println(E.parse_all("4*2+3"))
       
   126 println(E.parse_all("4*(2+3)"))
       
   127 println(E.parse_all("(4)*((2+3))"))
       
   128 println(E.parse_all("4/2+3"))
       
   129 println(E.parse("1 + 2 * 3"))
       
   130 println(E.parse_all("(1+2)+3"))
       
   131 println(E.parse_all("1+2+3"))  
       
   132 
       
   133 
       
   134 
       
   135 // no left-recursion allowed, otherwise will loop
       
   136 lazy val EL: Parser[String, Int] = 
       
   137   (EL ~ "+" ~ EL ==> { case x ~ y ~ z => x + z} || 
       
   138    EL ~ "*" ~ EL ==> { case x ~ y ~ z => x * z} ||
       
   139    "(" ~ EL ~ ")" ==> { case x ~ y ~ z => y} ||
       
   140    NumParserInt)
       
   141 
       
   142 //println(EL.parse_all("1+2+3"))
       
   143 
       
   144 
       
   145 
       
   146 
       
   147 // non-ambiguous vs ambiguous grammars
       
   148 
       
   149 // ambiguous
       
   150 lazy val S : Parser[String, String] =
       
   151   ("1" ~ S ~ S) ==> { case x ~ y ~ z => x + y + z } || ""
       
   152 
       
   153 S.parse("1" * 10)
       
   154 
       
   155 // non-ambiguous
       
   156 lazy val U : Parser[String, String] =
       
   157   ("1" ~ U) ==> { case x ~ y => x + y  } || ""
       
   158 
       
   159 U.parse("1" * 25)
       
   160 
       
   161 U.parse("11")
       
   162 U.parse("11111")
       
   163 U.parse("11011")
       
   164 
       
   165 U.parse_all("1" * 100)
       
   166 U.parse_all("1" * 100 + "0")
       
   167 
       
   168 lazy val UCount : Parser[String, Int] =
       
   169   ("1" ~ UCount) ==> { case x ~ y => y + 1 } || "" ==> { x => 0 }
       
   170 
       
   171 UCount.parse("11111")
       
   172 UCount.parse_all("11111")
       
   173 
       
   174 
       
   175 
       
   176 // Single Character parser
       
   177 lazy val One : Parser[String, String] = "1"
       
   178 lazy val Two : Parser[String, String] = "2"
       
   179 
       
   180 One.parse("1")
       
   181 One.parse("111")
       
   182 
       
   183 (One ~ One).parse("111")
       
   184 (One ~ One ~ One).parse("111")
       
   185 (One ~ One ~ One ~ One).parse("1111")
       
   186 
       
   187 (One || Two).parse("111")
       
   188 
       
   189 
       
   190 
       
   191 
       
   192 
       
   193 // a problem with the parser -> gets slow with nestedness
       
   194 E.parse("1")
       
   195 E.parse("(1)")
       
   196 E.parse("((1))")
       
   197 E.parse("(((1)))")
       
   198 E.parse("((((1))))")
       
   199 E.parse("((((((1))))))")
       
   200 E.parse("(((((((1)))))))")