progs/comb1.scala
changeset 734 5d860ff01938
parent 733 022e2cb1668d
child 735 fc2e3609d5e5
equal deleted inserted replaced
733:022e2cb1668d 734:5d860ff01938
     1 import scala.language.implicitConversions
       
     2 import scala.language.reflectiveCalls
       
     3 
       
     4 /* Note, in the lectures I did not show the implicit type 
       
     5  * constraint IsSeq, which means that the input type 'I' needs 
       
     6  * to be a sequence. */
       
     7 
       
     8 type IsSeq[A] = A => Seq[_]
       
     9 
       
    10 abstract class Parser[I : IsSeq, T] {
       
    11   def parse(ts: I): Set[(T, I)]
       
    12 
       
    13   def parse_all(ts: I) : Set[T] =
       
    14     for ((head, tail) <- parse(ts); 
       
    15         if tail.isEmpty) yield head
       
    16 }
       
    17 
       
    18 class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], 
       
    19                                  q: => Parser[I, S]) extends Parser[I, (T, S)] {
       
    20   def parse(sb: I) = 
       
    21     for ((head1, tail1) <- p.parse(sb); 
       
    22          (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2)
       
    23 }
       
    24 
       
    25 class AltParser[I : IsSeq, T](p: => Parser[I, T], 
       
    26                               q: => Parser[I, T]) extends Parser[I, T] {
       
    27   def parse(sb: I) = p.parse(sb) ++ q.parse(sb)   
       
    28 }
       
    29 
       
    30 class FunParser[I : IsSeq, T, S](p: => Parser[I, T], 
       
    31                                  f: T => S) extends Parser[I, S] {
       
    32   def parse(sb: I) = 
       
    33     for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
       
    34 }
       
    35 
       
    36 // atomic parsers for characters, numbers and strings
       
    37 case class CharParser(c: Char) extends Parser[String, Char] {
       
    38   def parse(sb: String) = 
       
    39     if (sb != "" && sb.head == c) Set((c, sb.tail)) else Set()
       
    40 }
       
    41 
       
    42 import scala.util.matching.Regex
       
    43 case class RegexParser(reg: Regex) extends Parser[String, String] {
       
    44   def parse(sb: String) = reg.findPrefixMatchOf(sb) match {
       
    45     case None => Set()
       
    46     case Some(m) => Set((m.matched, m.after.toString))  
       
    47   }
       
    48 }
       
    49 
       
    50 val NumParser = RegexParser("[0-9]+".r)
       
    51 def StringParser(s: String) = RegexParser(Regex.quote(s).r)
       
    52 
       
    53 // NumParserInt2 transforms a "string integer" into an actual Int;
       
    54 // needs new, because FunParser is not a case class
       
    55 val NumParserInt2 = new FunParser(NumParser, (s: String) => s.toInt)
       
    56 
       
    57 
       
    58 // convenience
       
    59 implicit def string2parser(s: String) = StringParser(s)
       
    60 implicit def char2parser(c: Char) = CharParser(c)
       
    61 
       
    62 implicit def ParserOps[I, T](p: Parser[I, T])(implicit ev: I => Seq[_]) = new {
       
    63   def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)
       
    64   def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f)
       
    65   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
       
    66 }
       
    67 
       
    68 implicit def StringOps(s: String) = new {
       
    69   def ||(q : => Parser[String, String]) = new AltParser[String, String](s, q)
       
    70   def ||(r: String) = new AltParser[String, String](s, r)
       
    71   def ==>[S] (f: => String => S) = new FunParser[String, String, S](s, f)
       
    72   def ~[S](q : => Parser[String, S]) = 
       
    73     new SeqParser[String, String, S](s, q)
       
    74   def ~(r: String) = 
       
    75     new SeqParser[String, String, String](s, r)
       
    76 }
       
    77 
       
    78 // NumParserInt can now be written as _ ===> _
       
    79 // the first part is the parser, and the second the 
       
    80 // semantic action
       
    81 val NumParserInt = NumParser ==> (s => s.toInt)
       
    82 
       
    83 
       
    84 // palindromes
       
    85 lazy val Pal : Parser[String, String] = 
       
    86   (("a" ~ Pal ~ "a") ==> { case ((x, y), z) => x + y + z } ||
       
    87    ("b" ~ Pal ~ "b") ==> { case ((x, y), z) => x + y + z } || "a" || "b" || "")
       
    88 
       
    89 Pal.parse_all("abaaaba")
       
    90 Pal.parse_all("abacba")
       
    91 Pal.parse("abaaaba")
       
    92 
       
    93 println("Palindrome: " + Pal.parse_all("abaaaba"))
       
    94 
       
    95 // parser for well-nested parentheses (transforms '(' -> '{' , ')' -> '}' )
       
    96 lazy val P : Parser[String, String] = 
       
    97   "(" ~ P ~ ")" ~ P ==> { case (((_, x), _), y) => "{" + x + "}" + y } || ""
       
    98 
       
    99 P.parse_all("(((()()))())")
       
   100 P.parse_all("(((()()))()))")
       
   101 P.parse_all(")(")
       
   102 P.parse_all("()")
       
   103 
       
   104 // just counts parentheses
       
   105 lazy val P2 : Parser[String, Int] = 
       
   106   ("(" ~ P2 ~ ")" ~ P2 ==> { case (((_, x), _), y) => x + y + 2 } || 
       
   107    "" ==> { _ => 0 })
       
   108 
       
   109 P2.parse_all("(((()()))())")
       
   110 P2.parse_all("(((()()))()))")
       
   111 
       
   112 // counts opening and closing parentheses
       
   113 lazy val P3 : Parser[String, Int] = 
       
   114   ("(" ~ P3 ==> { case (_, x) => x + 1 } ||
       
   115    ")" ~ P3 ==> { case (_, x) => x - 1 } || 
       
   116    "" ==> { _ => 0 })
       
   117 
       
   118 P3.parse_all("(((()()))())")
       
   119 P3.parse_all("(((()()))()))")
       
   120 P3.parse_all(")(")
       
   121 
       
   122 // Arithmetic Expressions (Terms and Factors)
       
   123 // (because it is mutually recursive, you need :paste 
       
   124 //  for munching this definition in the REPL)
       
   125 
       
   126 lazy val E: Parser[String, Int] = 
       
   127   (T ~ "+" ~ E) ==> { case ((x, y), z) => x + z } ||
       
   128   (T ~ "-" ~ E) ==> { case ((x, y), z) => x - z } || T 
       
   129 lazy val T: Parser[String, Int] = 
       
   130   (F ~ "*" ~ T) ==> { case ((x, y), z) => x * z } || F
       
   131 lazy val F: Parser[String, Int] = 
       
   132   ("(" ~ E ~ ")") ==> { case ((x, y), z) => y } || NumParserInt
       
   133 
       
   134 println(E.parse_all("1+3+4"))
       
   135 println(E.parse("1+3+4"))
       
   136 println(E.parse_all("4*2+3"))
       
   137 println(E.parse_all("4*(2+3)"))
       
   138 println(E.parse_all("(4)*((2+3))"))
       
   139 println(E.parse_all("4/2+3"))
       
   140 println(E.parse("1 + 2 * 3"))
       
   141 println(E.parse_all("(1+2)+3"))
       
   142 println(E.parse_all("1+2+3"))  
       
   143 
       
   144 /* same parser but producing a string
       
   145 lazy val E: Parser[String, String] = 
       
   146   (T ~ "+" ~ E) ==> { case ((x, y), z) => "(" + x + ")+(" + z + ")"} || T 
       
   147 lazy val T: Parser[String, String] = 
       
   148   (F ~ "*" ~ T) ==> { case ((x, y), z) => "(" + x + ")*("+ z + ")"} || F
       
   149 lazy val F: Parser[String, String] = 
       
   150   ("(" ~ E ~ ")") ==> { case ((x, y), z) => y } || NumParser
       
   151 */
       
   152 
       
   153 // no left-recursion allowed, otherwise will loop
       
   154 lazy val EL: Parser[String, Int] = 
       
   155   (EL ~ "+" ~ EL ==> { case ((x, y), z) => x + z} || 
       
   156    EL ~ "*" ~ EL ==> { case ((x, y), z) => x * z} ||
       
   157    "(" ~ EL ~ ")" ==> { case ((x, y), z) => y} ||
       
   158    NumParserInt)
       
   159 
       
   160 //println(EL.parse_all("1+2+3"))
       
   161 
       
   162 
       
   163 // non-ambiguous vs ambiguous grammars
       
   164 
       
   165 // ambiguous
       
   166 lazy val S : Parser[String, String] =
       
   167   ("1" ~ S ~ S ~ S) ==> { case (((x, y), z), v) => x + y + z } || ""
       
   168 
       
   169 S.parse("1" * 10)
       
   170 
       
   171 // non-ambiguous
       
   172 lazy val U : Parser[String, String] =
       
   173   ("1" ~ U) ==> { case (x, y) => x + y  } || ""
       
   174 
       
   175 U.parse("1" * 25)
       
   176 
       
   177 U.parse("11")
       
   178 U.parse("11111")
       
   179 U.parse("11011")
       
   180 
       
   181 U.parse_all("1" * 100)
       
   182 U.parse_all("1" * 100 + "0")
       
   183 
       
   184 lazy val UCount : Parser[String, Int] =
       
   185   ("1" ~ UCount) ==> { case (x, y) => y + 1 } || "" ==> { x => 0 }
       
   186 
       
   187 UCount.parse("11111")
       
   188 UCount.parse_all("11111")
       
   189 
       
   190 
       
   191 
       
   192 // Single Character parser
       
   193 lazy val One : Parser[String, String] = "1"
       
   194 lazy val Two : Parser[String, String] = "2"
       
   195 
       
   196 One.parse("1")
       
   197 One.parse("111")
       
   198 
       
   199 (One ~ One).parse("111")
       
   200 (One ~ One ~ One).parse("1111")
       
   201 (One ~ One ~ One ~ One).parse("1111")
       
   202 
       
   203 (One || Two).parse("111")
       
   204 
       
   205 
       
   206 // a problem with the arithmetic expression parser -> gets 
       
   207 // slow with deeply nested parentheses
       
   208 E.parse("1")
       
   209 E.parse("(1)")
       
   210 E.parse("((1))")
       
   211 E.parse("(((1)))")
       
   212 E.parse("((((1))))")
       
   213 E.parse("((((((1))))))")
       
   214 E.parse("(((((((1)))))))")
       
   215 
       
   216 
       
   217 
       
   218 
       
   219 /*
       
   220 
       
   221 starting symbols
       
   222 tokenise/detokenise
       
   223 :paste
       
   224 pairs in sequences
       
   225 
       
   226 */