progs/comb1.scala
changeset 628 8067d0a8ba04
parent 624 8d0af38389bc
child 629 1b718d6065c2
equal deleted inserted replaced
627:f5214da1976e 628:8067d0a8ba04
     1 import scala.language.implicitConversions
     1 import scala.language.implicitConversions
     2 import scala.language.reflectiveCalls
     2 import scala.language.reflectiveCalls
     3 
     3 
     4 /* Note, in the lectures I did not show the implicit type consraint
     4 /* Note, in the lectures I did not show the implicit type consraint
     5  * I => Seq[_] , which means that the input type I needs to be
     5  * I => Seq[_], which means that the input type 'I' needs to be
     6  * a sequence, subtype of Seq. */
     6  * a sequence. */
     7 
     7 
     8 abstract class Parser[I, T](implicit ev: I => Seq[_]) {
     8 abstract class Parser[I, T](implicit ev: I => Seq[_]) {
     9   def parse(ts: I): Set[(T, I)]
     9   def parse(ts: I): Set[(T, I)]
    10 
    10 
    11   def parse_all(ts: I) : Set[T] =
    11   def parse_all(ts: I) : Set[T] =
    29                          f: T => S)(implicit ev: I => Seq[_]) extends Parser[I, S] {
    29                          f: T => S)(implicit ev: I => Seq[_]) extends Parser[I, S] {
    30   def parse(sb: I) = 
    30   def parse(sb: I) = 
    31     for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
    31     for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
    32 }
    32 }
    33 
    33 
    34 // atomic parsers  
    34 // atomic parsers for characters, numbers and strings
    35 case class CharParser(c: Char) extends Parser[String, Char] {
    35 case class CharParser(c: Char) extends Parser[String, Char] {
    36   def parse(sb: String) = 
    36   def parse(sb: String) = 
    37     if (sb != "" && sb.head == c) Set((c, sb.tail)) else Set()
    37     if (sb != "" && sb.head == c) Set((c, sb.tail)) else Set()
    38 }
    38 }
    39 
    39 
    46 }
    46 }
    47 
    47 
    48 val NumParser = RegexParser("[0-9]+".r)
    48 val NumParser = RegexParser("[0-9]+".r)
    49 def StringParser(s: String) = RegexParser(Regex.quote(s).r)
    49 def StringParser(s: String) = RegexParser(Regex.quote(s).r)
    50 
    50 
    51 // NumParserInt transforms a "string integer" into an Int;
    51 // NumParserInt2 transforms a "string integer" into an Int;
    52 // needs new, because FunParser is not a case class
    52 // needs new, because FunParser is not a case class
    53 val NumParserInt = new FunParser(NumParser, (s: String) => s.toInt)
    53 val NumParserInt2 = new FunParser(NumParser, (s: String) => s.toInt)
    54 
    54 
    55 
    55 
    56 // convenience
    56 // convenience
    57 implicit def string2parser(s: String) = StringParser(s)
    57 implicit def string2parser(s: String) = StringParser(s)
    58 implicit def char2parser(c: Char) = CharParser(c)
    58 implicit def char2parser(c: Char) = CharParser(c)
    59 
    59 
    60 implicit def ParserOps[I, T](p: Parser[I, T])(implicit ev: I => Seq[_]) = new {
    60 implicit def ParserOps[I, T](p: Parser[I, T])(implicit ev: I => Seq[_]) = new {
    61   def | (q : => Parser[I, T]) = new AltParser[I, T](p, q)
    61   def || (q : => Parser[I, T]) = new AltParser[I, T](p, q)
    62   def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f)
    62   def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f)
    63   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
    63   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
    64 }
    64 }
    65 
    65 
    66 implicit def StringOps(s: String) = new {
    66 implicit def StringOps(s: String) = new {
    67   def | (q : => Parser[String, String]) = new AltParser[String, String](s, q)
    67   def || (q : => Parser[String, String]) = new AltParser[String, String](s, q)
    68   def | (r: String) = new AltParser[String, String](s, r)
    68   def || (r: String) = new AltParser[String, String](s, r)
    69   def ==>[S] (f: => String => S) = new FunParser[String, String, S](s, f)
    69   def ==>[S] (f: => String => S) = new FunParser[String, String, S](s, f)
    70   def ~[S] (q : => Parser[String, S]) = 
    70   def ~[S] (q : => Parser[String, S]) = 
    71     new SeqParser[String, String, S](s, q)
    71     new SeqParser[String, String, S](s, q)
    72   def ~ (r: String) = 
    72   def ~ (r: String) = 
    73     new SeqParser[String, String, String](s, r)
    73     new SeqParser[String, String, String](s, r)
    76 // NumParserInt can now be written as
    76 // NumParserInt can now be written as
    77 val NumParserInt = NumParser ==> (s => s.toInt)
    77 val NumParserInt = NumParser ==> (s => s.toInt)
    78 
    78 
    79 
    79 
    80 lazy val Pal : Parser[String, String] = 
    80 lazy val Pal : Parser[String, String] = 
    81   (("a" ~ Pal ~ "a") ==> { case ((x, y), z) => x + y + z } |
    81   (("a" ~ Pal ~ "a") ==> { case ((x, y), z) => x + y + z } ||
    82    ("b" ~ Pal ~ "b") ==> { case ((x, y), z) => x + y + z } | "a" | "b" | "")
    82    ("b" ~ Pal ~ "b") ==> { case ((x, y), z) => x + y + z } || "a" || "b" || "")
    83 
    83 
    84 Pal.parse_all("abaaaba")
    84 Pal.parse_all("abaaaba")
    85 Pal.parse("abaaaba")
    85 Pal.parse("abaaaba")
    86 
    86 
    87 println("Palindrome: " + Pal.parse_all("abaaaba"))
    87 println("Palindrome: " + Pal.parse_all("abaaaba"))
    88 
    88 
    89 // well-nested parentheses parser (transforms '(' -> '{' , ')' -> '}' )
    89 // well-nested parentheses parser (transforms '(' -> '{' , ')' -> '}' )
    90 lazy val P : Parser[String, String] = 
    90 lazy val P : Parser[String, String] = 
    91   "(" ~ P ~ ")" ~ P ==> { case (((_, x), _), y) => "{" + x + "}" + y } | ""
    91   "(" ~ P ~ ")" ~ P ==> { case (((_, x), _), y) => "{" + x + "}" + y } || ""
    92 
    92 
    93 P.parse_all("(((()()))())")
    93 P.parse_all("(((()()))())")
    94 P.parse_all("(((()()))()))")
    94 P.parse_all("(((()()))()))")
    95 P.parse_all(")(")
    95 P.parse_all(")(")
    96 P.parse_all("()")
    96 P.parse_all("()")
    97 
    97 
    98 // Arithmetic Expressions
    98 // Arithmetic Expressions (Terms and Factors)
    99 
    99 
   100 lazy val E: Parser[String, Int] = 
   100 lazy val E: Parser[String, Int] = 
   101   (T ~ "+" ~ E) ==> { case ((x, y), z) => x + z } |
   101   (T ~ "+" ~ E) ==> { case ((x, y), z) => x + z } ||
   102   (T ~ "-" ~ E) ==> { case ((x, y), z) => x - z } | T 
   102   (T ~ "-" ~ E) ==> { case ((x, y), z) => x - z } || T 
   103 lazy val T: Parser[String, Int] = 
   103 lazy val T: Parser[String, Int] = 
   104   (F ~ "*" ~ T) ==> { case ((x, y), z) => x * z } | F
   104   (F ~ "*" ~ T) ==> { case ((x, y), z) => x * z } || F
   105 lazy val F: Parser[String, Int] = 
   105 lazy val F: Parser[String, Int] = 
   106   ("(" ~ E ~ ")") ==> { case ((x, y), z) => y } | NumParserInt
   106   ("(" ~ E ~ ")") ==> { case ((x, y), z) => y } || NumParserInt
   107 
   107 
       
   108 /* same parser but producing a string
   108 lazy val E: Parser[String, String] = 
   109 lazy val E: Parser[String, String] = 
   109   (T ~ "+" ~ E) ==> { case ((x, y), z) => "(" + x + ")+(" + z + ")"} | T 
   110   (T ~ "+" ~ E) ==> { case ((x, y), z) => "(" + x + ")+(" + z + ")"} || T 
   110 lazy val T: Parser[String, String] = 
   111 lazy val T: Parser[String, String] = 
   111   (F ~ "*" ~ T) ==> { case ((x, y), z) => "(" + x + ")*("+ z + ")"} | F
   112   (F ~ "*" ~ T) ==> { case ((x, y), z) => "(" + x + ")*("+ z + ")"} || F
   112 lazy val F: Parser[String, String] = 
   113 lazy val F: Parser[String, String] = 
   113   ("(" ~ E ~ ")") ==> { case ((x, y), z) => y } | NumParser
   114   ("(" ~ E ~ ")") ==> { case ((x, y), z) => y } || NumParser
       
   115 */
   114 
   116 
   115 println(E.parse_all("1+3+4"))
   117 println(E.parse_all("1+3+4"))
   116 println(E.parse("1+3+4"))
   118 println(E.parse("1+3+4"))
   117 println(E.parse_all("4*2+3"))
   119 println(E.parse_all("4*2+3"))
   118 println(E.parse_all("4*(2+3)"))
   120 println(E.parse_all("4*(2+3)"))
   124 
   126 
   125 
   127 
   126 
   128 
   127 // no left-recursion allowed, otherwise will loop
   129 // no left-recursion allowed, otherwise will loop
   128 lazy val EL: Parser[String, Int] = 
   130 lazy val EL: Parser[String, Int] = 
   129   (EL ~ "+" ~ EL ==> { case ((x, y), z) => x + z} | 
   131   (EL ~ "+" ~ EL ==> { case ((x, y), z) => x + z} || 
   130    EL ~ "*" ~ EL ==> { case ((x, y), z) => x * z} |
   132    EL ~ "*" ~ EL ==> { case ((x, y), z) => x * z} ||
   131    "(" ~ EL ~ ")" ==> { case ((x, y), z) => y} |
   133    "(" ~ EL ~ ")" ==> { case ((x, y), z) => y} ||
   132    NumParserInt)
   134    NumParserInt)
   133 
   135 
   134 //println(EL.parse_all("1+2+3"))
   136 //println(EL.parse_all("1+2+3"))
   135 
   137 
   136 
   138 
   137 
   139 
   138 
   140 
   139 // non-ambiguous vs ambiguous grammars
   141 // non-ambiguous vs ambiguous grammars
       
   142 
       
   143 // ambiguous
   140 lazy val S : Parser[String, String] =
   144 lazy val S : Parser[String, String] =
   141   ("1" ~ S ~ S) ==> { case ((x, y), z) => x + y + z } | ""
   145   ("1" ~ S ~ S) ==> { case ((x, y), z) => x + y + z } || ""
   142 
   146 
   143 S.parse("1" * 17)
   147 S.parse("1" * 10)
   144 
   148 
       
   149 // non-ambiguous
   145 lazy val U : Parser[String, String] =
   150 lazy val U : Parser[String, String] =
   146   ("1" ~ U) ==> { case (x, y) => x + y  } | ""
   151   ("1" ~ U) ==> { case (x, y) => x + y  } || ""
   147 
   152 
   148 U.parse("1" * 25)
   153 U.parse("1" * 25)
   149 
   154 
   150 U.parse("11")
   155 U.parse("11")
   151 U.parse("11111")
   156 U.parse("11111")
   153 
   158 
   154 U.parse_all("1" * 100)
   159 U.parse_all("1" * 100)
   155 U.parse_all("1" * 100 + "0")
   160 U.parse_all("1" * 100 + "0")
   156 
   161 
   157 lazy val UCount : Parser[String, Int] =
   162 lazy val UCount : Parser[String, Int] =
   158   ("1" ~ UCount) ==> { case (x, y) => y + 1 } | "" ==> { x => 0 }
   163   ("1" ~ UCount) ==> { case (x, y) => y + 1 } || "" ==> { x => 0 }
   159 
   164 
   160 UCount.parse("11111")
   165 UCount.parse("11111")
   161 UCount.parse_all("11111")
   166 UCount.parse_all("11111")
   162 
   167 
   163 
   168 
   171 
   176 
   172 (One ~ One).parse("111")
   177 (One ~ One).parse("111")
   173 (One ~ One ~ One).parse("111")
   178 (One ~ One ~ One).parse("111")
   174 (One ~ One ~ One ~ One).parse("1111")
   179 (One ~ One ~ One ~ One).parse("1111")
   175 
   180 
   176 (One | Two).parse("111")
   181 (One || Two).parse("111")
   177 
   182 
   178 
   183 
   179 
   184 
       
   185 
       
   186 
       
   187 // a problem with the parser -> gets slow with nestedness
       
   188 E.parse("1")
       
   189 E.parse("(1)")
       
   190 E.parse("((1))")
       
   191 E.parse("(((1)))")
       
   192 E.parse("((((1))))")
       
   193 E.parse("((((((1))))))")
       
   194 E.parse("(((((((1)))))))")