diff -r 719fd738d2a0 -r 1a4065f965fb matcher.scala
--- a/matcher.scala	Wed Nov 28 08:28:26 2012 +0000
+++ b/matcher.scala	Mon Dec 03 15:35:27 2012 +0000
@@ -1,5 +1,7 @@
+package object matcher {
 
-// regular expressions including NOT
+// regular expressions
+// including constructors for NOT and ALLC
 abstract class Rexp
 
 case object NULL extends Rexp
@@ -53,6 +55,41 @@
   case NOT(r) => NOT(der (c, r))
 }
 
+// main class for the tokenizer
+case class Tokenizer[T](rules: List[(Rexp, List[Char] => T)], excl: List[T] = Nil) {
+
+def munch(r: Rexp, action: List[Char] => T, s: List[Char], t: List[Char]) : Option[(List[Char], T)] =
+  s match {
+    case Nil if (nullable(r)) => Some(Nil, action(t))
+    case Nil => None
+    case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, action(t))
+    case c::s if (no_more(der (c, r))) => None
+    case c::s => munch(der (c, r), action, s, t ::: List(c))
+  }
+
+def one_token(s: List[Char]) : Either[(List[Char], T), String] = {
+  val somes = rules.map { (r) => munch(r._1, r._2, s, Nil) }.flatten
+  if (somes == Nil) Right(s.mkString)
+  else Left(somes sortBy (_._1.length) head)
+}
+
+def tokenize(cs: List[Char]) : List[T] = cs match {
+  case Nil => Nil
+  case _ => one_token(cs) match {
+    case Left((rest, token)) => token :: tokenize(rest)
+    case Right(s) => { println("Cannot tokenize: \"" + s + "\""); Nil }
+  }
+}
+
+def fromString(s: String) : List[T] =
+  tokenize(s.toList).filterNot(excl.contains(_))
+
+def fromFile(name: String) : List[T] =
+  fromString(io.Source.fromFile(name).mkString)
+
+}
+
+
 // regular expression for specifying
 // ranges of characters
 def Range(s : List[Char]) : Rexp = s match {
@@ -90,39 +127,4 @@
 }
 implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
 
-
-type Rule[T] = (Rexp, List[Char] => T)
-
-case class Tokenizer[T](rules: List[Rule[T]], excl: List[T] = Nil) {
-
-  def munch(r: Rexp, action: List[Char] => T, s: List[Char], t: List[Char]) : Option[(List[Char], T)] =
-    s match {
-      case Nil if (nullable(r)) => Some(Nil, action(t))
-      case Nil => None
-      case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, action(t))
-      case c::s if (no_more(der (c, r))) => None
-      case c::s => munch(der (c, r), action, s, t ::: List(c))
-    }
-
-  def one_token(s: List[Char]) : Either[(List[Char], T), String] = {
-    val somes = rules.map { (r) => munch(r._1, r._2, s, Nil) }.flatten
-    if (somes == Nil) Right(s.mkString)
-    else Left(somes sortBy (_._1.length) head)
-  }
-
-  def tokenize(cs: List[Char]) : List[T] = cs match {
-    case Nil => Nil
-    case _ => one_token(cs) match {
-      case Left((rest, token)) => token :: tokenize(rest)
-      case Right(s) => { println("Cannot tokenize: \"" + s + "\""); Nil }
-    }
-  }
-
-  def fromString(s: String) : List[T] =
-    tokenize(s.toList).filterNot(excl.contains(_))
-
-  def fromFile(name: String) : List[T] =
-    fromString(io.Source.fromFile(name).mkString)
-
 }
-
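
A note for readers of this changeset: the Tokenizer moved above is driven by a list of rules, each pairing a Rexp with an action of type List[Char] => T that builds a token from the matched characters. The following is a minimal usage sketch, not part of the changeset: it assumes the rest of matcher.scala (the Rexp type, the string2rexp implicit and the Tokenizer above) is in scope, and the Token type and the KEYWORD/WHITESPACE regular expressions are made up for illustration.

// hypothetical token type for the sketch
abstract class Token
case object T_WHITESPACE extends Token
case class T_KEYWORD(s: String) extends Token

// example regular expressions, obtained from strings via the string2rexp implicit
val KEYWORD: Rexp = "if"
val WHITESPACE: Rexp = " "

// one rule per token kind: a Rexp paired with its List[Char] => Token action
val lexer = Tokenizer[Token](
  List(
    (KEYWORD,    (cs: List[Char]) => T_KEYWORD(cs.mkString)),
    (WHITESPACE, (cs: List[Char]) => T_WHITESPACE)),
  excl = List(T_WHITESPACE))   // whitespace is tokenized, then filtered out by fromString

// expected result, with whitespace excluded: List(T_KEYWORD("if"), T_KEYWORD("if"))
val tokens: List[Token] = lexer.fromString("if if")

Design-wise, one_token keeps, among all rules that produce a match, the candidate whose leftover input is shortest (somes sortBy (_._1.length) head), so the longest match wins; the excl parameter then lets callers drop layout tokens such as whitespace after tokenization.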