# HG changeset patch
# User Christian Urban
# Date 1382887002 0
# Node ID 66b699c80479f894de6310534a3ccb9cb0cd12e1
# Parent  6c1d214c39ef5f7216d1c9b3d36b612d8cc3d695
tuned progs

diff -r 6c1d214c39ef -r 66b699c80479 progs/token.scala
--- a/progs/token.scala	Sun Oct 27 14:17:55 2013 +0000
+++ b/progs/token.scala	Sun Oct 27 15:16:42 2013 +0000
@@ -89,40 +89,38 @@
   case STAR(r) => SEQ(der(c, r), STAR(r))
 }
 
-// returns the position of the last Some-element in the list
-def last(stack: List[Option[Rexp]]) : Int = stack match {
-  case Nil => 0
-  case None::stack => last(stack)
-  case Some(r)::stack => 1 + stack.length
-}
 
 // calculates derivatives until all of them are zeroable
 @tailrec
-def munch(cs: List[Char], rs: List[Rexp], stack: List[Option[Rexp]]) : Int = (cs, rs) match {
-  case (_, Nil) => last(stack)
-  case (Nil, _) => last(stack)
-  case (c::cs, rs) => {
-    val ds = rs.map(der(c, _))
-    val rs_nzero = ds.filterNot(zeroable(_))
-    val rs_nulls = ds.filter(nullable(_))
-    val opt = Try(Some(rs_nulls.head)) getOrElse None
-    munch(cs, rs_nzero, opt::stack)
+def munch(s: List[Char],
+          pos: Int,
+          rs: List[Rexp],
+          last: Option[Int]): Option[Int] = rs match {
+  case Nil => last
+  case rs if (s.length <= pos) => last
+  case rs => {
+    val ders = rs.map(der(s(pos), _))
+    val rs_nzero = ders.filterNot(zeroable(_))
+    val rs_nulls = ders.filter(nullable(_))
+    val new_last = if (rs_nulls != Nil) Some(pos) else last
+    munch(s, 1 + pos, rs_nzero, new_last)
   }
 }
 
 // iterates the munching function and prints
 // out the component strings
 @tailrec
-def tokenize(s: String, rs: List[Rexp]) : Unit = munch(s.toList, rs, Nil) match {
-  case 0 if (s == "") => println("EOF")
-  case 0 => println(s"Lexing error: $s")
-  case n => {
-    val (head, tail) = s.splitAt(n)
-    print(s"|${head.replaceAll("\n","Ret")}|")
+def tokenize(s: String, rs: List[Rexp]) : Unit = munch(s.toList, 0, rs, None) match {
+  case None if (s == "") => println("EOF")
+  case None => println(s"Lexing error: $s")
+  case Some(n) => {
+    val (head, tail) = s.splitAt(n + 1)
+    print(s"|${head.replaceAll("\n","RET")}|")
     tokenize(tail, rs)
   }
 }
 
+
 val test_prog = """
 start := XXX;
 x := start;
diff -r 6c1d214c39ef -r 66b699c80479 progs/token2.scala
--- a/progs/token2.scala	Sun Oct 27 14:17:55 2013 +0000
+++ b/progs/token2.scala	Sun Oct 27 15:16:42 2013 +0000
@@ -113,36 +113,31 @@
   case STAR(r) => SEQ(der(c, r), STAR(r))
 }
 
-// returns the position and Token function of
-// the last Some-element in the list
-def last(stack: List[Option[TokenFun]]) : Option[(Int, TokenFun)] = stack match {
-  case Nil => None
-  case None::stack => last(stack)
-  case Some(tf)::stack => Some(1 + stack.length, tf)
-}
+
 
 // calculates derivatives until all of them are zeroable
 @tailrec
-def munch(cs: List[Char],
+def munch(s: List[Char],
+          pos: Int,
           rs: LexRules,
-          stack: List[Option[TokenFun]]) : Option[(Int, TokenFun)] = (cs, rs) match {
-  case (_, Nil) => last(stack)
-  case (Nil, _) => last(stack)
-  case (c::cs, rs) => {
-    val ds = rs.map({case (r, tf) => (der(c, r), tf)})
-    val rs_nzero = ds.filterNot({case (r, _) => zeroable(r)})
-    val rs_nulls = ds.filter({case (r, _) => nullable(r)})
-    val opt = Try(Some(rs_nulls.head._2)) getOrElse None
-    munch(cs, rs_nzero, opt::stack)
+          last: Option[(Int, TokenFun)]): Option[(Int, TokenFun)] = rs match {
+  case Nil => last
+  case rs if (s.length <= pos) => last
+  case rs => {
+    val ders = rs.map({case (r, tf) => (der(s(pos), r), tf)})
+    val rs_nzero = ders.filterNot({case (r, _) => zeroable(r)})
+    val rs_nulls = ders.filter({case (r, _) => nullable(r)})
+    val new_last = if (rs_nulls != Nil) Some((pos, rs_nulls.head._2)) else last
+    munch(s, 1 + pos, rs_nzero, new_last)
  }
 }
 
 // iterates the munching function and returns a Token list
-def tokenize(s: String, rs: LexRules) : List[Token] = munch(s.toList, rs, Nil) match {
+def tokenize(s: String, rs: LexRules) : List[Token] = munch(s.toList, 0, rs, None) match {
   case None if (s == "") => Nil
   case None => List(T_ERR("Lexing error: $s"))
   case Some((n, tf)) => {
-    val (head, tail) = s.splitAt(n)
+    val (head, tail) = s.splitAt(n + 1)
     tf(head)::tokenize(tail, rs)
   }
 }
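
For readers of this changeset: the rewrite replaces the old scheme, which pushed an Option onto a stack for every character and then scanned the stack with a separate last function, by a single last accumulator threaded through a position-indexed loop. This keeps munch properly tail-recursive and records the end of the longest (maximal-munch) match as it goes. Below is a minimal, self-contained sketch of that loop in the style of progs/token.scala, with the derivative functions (der, nullable, zeroable) filled in so it runs on its own; the Rexp constructors follow the course files, but the rules value and the smoke test at the end are hypothetical illustrations, not part of the patch.

import scala.annotation.tailrec

abstract class Rexp
case object NULL extends Rexp                   // matches no string
case object EMPTY extends Rexp                  // matches the empty string
case class CHAR(c: Char) extends Rexp
case class ALT(r1: Rexp, r2: Rexp) extends Rexp
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
case class STAR(r: Rexp) extends Rexp

// does r match the empty string?
def nullable(r: Rexp): Boolean = r match {
  case NULL => false
  case EMPTY => true
  case CHAR(_) => false
  case ALT(r1, r2) => nullable(r1) || nullable(r2)
  case SEQ(r1, r2) => nullable(r1) && nullable(r2)
  case STAR(_) => true
}

// does r match no string at all?
def zeroable(r: Rexp): Boolean = r match {
  case NULL => true
  case EMPTY => false
  case CHAR(_) => false
  case ALT(r1, r2) => zeroable(r1) && zeroable(r2)
  case SEQ(r1, r2) => zeroable(r1) || zeroable(r2)
  case STAR(_) => false
}

// Brzozowski derivative of r with respect to the character c
def der(c: Char, r: Rexp): Rexp = r match {
  case NULL => NULL
  case EMPTY => NULL
  case CHAR(d) => if (c == d) EMPTY else NULL
  case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
  case SEQ(r1, r2) =>
    if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
    else SEQ(der(c, r1), r2)
  case STAR(r1) => SEQ(der(c, r1), STAR(r1))
}

// walks over s starting at pos; last remembers the most recent
// position where some rule was nullable, i.e. the end of the
// longest match seen so far; stops when every rule is zeroable
// or the input is exhausted
@tailrec
def munch(s: List[Char], pos: Int, rs: List[Rexp], last: Option[Int]): Option[Int] =
  rs match {
    case Nil => last
    case _ if (s.length <= pos) => last
    case _ => {
      val ders = rs.map(der(s(pos), _))
      val rs_nzero = ders.filterNot(zeroable(_))
      val rs_nulls = ders.filter(nullable(_))
      val new_last = if (rs_nulls != Nil) Some(pos) else last
      munch(s, 1 + pos, rs_nzero, new_last)
    }
  }

// splits off one maximal token at a time (returned here as plain
// strings rather than printed, so the result is easy to inspect)
def tokenize(s: String, rs: List[Rexp]): List[String] =
  munch(s.toList, 0, rs, None) match {
    case None if (s == "") => Nil
    case None => sys.error(s"Lexing error: $s")
    case Some(n) => {
      val (head, tail) = s.splitAt(n + 1)
      head :: tokenize(tail, rs)
    }
  }

// hypothetical smoke test with two rules
val rules = List(STAR(CHAR('a')), CHAR('b'))
println(tokenize("aab", rules))   // prints List(aa, b)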