--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/html.scala Wed Nov 21 02:20:16 2012 +0000
@@ -0,0 +1,59 @@
+
+//:load matcher.scala
+
+// regular expressions for the lexer; SYM covers every ASCII letter and digit plus some punctuation
+val SYM = RANGE("""ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,!?-{[()]}':;%0123456789""")
+val WORD = PLUS(SYM) // a word is built from SYM characters only (no '<', '>' or '/')
+
+val BTAG = SEQS("<", WORD, ">") // opening tag such as <b>
+val ETAG = SEQS("</", WORD, ">") // closing tag such as </b>
+
+val WHITESPACE = PLUS(RANGE(" \n")) // only blanks and newlines; NOTE(review): tabs and \r are not covered
+
+// token kinds used to classify each recognised piece of input
+abstract class Token
+case class T_BTAG(s: String) extends Token // carries the text matched as an opening tag
+case class T_ETAG(s: String) extends Token // carries the text matched as a closing tag
+case class T_WORD(s: String) extends Token // carries the text matched as a word
+case object T_WHITESPACE extends Token // whitespace; the matched text is not kept
+case class T_NT(s: String, rhs: List[Token]) extends Token // a name plus a list of tokens
+
+val lexing_rules: List[Rule[Token]] = List( // pairs each regular expression with the token it yields
+  (BTAG, chars => T_BTAG(chars.mkString)),
+  (ETAG, chars => T_ETAG(chars.mkString)),
+  (WORD, chars => T_WORD(chars.mkString)),
+  (WHITESPACE, _ => T_WHITESPACE))
+
+// tokenizer built from lexing_rules
+val T = Tokenizer(lexing_rules)
+
+// column limit for printing: a word that would reach it is moved to a new line
+val WIDTH = 60
+
+// Prints the tokens ts to the console. c is the current output column and
+// ctr is the stack of active Console style codes (most recently opened first).
+def interpret(ts: List[Token], c: Int, ctr: List[String]) : Unit= ts match {
+  case Nil => println(Console.RESET)
+  case T_WHITESPACE::rest => print(Console.RESET + " "); interpret(rest, c + 1, ctr)
+  case T_WORD(s)::rest => {
+    // every word is preceded by a reset and all active styles, oldest first
+    val newstr = Console.RESET + ctr.reverse.mkString + s
+    // the word stays on the current line only if it ends before WIDTH
+    val fits = c + s.length < WIDTH
+    print(if (fits) newstr else "\n" + newstr)
+    interpret(rest, if (fits) c + s.length else s.length, ctr)
+  }
+  case T_BTAG("<p>")::rest => print("\n"); interpret(rest, 0, ctr)
+  case T_ETAG("</p>")::rest => print("\n"); interpret(rest, 0, ctr)
+  case T_BTAG("<b>")::rest => interpret(rest, c, Console.BOLD :: ctr)
+  case T_BTAG("<a>")::rest => interpret(rest, c, Console.UNDERLINED :: ctr)
+  case T_BTAG("<cyan>")::rest => interpret(rest, c, Console.CYAN :: ctr)
+  case T_BTAG("<red>")::rest => interpret(rest, c, Console.RED :: ctr)
+  case T_BTAG("<blink>")::rest => interpret(rest, c, Console.BLINK :: ctr)
+  // only closers of the style tags above pop a style; drop(1) tolerates a stray closer on an empty stack
+  case T_ETAG("</b>" | "</a>" | "</cyan>" | "</red>" | "</blink>")::rest => interpret(rest, c, ctr.drop(1))
+  // any other token (unknown opening or closing tag, T_NT) changes neither output nor styles
+  case _::rest => interpret(rest, c, ctr)
+}
+
+interpret(T.fromFile("test.html"), 0, Nil) // print test.html, starting at column 0 with no active styles