html.scala
author Christian Urban <christian dot urban at kcl dot ac dot uk>
Wed, 21 Nov 2012 07:28:28 +0000
changeset 69 cc3f7908b942
parent 66 9215b9fb8852
child 71 7717f20f0504
permissions -rw-r--r--
tuned


//:load matcher.scala

// some regular expressions
// (RANGE, PLUS and SEQS are the combinators provided by matcher.scala)

// the characters that may occur inside a word or a tag name: letters, digits
// and some punctuation; the upper-case alphabet used to lack 'W', so words
// such as "World" could not be matched by WORD
val SYM = RANGE("""ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,!?-{[()]}':;%0123456789""")
// a word is built from SYM characters only (PLUS presumably means "one or more")
val WORD = PLUS(SYM)

// opening tag, e.g. <b>, and closing tag, e.g. </b>
val BTAG = SEQS("<", WORD, ">")
val ETAG = SEQS("</", WORD, ">")

// blanks and newlines (tabs and carriage returns are not included)
val WHITESPACE = PLUS(RANGE(" \n"))

// for classifying the strings that have been recognised
// Token is the common supertype; the lexing rules in this file produce
// T_BTAG, T_ETAG, T_WORD and T_WHITESPACE.
abstract class Token
// whitespace; the matched text itself is not kept
case object T_WHITESPACE extends Token
// a word, s is the matched text
case class T_WORD(s: String) extends Token
// a closing tag, s is the full matched text including "</" and ">"
case class T_ETAG(s: String) extends Token
// an opening tag, s is the full matched text including "<" and ">"
case class T_BTAG(s: String) extends Token
// not produced by the lexing rules in this file
// NOTE(review): presumably a non-terminal with its right-hand side, for use by a parser -- confirm
case class T_NT(s: String, rhs: List[Token]) extends Token

// the lexing rules: each one pairs a regular expression with the action
// that turns the matched input into a token
val lexing_rules: List[Rule[Token]] =
  List(
    (BTAG, cs => T_BTAG(cs.mkString)),
    (ETAG, cs => T_ETAG(cs.mkString)),
    (WORD, cs => T_WORD(cs.mkString)),
    // the matched whitespace is discarded, only its presence is recorded
    (WHITESPACE, _ => T_WHITESPACE)
  )

// the tokenizer built from the rules above
val T = Tokenizer(lexing_rules)

// line width for printing: interpret starts a new line when the next word
// would bring the current line to this many characters
val WIDTH: Int = 60


// Prints a token stream to the console, wrapping lines and applying styles.
//   ts  - the tokens that still have to be printed
//   c   - the number of characters already printed on the current line
//   ctr - stack of Console escape codes for the style tags that are currently
//         open; the most recently opened tag is at the head
def interpret(ts: List[Token], c: Int, ctr: List[String]) : Unit = ts match {
  // end of input: reset the console attributes and terminate the line
  case Nil => println(Console.RESET)
  // a whitespace token is always printed as a single unstyled blank
  case T_WHITESPACE::rest => print(Console.RESET + " "); interpret(rest, c + 1, ctr)
  case T_WORD(s)::rest => {
    // the styles are emitted again in front of every word (outermost first),
    // because the preceding whitespace has reset them
    val newstr = Console.RESET + ctr.reverse.mkString + s
    if (c + s.length < WIDTH) {
      print(newstr)
      interpret(rest, c + s.length, ctr)
    }
    else {
      // the word does not fit any more: continue on a fresh line
      print("\n" + newstr)
      interpret(rest, s.length, ctr)
    }
  }
  // paragraphs start and end with a line break and reset the column counter
  case T_BTAG("<p>")::rest => print("\n"); interpret(rest, 0, ctr)
  case T_ETAG("</p>")::rest => print("\n"); interpret(rest, 0, ctr)
  // opening style tags push their escape code onto the style stack
  case T_BTAG("<b>")::rest => interpret(rest, c, Console.BOLD :: ctr)
  case T_BTAG("<a>")::rest => interpret(rest, c, Console.UNDERLINED :: ctr)
  case T_BTAG("<cyan>")::rest => interpret(rest, c, Console.CYAN :: ctr)
  case T_BTAG("<red>")::rest => interpret(rest, c, Console.RED :: ctr)
  case T_BTAG("<blink>")::rest => interpret(rest, c, Console.BLINK :: ctr)
  // Only the closing tags of the styles above pop the stack: unknown opening
  // tags push nothing, so their closing tags must not remove anything either.
  // The most recently opened style is removed regardless of the tag name.
  // drop(1) instead of tail keeps an unmatched closing tag from throwing on
  // an empty stack.
  case T_ETAG("</b>" | "</a>" | "</cyan>" | "</red>" | "</blink>")::rest =>
    interpret(rest, c, ctr.drop(1))
  // all remaining tokens (unknown opening and closing tags, T_NT) are skipped
  case _::rest => interpret(rest, c, ctr)
}
 
// tokenize test.html and print it, starting at column 0 with no style active
interpret(ts = T.fromFile("test.html"), c = 0, ctr = Nil)