html.scala
author Christian Urban <christian dot urban at kcl dot ac dot uk>
Wed, 21 Nov 2012 09:04:11 +0000
changeset 70 e6868bd2942b
parent 66 9215b9fb8852
child 71 7717f20f0504
permissions -rw-r--r--
tuned
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
66
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     1
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     2
//:load matcher.scala
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     3
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     4
// some regular expressions
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     5
// Characters allowed inside a word or tag name: ASCII letters, digits and the
// listed punctuation. The uppercase alphabet must be complete; an earlier
// version of this literal skipped 'W' ("...UVXYZ"), so any word or tag name
// containing a capital W was not covered by SYM.
val SYM = RANGE("""ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,!?-{[()]}':;%0123456789""")
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     6
// A word is built from SYM characters via PLUS (defined outside this file;
// presumably "one or more repetitions" -- confirm in matcher.scala).
val WORD = PLUS(SYM)
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     7
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     8
// Opening tag: "<", a WORD, then ">". The matched text keeps the angle
// brackets, which is why interpret matches on strings such as "<p>" and "<b>".
// (SEQS is defined outside this file; presumably sequential composition.)
val BTAG = SEQS("<", WORD, ">") 
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     9
// Closing tag: "</", a WORD, then ">". The matched text keeps the delimiters,
// e.g. the "</p>" that interpret matches on.
// (SEQS is defined outside this file; presumably sequential composition.)
val ETAG = SEQS("</", WORD, ">")
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    10
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    11
// Whitespace between words and tags. Tab and carriage return are included
// alongside space and newline so that tab-indented input and input with CRLF
// line endings is covered as well. interpret renders every whitespace token
// as a single space, so the extra characters do not change the output layout.
// (PLUS and RANGE are defined outside this file.)
val WHITESPACE = PLUS(RANGE(" \n\t\r"))
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    12
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    13
// for classifying the strings that have been recognised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    14
// Base type of everything the tokenizer produces; lexing_rules is a
// List[Rule[Token]] and interpret consumes a List[Token].
// NOTE(review): could be `sealed` if this file is always compiled as a single
// unit (not fed line by line to the REPL) -- confirm how the script is run.
abstract class Token
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    15
// Token emitted by the WHITESPACE lexing rule; the matched text is discarded.
case object T_WHITESPACE extends Token
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    16
// Token emitted by the WORD lexing rule; s is the matched text.
case class T_WORD(s: String) extends Token
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    17
// Token emitted by the ETAG lexing rule; s is the full matched closing tag,
// delimiters included (interpret compares it against "</p>").
case class T_ETAG(s: String) extends Token
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    18
// Token emitted by the BTAG lexing rule; s is the full matched opening tag,
// delimiters included (interpret compares it against "<p>", "<b>", ...).
case class T_BTAG(s: String) extends Token
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    19
// A token that carries a name and a list of child tokens.
// NOTE(review): no lexing rule produces it and interpret has no case for it in
// the visible part of this script; presumably a non-terminal for a later
// parsing stage -- confirm before relying on it.
case class T_NT(s: String, rhs: List[Token]) extends Token
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    20
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    21
// Lexing rules: each entry pairs a regular expression with the function that
// turns the matched characters into a Token.
// NOTE(review): the relative order of the rules is kept exactly as before; how
// overlapping matches are resolved depends on Tokenizer (defined elsewhere).
val lexing_rules: List[Rule[Token]] = List(
  (BTAG,       matched => T_BTAG(matched.mkString)),
  (ETAG,       matched => T_ETAG(matched.mkString)),
  (WORD,       matched => T_WORD(matched.mkString)),
  (WHITESPACE, _       => T_WHITESPACE)
)
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    26
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    27
// the tokenizer
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    28
// Tokenizer instance built from lexing_rules; its fromFile method is used at
// the end of the script to lex the input file into a token list.
val T = Tokenizer(lexing_rules)
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    29
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    30
// width for printing
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    31
// Line-wrapping limit used by interpret: a word is placed on a fresh line
// unless (current column + word length) stays strictly below this value.
val WIDTH = 60
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    32
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    33
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    34
/** Renders a token stream to the console with line wrapping and ANSI styles.
  *
  * Words are printed with the currently active styles and wrapped so that a
  * line stays below WIDTH characters. `<p>` and `</p>` start a new line. The
  * style tags `<b>`, `<a>`, `<cyan>`, `<red>` and `<blink>` push a Console
  * style that stays active until the matching closing tag pops it. All other
  * tags are ignored.
  *
  * Closing tags are handled defensively:
  *  - only closing tags of the style tags listed above pop a style, so the
  *    closing tag of an ignored tag (e.g. `</i>`) no longer removes a style
  *    that its opening tag never pushed;
  *  - popping uses `drop(1)` instead of `tail`, so an unmatched closing tag on
  *    an empty style stack is skipped instead of throwing.
  *
  * Every recursive call is in tail position.
  *
  * @param ts  tokens still to be rendered
  * @param c   number of characters already printed on the current line
  * @param ctr stack of active Console style codes, most recently opened first
  */
def interpret(ts: List[Token], c: Int, ctr: List[String]) : Unit = ts match {
  // end of input: reset the terminal attributes and finish the line
  case Nil => println(Console.RESET)
  // any run of whitespace is rendered as one unstyled space
  case T_WHITESPACE::rest => print(Console.RESET + " "); interpret(rest, c + 1, ctr)
  case T_WORD(s)::rest => {
    // styles are pushed at the head, so reverse to emit them in opening order
    val newstr = Console.RESET + ctr.reverse.mkString + s
    if (c + s.length < WIDTH) {
      print(newstr)
      interpret(rest, c + s.length, ctr)
    }
    else {
      // the word does not fit: move it to a fresh line
      print("\n" + newstr)
      interpret(rest, s.length, ctr)
    }
  }
  // paragraph boundaries start a new line and reset the column counter
  case T_BTAG("<p>")::rest => print("\n"); interpret(rest, 0, ctr)
  case T_ETAG("</p>")::rest => print("\n"); interpret(rest, 0, ctr)
  // style tags push the corresponding console escape code
  case T_BTAG("<b>")::rest => interpret(rest, c, Console.BOLD :: ctr)
  case T_BTAG("<a>")::rest => interpret(rest, c, Console.UNDERLINED :: ctr)
  case T_BTAG("<cyan>")::rest => interpret(rest, c, Console.CYAN :: ctr)
  case T_BTAG("<red>")::rest => interpret(rest, c, Console.RED :: ctr)
  case T_BTAG("<blink>")::rest => interpret(rest, c, Console.BLINK :: ctr)
  // closing a style tag pops the most recent style; drop(1) is safe on Nil
  case T_ETAG("</b>" | "</a>" | "</cyan>" | "</red>" | "</blink>")::rest =>
    interpret(rest, c, ctr.drop(1))
  // everything else (unknown opening/closing tags, other tokens) is skipped
  case _::rest => interpret(rest, c, ctr)
}
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    58
 
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    59
// Script entry point: tokenize test.html and render it, starting at column 0
// with no active styles.
interpret(T.fromFile("test.html"), 0, Nil)