progs/html1.scala
author Christian Urban <christian dot urban at kcl dot ac dot uk>
Fri, 27 Sep 2013 15:43:25 +0100
changeset 116 010ae7288327
parent 93 4794759139ea
permissions -rw-r--r--
updated
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
92
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     1
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     2
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     3
//:load matcher.scala
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     4
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     5
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     6
// some regular expressions
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     7
// Characters accepted inside words and tag names: the complete upper- and
// lower-case Latin alphabet followed by a handful of punctuation marks.
// (The upper-case run previously skipped 'W'.)
val LETTER = RANGE("""ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,!?-{[()]}':;%""")
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     8
// The ten decimal digits '0' through '9', passed to RANGE as one string.
val DIGIT = RANGE(('0' to '9').mkString)
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     9
// The decimal digits '1' through '9' (no zero), passed to RANGE as one string.
val NONZERODIGIT = RANGE(('1' to '9').mkString)
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    10
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    11
// Content allowed between the angle brackets of a begin tag.
// NOTE(review): ALT/PLUS/SEQS are defined outside this file; reading them in
// the usual regex sense (alternative / one-or-more / sequence), this is either
// a run of LETTERs or LETTERs "=" LETTERs -- confirm against matcher.scala.
val NAME = ALT(PLUS(LETTER), SEQS(PLUS(LETTER),"=", PLUS(LETTER)))
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    12
// Begin tag: "<", then a NAME, then ">".
// NOTE(review): assumes SEQS (defined outside this file) matches its
// arguments one after another -- confirm.
val BTAG = SEQS("<", NAME, ">") 
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    13
// End tag: "</", then LETTERs, then ">". Unlike BTAG it is built from
// PLUS(LETTER) directly rather than from NAME.
// NOTE(review): assumes SEQS/PLUS (defined outside this file) mean
// sequence / one-or-more -- confirm.
val ETAG = SEQS("</", PLUS(LETTER), ">")
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    14
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    15
// A word of text built from LETTER and DIGIT characters.
// NOTE(review): assumes PLUS/ALT (defined outside this file) mean
// one-or-more / alternative -- confirm.
val WORD = PLUS(ALT(LETTER, DIGIT))
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    16
// Whitespace between words and tags: spaces and newlines, plus tabs and
// carriage returns so that tab-indented files and files with CRLF line
// endings are covered as well.
// NOTE(review): assumes PLUS (defined outside this file) means one-or-more.
val WHITESPACE = PLUS(RANGE(" \n\t\r"))
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    17
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    18
// for classifying the strings that have been recognised
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    19
/** Common supertype of everything the lexer hands to later stages.
 *
 *  NOTE(review): deliberately left unsealed; the file appears to be meant
 *  for `:load` in a REPL, where sealing may break subclass definitions.
 */
abstract class Token

/** Whitespace; carries no text. */
case object T_WHITESPACE extends Token

/** A word of running text, with the string it was built from. */
case class T_WORD(s: String) extends Token

/** A begin tag, with the string it was built from. */
case class T_BTAG(s: String) extends Token

/** An end tag, with the string it was built from. */
case class T_ETAG(s: String) extends Token

/** A named token carrying a list of further tokens.
 *  Not referenced anywhere else in the visible part of this file.
 */
case class T_NT(s: String, rhs: List[Token]) extends Token
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    25
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    26
// Convenience wrapper for tokenising a String: the string is turned into a
// List[Char] and handed, together with the rules `rs`, to `tokenize`
// (defined outside this file). The result of `tokenize` is returned as is.
def tokenizer(rs: List[Rule[Token]], s: String) : List[Token] = {
  val chars = s.toList
  tokenize(rs, chars)
}
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    28
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    29
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    30
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    31
// lexing rules for the HTML-like markup (begin tags, end tags, words, whitespace)
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    32
// One rule per token kind: a regular expression paired with the function
// that turns the matched characters into a Token. Tag and word tokens keep
// the matched text (joined with mkString); whitespace discards it.
// NOTE(review): the rule order (BTAG, ETAG, WORD, WHITESPACE) is kept as it
// was; whether `tokenize` uses the order to break ties is not visible here.
val lexing_rules: List[Rule[Token]] =
  List(
    (BTAG,       chars => T_BTAG(chars.mkString)),
    (ETAG,       chars => T_ETAG(chars.mkString)),
    (WORD,       chars => T_WORD(chars.mkString)),
    (WHITESPACE, _     => T_WHITESPACE)
  )
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    37
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    38
// Token list for the input document, later passed to interpret.
// NOTE(review): tokenize_file is defined outside this file; presumably it
// reads "test.html" (relative to the working directory) and applies
// lexing_rules to its contents -- confirm.
val ts = tokenize_file(lexing_rules, "test.html")
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    39
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    40
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    41
// Column limit used by interpret: a word is printed on the current line only
// while the running column count plus the word's length stays below WIDTH;
// otherwise a line break is emitted first.
val WIDTH = 60
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    42
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    43
// True exactly for begin-tag and end-tag tokens; every other token is false.
def is_tag(t: Token) = t match {
  case T_BTAG(_) | T_ETAG(_) => true
  case _ => false
}
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    48
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    49
/** Renders a token list on the console, wrapping lines at WIDTH and mapping
 *  a few tags to Console escape codes.
 *
 *  - Whitespace tokens print a single space and advance the column by one.
 *  - Words are printed after a RESET followed by all currently active style
 *    codes; a word that would bring the column count to WIDTH or beyond is
 *    preceded by a line break.
 *  - `<p>` and `</p>` start a new line and reset the column to 0.
 *  - `<b>`, `<A>`, `<cyan>`, `<red>` and `<blink>` push a style code; their
 *    matching end tags pop the most recent one.
 *  - Every other token (including unknown begin and end tags) is skipped
 *    without touching the style stack.
 *
 *  @param ts  tokens that still have to be rendered
 *  @param c   number of characters counted on the current output line
 *  @param ctr stack of active Console escape codes, most recent first
 */
def interpret(ts: List[Token], c: Int, ctr: List[String]) : Unit = ts match {
  case Nil => println(Console.RESET)
  case T_WHITESPACE::rest => print(" "); interpret(rest, c + 1, ctr)
  case T_WORD(s)::rest => {
    val newc = c + s.length
    // ctr has the newest style at its head, so reverse it to emit the
    // codes in the order in which the tags were opened.
    val newstr = Console.RESET + ctr.reverse.mkString + s
    if (newc < WIDTH) {
      print(newstr)
      interpret(rest, newc, ctr)
    }
    else {
      // the word does not fit: break the line and restart the column count
      print("\n" + newstr)
      interpret(rest, s.length, ctr)
    }
  }
  case T_BTAG("<p>")::rest => print("\n"); interpret(rest, 0, ctr)
  case T_ETAG("</p>")::rest => print("\n"); interpret(rest, 0, ctr)
  case T_BTAG("<b>")::rest => interpret(rest, c, Console.BOLD :: ctr)
  case T_BTAG("<A>")::rest => interpret(rest, c, Console.UNDERLINED :: ctr)
  case T_BTAG("<cyan>")::rest => interpret(rest, c, Console.CYAN :: ctr)
  case T_BTAG("<red>")::rest => interpret(rest, c, Console.RED :: ctr)
  case T_BTAG("<blink>")::rest => interpret(rest, c, Console.BLINK :: ctr)
  // Only the end tags of style-pushing begin tags pop a style; popping on
  // every end tag would discard a style that belongs to an enclosing tag.
  // drop(1) instead of tail: a stray end tag with an empty stack is ignored
  // rather than raising an exception.
  case T_ETAG("</b>" | "</A>" | "</cyan>" | "</red>" | "</blink>")::rest =>
    interpret(rest, c, ctr.drop(1))
  case _::rest => interpret(rest, c, ctr)
}
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    74
 
e85600529ca5 moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    75
// Render the token list, starting at column 0 with no active styles.
interpret(ts, 0, Nil)