author | Christian Urban <christian dot urban at kcl dot ac dot uk> |
Wed, 21 Nov 2012 09:04:11 +0000 | |
changeset 70 | e6868bd2942b |
parent 66 | 9215b9fb8852 |
child 71 | 7717f20f0504 |
permissions | -rw-r--r-- |
66
9215b9fb8852
tuned
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
1 |
|
9215b9fb8852
tuned
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
2 |
//:load matcher.scala |
9215b9fb8852
tuned
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
3 |
|
9215b9fb8852
tuned
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
4 |
// Regular expressions for the lexer, built from the combinators RANGE, PLUS
// and SEQS (defined outside this file, see matcher.scala).

// Characters allowed inside a word or tag name. The upper-case alphabet now
// includes 'W', which the original character list had left out.
val SYM = RANGE("""ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,!?-{[()]}':;%0123456789""")

// A word is built from SYM via PLUS (presumably "one or more" - confirm in matcher.scala).
val WORD = PLUS(SYM)

// Begin tag: "<", a WORD, then ">".
val BTAG = SEQS("<", WORD, ">")

// End tag: "</", a WORD, then ">".
val ETAG = SEQS("</", WORD, ">")

// Whitespace is built from blanks and newlines only.
// NOTE(review): tabs and carriage returns are not included - confirm that is intended.
val WHITESPACE = PLUS(RANGE(" \n"))
9215b9fb8852
tuned
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
12 |
|
9215b9fb8852
tuned
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
13 |
/** Common supertype of everything the lexer can hand to the interpreter. */
abstract class Token

/** A run of whitespace; carries no text. */
case object T_WHITESPACE extends Token

/** A recognised word together with its text `s`. */
case class T_WORD(s: String) extends Token

/** An end tag; `s` is the full tag text (for example `"</p>"`). */
case class T_ETAG(s: String) extends Token

/** A begin tag; `s` is the full tag text (for example `"<p>"`). */
case class T_BTAG(s: String) extends Token

/** A named token `s` carrying a list of further tokens `rhs`. */
case class T_NT(s: String, rhs: List[Token]) extends Token
9215b9fb8852
tuned
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
20 |
|
9215b9fb8852
tuned
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
21 |
// Lexing rules: each one pairs a regular expression with the function that
// turns the matched input into a Token. The list order is BTAG, ETAG, WORD,
// WHITESPACE (presumably also the priority order used by the tokenizer -
// confirm in matcher.scala).
val lexing_rules: List[Rule[Token]] = {
  val beginTagRule: Rule[Token] = (BTAG, matched => T_BTAG(matched.mkString))
  val endTagRule: Rule[Token] = (ETAG, matched => T_ETAG(matched.mkString))
  val wordRule: Rule[Token] = (WORD, matched => T_WORD(matched.mkString))
  // The matched text is irrelevant for whitespace, so it is discarded.
  val whitespaceRule: Rule[Token] = (WHITESPACE, _ => T_WHITESPACE)

  List(beginTagRule, endTagRule, wordRule, whitespaceRule)
}
9215b9fb8852
tuned
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
26 |
|
9215b9fb8852
tuned
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
27 |
// Tokenizer instance built from `lexing_rules`.
val T = Tokenizer(lexing_rules)

// Column limit that `interpret` compares against when deciding whether a
// word still fits on the current output line.
val WIDTH: Int = 60
9215b9fb8852
tuned
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
32 |
|
9215b9fb8852
tuned
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
33 |
|
9215b9fb8852
tuned
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
34 |
/** Prints a token stream to the console, wrapping lines and applying ANSI styles.
  *
  * Words are printed with the currently active styles. Before a word is
  * printed, the column count plus the word length is compared with `WIDTH`;
  * if it does not fit, a newline is emitted first. `<p>` and `</p>` start a
  * new line. The begin tags `<b>`, `<a>`, `<cyan>`, `<red>` and `<blink>`
  * push a console style; the matching end tags pop the most recent style.
  * All other tokens are skipped.
  *
  * Unlike the earlier version, an end tag only pops a style when it belongs
  * to one of the recognised style tags, and popping an empty style stack is
  * a no-op instead of throwing. Unknown tag pairs such as `<i>...</i>`
  * therefore no longer remove an enclosing style or crash the interpreter.
  *
  * @param ts  tokens still to be printed
  * @param c   number of columns already used on the current output line
  * @param ctr stack of active console style codes, most recent first
  */
def interpret(ts: List[Token], c: Int, ctr: List[String]): Unit = {
  // Begin tags that switch on a console style.
  val styleOf: Map[String, String] = Map(
    "<b>"     -> Console.BOLD,
    "<a>"     -> Console.UNDERLINED,
    "<cyan>"  -> Console.CYAN,
    "<red>"   -> Console.RED,
    "<blink>" -> Console.BLINK
  )
  // End tags that switch a style off again, e.g. "<b>" yields "</b>".
  val closingTags: Set[String] = styleOf.keySet.map(tag => "</" + tag.drop(1))

  @scala.annotation.tailrec
  def loop(tokens: List[Token], col: Int, active: List[String]): Unit = tokens match {
    case Nil =>
      println(Console.RESET)

    case T_WHITESPACE :: rest =>
      print(Console.RESET + " ")
      loop(rest, col + 1, active)

    case T_WORD(s) :: rest =>
      // Styles are stacked newest-first, so reverse to emit them in opening order.
      val styled = Console.RESET + active.reverse.mkString + s
      if (col + s.length < WIDTH) {
        print(styled)
        loop(rest, col + s.length, active)
      } else {
        // The word does not fit: continue on a fresh line.
        print("\n" + styled)
        loop(rest, s.length, active)
      }

    case (T_BTAG("<p>") | T_ETAG("</p>")) :: rest =>
      print("\n")
      loop(rest, 0, active)

    case T_BTAG(tag) :: rest if styleOf.contains(tag) =>
      loop(rest, col, styleOf(tag) :: active)

    case T_ETAG(tag) :: rest if closingTags.contains(tag) =>
      // drop(1) tolerates an empty stack, where tail would throw.
      loop(rest, col, active.drop(1))

    case _ :: rest =>
      // Unrecognised tags and any other tokens are ignored.
      loop(rest, col, active)
  }

  loop(ts, c, ctr)
}
9215b9fb8852
tuned
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
58 |
|
9215b9fb8852
tuned
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
59 |
// Tokenize test.html and print it, starting at column 0 with no active styles.
interpret(ts = T.fromFile("test.html"), c = 0, ctr = Nil)