|
1 |
|
//:load matcher.scala
|
3 |
|
// some regular expressions

// Characters allowed inside a word or a tag name: the full upper- and
// lower-case alphabet, digits and a fixed set of punctuation marks.
// The upper-case run must contain every letter (including 'W'); a missing
// letter makes every word containing it impossible to match as a WORD.
val SYM = RANGE("""ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,!?-{[()]}':;%0123456789""")

// a word is one or more SYM characters
val WORD = PLUS(SYM)

// begin tag "<name>" and end tag "</name>", where name is a WORD
val BTAG = SEQS("<", WORD, ">")
val ETAG = SEQS("</", WORD, ">")

// one or more blanks or newlines
val WHITESPACE = PLUS(RANGE(" \n"))
|
12 |
|
// Token classes: the label attached to each string the lexer recognises.
abstract class Token

// tokens that carry the matched text
case class T_BTAG(s: String) extends Token   // begin tag
case class T_ETAG(s: String) extends Token   // end tag
case class T_WORD(s: String) extends Token   // word

// whitespace carries no payload
case object T_WHITESPACE extends Token

// a string together with a list of tokens; not produced by the lexing rules
// in this file (presumably a non-terminal used by a parser - TODO confirm)
case class T_NT(s: String, rhs: List[Token]) extends Token
|
20 |
|
// Lexing rules: each entry pairs a regular expression with the action that
// turns the matched characters into a Token. The order of the entries is
// kept exactly as it was (it may matter to the Tokenizer).
val lexing_rules: List[Rule[Token]] = List(
  (BTAG,       s => T_BTAG(s.mkString)),
  (ETAG,       s => T_ETAG(s.mkString)),
  (WORD,       s => T_WORD(s.mkString)),
  (WHITESPACE, _ => T_WHITESPACE)        // matched text is discarded
)
|
26 |
|
// the tokenizer, built from lexing_rules
val T = Tokenizer(lexing_rules)

// Printing width: interpret starts a new output line when the next word
// would bring the running column count up to this value.
val WIDTH: Int = 60
|
32 |
|
33 |
|
// Prints a token list to the console, wrapping lines at WIDTH and applying
// console styles for the recognised tags.
//
//   ts  - tokens still to be printed
//   c   - number of characters already printed on the current output line
//   ctr - stack of active Console style codes, most recently opened first
//
// Begin tags <b>, <a>, <cyan>, <red> and <blink> push a style; only the
// matching end tags pop one. <p> and </p> start a new output line. Every
// other token (unrecognised tags, T_NT) is skipped without touching the
// stack, so an unknown pair such as <i>...</i> cannot remove a style it never
// pushed.
def interpret(ts: List[Token], c: Int, ctr: List[String]) : Unit = ts match {
  // end of input: reset the console attributes and finish the line
  case Nil => println(Console.RESET)

  // whitespace is printed unstyled as a single blank and counts as one column
  case T_WHITESPACE :: rest =>
    print(Console.RESET + " ")
    interpret(rest, c + 1, ctr)

  case T_WORD(s) :: rest =>
    // styles are stored newest-first, so reverse to apply them in opening order
    val newstr = Console.RESET + ctr.reverse.mkString + s
    if (c + s.length < WIDTH) {
      print(newstr)
      interpret(rest, c + s.length, ctr)
    } else {
      // word does not fit: start a new line and restart the column count
      print("\n" + newstr)
      interpret(rest, s.length, ctr)
    }

  // paragraph tags only break the line; they never touch the style stack
  case T_BTAG("<p>") :: rest => print("\n"); interpret(rest, 0, ctr)
  case T_ETAG("</p>") :: rest => print("\n"); interpret(rest, 0, ctr)

  // style-opening tags push the corresponding console code
  case T_BTAG("<b>") :: rest => interpret(rest, c, Console.BOLD :: ctr)
  case T_BTAG("<a>") :: rest => interpret(rest, c, Console.UNDERLINED :: ctr)
  case T_BTAG("<cyan>") :: rest => interpret(rest, c, Console.CYAN :: ctr)
  case T_BTAG("<red>") :: rest => interpret(rest, c, Console.RED :: ctr)
  case T_BTAG("<blink>") :: rest => interpret(rest, c, Console.BLINK :: ctr)

  // Style-closing tags pop the most recent style. drop(1) is used instead of
  // tail so that a stray end tag on an empty stack is ignored, not a crash.
  case T_ETAG("</b>" | "</a>" | "</cyan>" | "</red>" | "</blink>") :: rest =>
    interpret(rest, c, ctr.drop(1))

  // anything else is skipped
  case _ :: rest => interpret(rest, c, ctr)
}
|
58 |
|
// tokenize test.html and print it, starting at column 0 with no active styles
interpret(ts = T.fromFile("test.html"), c = 0, ctr = Nil)