|
1 |
|
2 |
|
//:load matcher.scala
|
4 |
|
5 |
|
// Character classes used by the regular expressions of this lexer.

// Latin letters plus the punctuation characters that are allowed inside
// words and tag names. The upper-case run has to list all 26 letters
// (including 'W'), otherwise capitalised words containing the missing
// letter are not matched by LETTER.
val LETTER = RANGE("""ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,!?-{[()]}':;%""")

// The decimal digits 0-9.
val DIGIT = RANGE("0123456789")

// The decimal digits 1-9 (no zero); not referenced by the definitions
// visible in this file.
val NONZERODIGIT = RANGE("123456789")
|
10 |
|
// Regular expressions for markup tags.

// A tag name: either one or more LETTERs, or two such runs joined by "=".
val NAME = ALT(
  PLUS(LETTER),
  SEQS(PLUS(LETTER), "=", PLUS(LETTER))
)

// Opening tag: "<", a NAME, then ">".
val BTAG = SEQS("<", NAME, ">")

// Closing tag: "</", one or more LETTERs, then ">".
val ETAG = SEQS("</", PLUS(LETTER), ">")
|
14 |
|
// A word: one or more characters, each taken from LETTER or DIGIT.
val WORD = PLUS(
  ALT(LETTER, DIGIT)
)

// Whitespace: one or more spaces or newlines.
// NOTE(review): tab and carriage return are not part of this class.
val WHITESPACE = PLUS(
  RANGE(" \n")
)
|
17 |
|
// Tokens: the lexer classifies every recognised string as one of these.
abstract class Token

// A run of whitespace; the matched text itself is not kept.
case object T_WHITESPACE extends Token

// A word, carrying the matched text.
case class T_WORD(s: String) extends Token

// A closing tag, carrying the matched text.
case class T_ETAG(s: String) extends Token

// An opening tag, carrying the matched text.
case class T_BTAG(s: String) extends Token

// A named token with a list of sub-tokens; not used by the code visible
// in this file (presumably a non-terminal -- TODO confirm).
case class T_NT(s: String, rhs: List[Token]) extends Token
|
25 |
|
// Lexes the string `s` with the rules `rs`: the string is turned into a
// list of characters and handed to `tokenize`, whose result is returned.
def tokenizer(rs: List[Rule[Token]], s: String): List[Token] = {
  val chars = s.toList
  tokenize(rs, chars)
}
|
28 |
|
29 |
|
30 |
|
// Lexing rules for the HTML-like markup: each rule pairs a regular
// expression with a function that builds the token from the matched
// characters. The order of the rules is kept exactly as before, in case
// `tokenize` uses it to resolve ties.
val lexing_rules: List[Rule[Token]] =
  List(
    (BTAG, chars => T_BTAG(chars.mkString)),
    (ETAG, chars => T_ETAG(chars.mkString)),
    (WORD, chars => T_WORD(chars.mkString)),
    // the matched whitespace text is discarded
    (WHITESPACE, _ => T_WHITESPACE)
  )
|
37 |
|
// Token stream produced by lexing the file "test.html" with `lexing_rules`.
val ts = tokenize_file(lexing_rules, "test.html")

// Column limit that `interpret` compares against when deciding whether a
// word still fits on the current output line.
val WIDTH: Int = 60
|
42 |
|
// Returns true when `t` is an opening or a closing tag token and false
// for every other kind of token.
def is_tag(t: Token): Boolean = t match {
  case T_BTAG(_) | T_ETAG(_) => true
  case _                     => false
}
|
48 |
|
// Prints a token stream to the terminal, wrapping lines and applying
// Console control codes for the recognised style tags.
//
//   ts  - the tokens that still have to be printed
//   c   - number of characters already accounted for on the current line
//   ctr - stack of the currently active Console control codes; the most
//         recently opened style is at the head
//
// Returns Unit; all output goes to standard output via print/println.
def interpret(ts: List[Token], c: Int, ctr: List[String]) : Unit = ts match {
  // end of input: reset the terminal attributes and finish the line
  case Nil => println(Console.RESET)
  // every whitespace token is printed as exactly one space
  case T_WHITESPACE::rest => print(" "); interpret(rest, c + 1, ctr)
  case T_WORD(s)::rest => {
    val newc = c + s.length
    // re-emit all active styles (oldest first) in front of each word
    val newstr = Console.RESET + ctr.reverse.mkString + s
    if (newc < WIDTH) {
      print(newstr)
      interpret(rest, newc, ctr)
    }
    else {
      // the word does not fit: start a new line and count from its length
      print("\n" + newstr)
      interpret(rest, s.length, ctr)
    }
  }
  // paragraph tags only break the line; they never touch the style stack
  case T_BTAG("<p>")::rest => print("\n"); interpret(rest, 0, ctr)
  case T_ETAG("</p>")::rest => print("\n"); interpret(rest, 0, ctr)
  // recognised style tags push their control code onto the stack
  case T_BTAG("<b>")::rest => interpret(rest, c, Console.BOLD :: ctr)
  case T_BTAG("<A>")::rest => interpret(rest, c, Console.UNDERLINED :: ctr)
  case T_BTAG("<cyan>")::rest => interpret(rest, c, Console.CYAN :: ctr)
  case T_BTAG("<red>")::rest => interpret(rest, c, Console.RED :: ctr)
  case T_BTAG("<blink>")::rest => interpret(rest, c, Console.BLINK :: ctr)
  // Any other closing tag pops the most recent style. drop(1) is used
  // instead of tail so that a closing tag arriving while the stack is
  // empty is ignored rather than throwing on the empty list.
  // NOTE(review): opening tags that are not listed above push nothing,
  // yet their closing tags still pop one entry here.
  case T_ETAG(_)::rest => interpret(rest, c, ctr.drop(1))
  // every other token is skipped
  case _::rest => interpret(rest, c, ctr)
}
|
74 |
|
// Print the lexed document, starting at column 0 with no active styles.
interpret(ts, c = 0, ctr = Nil)