author | Christian Urban <christian.urban@kcl.ac.uk> |
Mon, 14 Dec 2020 19:22:12 +0000 | |
changeset 818 | 6928a677d26f |
parent 93 | 4794759139ea |
permissions | -rw-r--r-- |
92
e85600529ca5
moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
1 |
|
e85600529ca5
moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
2 |
|
e85600529ca5
moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
3 |
//:load matcher.scala |
e85600529ca5
moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
4 |
|
e85600529ca5
moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
5 |
|
e85600529ca5
moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
6 |
// Regular expressions for the lexer. They are built from the constructors
// RANGE, ALT, PLUS and SEQS, which are not defined in this file
// (presumably they come from matcher.scala -- confirm).

// One "letter": an ASCII letter or one of the punctuation characters that may
// appear inside words and tag names. The upper-case alphabet used to lack 'W',
// so no word or tag containing a capital W could be matched.
val LETTER = RANGE("""ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,!?-{[()]}':;%""")
val DIGIT = RANGE("0123456789")
val NONZERODIGIT = RANGE("123456789")

// the inside of a begin tag: either `letters` or `letters=letters`
val NAME = ALT(PLUS(LETTER), SEQS(PLUS(LETTER),"=", PLUS(LETTER)))
// begin tag, e.g. <b>
val BTAG = SEQS("<", NAME, ">")
// end tag, e.g. </b>
val ETAG = SEQS("</", PLUS(LETTER), ">")

// a word is a non-empty run of letters and digits
val WORD = PLUS(ALT(LETTER, DIGIT))
// whitespace is a non-empty run of blanks and newlines
val WHITESPACE = PLUS(RANGE(" \n"))
e85600529ca5
moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
17 |
|
e85600529ca5
moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
18 |
// Token classes: each string recognised by the lexer is wrapped in one of these.
abstract class Token

// a run of whitespace; the matched text itself is not kept
case object T_WHITESPACE extends Token

// a word, carrying the matched text
case class T_WORD(s: String) extends Token

// an end tag, carrying the full matched text (the patterns below use e.g. "</p>")
case class T_ETAG(s: String) extends Token

// a begin tag, carrying the full matched text (the patterns below use e.g. "<b>")
case class T_BTAG(s: String) extends Token

// a named token with a list of sub-tokens; not used in the rest of this section
case class T_NT(s: String, rhs: List[Token]) extends Token
e85600529ca5
moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
25 |
|
e85600529ca5
moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
26 |
// Convenience wrapper: turns the input string into a list of characters and
// hands it, together with the rules, to `tokenize` (defined outside this file).
def tokenizer(rs: List[Rule[Token]], s: String) : List[Token] = {
  val chars = s.toList
  tokenize(rs, chars)
}
e85600529ca5
moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
28 |
|
e85600529ca5
moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
29 |
|
e85600529ca5
moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
30 |
|
e85600529ca5
moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
31 |
// Lexing rules for the HTML-like input: each rule pairs a regular expression
// with the action that builds a token from the matched characters.
val lexing_rules: List[Rule[Token]] =
  List(
    (BTAG, cs => T_BTAG(cs.mkString)),
    (ETAG, cs => T_ETAG(cs.mkString)),
    (WORD, cs => T_WORD(cs.mkString)),
    // the matched whitespace is discarded; only its presence is recorded
    (WHITESPACE, _ => T_WHITESPACE)
  )
e85600529ca5
moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
37 |
|
e85600529ca5
moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
38 |
// the token list obtained by running the lexing rules over the file test.html
val ts: List[Token] = tokenize_file(lexing_rules, "test.html")

// column limit used by `interpret` when deciding where to break a line
val WIDTH: Int = 60
e85600529ca5
moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
42 |
|
e85600529ca5
moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
43 |
// True exactly when the token is a begin tag or an end tag.
def is_tag(t: Token): Boolean = t match {
  case _: T_BTAG | _: T_ETAG => true
  case _ => false
}
e85600529ca5
moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
48 |
|
e85600529ca5
moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
49 |
/** Prints a token list to the console, breaking lines at WIDTH columns and
 *  turning the recognised tags into console escape codes.
 *
 *  @param ts  the tokens that still have to be printed
 *  @param c   the number of characters already printed on the current line
 *  @param ctr the stack of escape codes for the currently open styling tags,
 *             most recently opened first
 */
def interpret(ts: List[Token], c: Int, ctr: List[String]) : Unit = ts match {
  // end of input: reset the console attributes and finish the line
  case Nil => println(Console.RESET)
  case T_WHITESPACE::rest => print(" "); interpret(rest, c + 1, ctr)
  case T_WORD(s)::rest => {
    val newc = c + s.length
    // reset first, then re-apply the open styles from outermost to innermost
    val newstr = Console.RESET + ctr.reverse.mkString + s
    if (newc < WIDTH) {
      print(newstr)
      interpret(rest, newc, ctr)
    }
    else {
      // the word does not fit: start a new line and restart the column count
      print("\n" + newstr)
      interpret(rest, s.length, ctr)
    }
  }
  // paragraph tags only produce a line break; they do not touch the style stack
  case T_BTAG("<p>")::rest => print("\n"); interpret(rest, 0, ctr)
  case T_ETAG("</p>")::rest => print("\n"); interpret(rest, 0, ctr)
  // styling tags push their escape code
  case T_BTAG("<b>")::rest => interpret(rest, c, Console.BOLD :: ctr)
  case T_BTAG("<A>")::rest => interpret(rest, c, Console.UNDERLINED :: ctr)
  case T_BTAG("<cyan>")::rest => interpret(rest, c, Console.CYAN :: ctr)
  case T_BTAG("<red>")::rest => interpret(rest, c, Console.RED :: ctr)
  case T_BTAG("<blink>")::rest => interpret(rest, c, Console.BLINK :: ctr)
  // Any other end tag pops the innermost style. drop(1) is used instead of
  // tail because tail throws on an empty list, which happens when an end tag
  // arrives while no style is open (e.g. a stray end tag, or the end tag of a
  // begin tag that was skipped by the catch-all case below).
  case T_ETAG(_)::rest => interpret(rest, c, ctr.drop(1))
  // every other token (unrecognised begin tags, T_NT) is skipped
  case _::rest => interpret(rest, c, ctr)
}
e85600529ca5
moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
74 |
|
e85600529ca5
moved scala files
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
75 |
// print the tokens, starting at column 0 with no style active
interpret(ts, c = 0, ctr = Nil)