| author | Christian Urban <christian.urban@kcl.ac.uk> | 
| Sun, 29 Oct 2023 13:05:09 +0000 | |
| changeset 947 | 4d787a8b79a6 | 
| parent 93 | 4794759139ea | 
| permissions | -rw-r--r-- | 
| 
92
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
1  | 
|
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
2  | 
|
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
3  | 
//:load matcher.scala  | 
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
4  | 
|
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
5  | 
|
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
6  | 
// some regular expressions  | 
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
7  | 
// Characters allowed inside words and tag names: the full upper- and lower-case
// Latin alphabet plus some punctuation. (The upper-case run previously lacked 'W',
// so any text containing a capital W could not be matched as a letter.)
val LETTER = RANGE("""ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,!?-{[()]}':;%""")
 | 
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
8  | 
// any single decimal digit, '0' through '9'
val DIGIT = RANGE(('0' to '9').mkString)
 | 
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
9  | 
// any single decimal digit except '0'
val NONZERODIGIT = RANGE(('1' to '9').mkString)
 | 
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
10  | 
|
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
11  | 
// a tag name: either a plain run of letters, or `letters=letters`
val NAME = {
  val letters = PLUS(LETTER)
  ALT(letters, SEQS(letters, "=", letters))
}
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
12  | 
// a begin tag: a NAME enclosed in angle brackets, e.g. <b>
val BTAG =
  SEQS(
    "<",
    NAME,
    ">"
  )
 | 
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
13  | 
// an end tag: "</", one or more letters, then ">", e.g. </b>
val ETAG =
  SEQS(
    "</",
    PLUS(LETTER),
    ">"
  )
 | 
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
14  | 
|
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
15  | 
// a word: one or more characters, each a LETTER or a DIGIT
val WORD = {
  val wordChar = ALT(LETTER, DIGIT)
  PLUS(wordChar)
}
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
16  | 
// a non-empty run of blanks and/or newlines
val WHITESPACE = {
  val blankOrNewline = RANGE(" \n")
  PLUS(blankOrNewline)
}
 | 
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
17  | 
|
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
18  | 
// for classifying the strings that have been recognised  | 
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
19  | 
/** Classification of the strings recognised by the lexer. */
abstract class Token

/** A run of whitespace; the matched text itself is not kept. */
case object T_WHITESPACE extends Token

/** A word, carrying the matched text. */
case class T_WORD(s: String) extends Token

/** A begin tag, carrying the matched text including the angle brackets (e.g. "<b>"). */
case class T_BTAG(s: String) extends Token

/** An end tag, carrying the matched text including "</" and ">" (e.g. "</b>"). */
case class T_ETAG(s: String) extends Token

/** A named token with a list of sub-tokens; not produced by the lexing rules in this file. */
case class T_NT(s: String, rhs: List[Token]) extends Token
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
25  | 
|
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
26  | 
/** Tokenises a string by handing its characters to `tokenize`.
 *
 *  @param rs the lexing rules to apply
 *  @param s  the input text
 *  @return   whatever token list `tokenize` produces for the characters of `s`
 */
def tokenizer(rs: List[Rule[Token]], s: String): List[Token] = {
  val chars = s.toList
  tokenize(rs, chars)
}
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
28  | 
|
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
29  | 
|
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
30  | 
|
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
31  | 
// lexing rules for HTML-like tags, words and whitespace  | 
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
32  | 
// Each rule pairs a regular expression with the function that turns the
// matched characters into a Token. Tags are listed before WORD.
val lexing_rules: List[Rule[Token]] =
  List(
    (BTAG, chars => T_BTAG(chars.mkString)),
    (ETAG, chars => T_ETAG(chars.mkString)),
    (WORD, chars => T_WORD(chars.mkString)),
    (WHITESPACE, _ => T_WHITESPACE)
  )
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
37  | 
|
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
38  | 
// the token stream obtained from the sample input file
val ts = {
  val inputFile = "test.html"
  tokenize_file(lexing_rules, inputFile)
}
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
39  | 
|
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
40  | 
|
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
41  | 
// column limit used by `interpret` when deciding whether to wrap a word
val WIDTH: Int = 60
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
42  | 
|
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
43  | 
/** Tells whether a token is a begin tag or an end tag.
 *
 *  @param t the token to inspect
 *  @return  true for T_BTAG and T_ETAG, false for every other token
 */
def is_tag(t: Token): Boolean = t match {
  case T_BTAG(_) | T_ETAG(_) => true
  case _                     => false
}
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
48  | 
|
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
49  | 
/** Renders a token stream on the terminal, wrapping lines at WIDTH columns
 *  and turning the supported tags into ANSI styles.
 *
 *  Supported begin tags push a style onto `ctr`; only the matching end tags
 *  pop one again. Popping uses `drop(1)`, so a stray end tag on an empty
 *  stack is ignored instead of throwing, and end tags of unsupported
 *  elements no longer remove a style that they never pushed.
 *
 *  @param ts  the tokens still to be rendered
 *  @param c   the number of characters already printed on the current line
 *  @param ctr the stack of active style codes, most recently opened first
 */
def interpret(ts: List[Token], c: Int, ctr: List[String]) : Unit = ts match {
  // end of input: reset the terminal attributes and finish the line
  case Nil => println(Console.RESET)
  case T_WHITESPACE::rest => print(" "); interpret(rest, c + 1, ctr)
  case T_WORD(s)::rest => {
    val newc = c + s.length
    // styles are emitted outermost first, hence the reverse of the stack
    val newstr = Console.RESET + ctr.reverse.mkString + s
    if (newc < WIDTH) {
      print(newstr)
      interpret(rest, newc, ctr)
    }
    else {
      // the word does not fit any more: start a new line with it
      print("\n" + newstr)
      interpret(rest, s.length, ctr)
    }
  }
  // paragraph tags only break the line; they do not touch the style stack
  case T_BTAG("<p>")::rest => print("\n"); interpret(rest, 0, ctr)
  case T_ETAG("</p>")::rest => print("\n"); interpret(rest, 0, ctr)
  case T_BTAG("<b>")::rest => interpret(rest, c, Console.BOLD :: ctr)
  case T_BTAG("<A>")::rest => interpret(rest, c, Console.UNDERLINED :: ctr)
  case T_BTAG("<cyan>")::rest => interpret(rest, c, Console.CYAN :: ctr)
  case T_BTAG("<red>")::rest => interpret(rest, c, Console.RED :: ctr)
  case T_BTAG("<blink>")::rest => interpret(rest, c, Console.BLINK :: ctr)
  // only end tags of style-pushing elements pop; drop(1) is safe on Nil
  case T_ETAG("</b>" | "</A>" | "</cyan>" | "</red>" | "</blink>")::rest =>
    interpret(rest, c, ctr.drop(1))
  // everything else (unsupported tags, other tokens) is skipped
  case _::rest => interpret(rest, c, ctr)
}
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
74  | 
|
| 
 
e85600529ca5
moved scala files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents:  
diff
changeset
 | 
75  | 
// render the tokens, starting at column 0 with no active styles
interpret(ts, c = 0, ctr = Nil)