| author | Christian Urban <christian.urban@kcl.ac.uk> | 
| Tue, 31 Oct 2023 12:52:36 +0000 | |
| changeset 951 | a6a5ba526d73 | 
| parent 93 | 4794759139ea | 
| permissions | -rw-r--r-- | 
| 92 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 1 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 2 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 3 | //:load matcher.scala | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 4 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 5 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 6 | // some regular expressions | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
// Characters allowed inside words and tag names. Despite the name, the set
// also contains the punctuation .,!?-{[()]}':;% in addition to A-Z and a-z.
// The uppercase run lists every letter including 'W'; leaving it out makes
// any word containing a capital W impossible to tokenise.
// NOTE(review): RANGE is defined outside this file (see matcher.scala);
// presumably it matches any single character of the given string -- confirm.
val LETTER = RANGE("""ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,!?-{[()]}':;%""")
val DIGIT = RANGE("0123456789")
val NONZERODIGIT = RANGE("123456789")

// A tag name: either a run of LETTERs, or two such runs joined by "=".
val NAME = ALT(PLUS(LETTER), SEQS(PLUS(LETTER), "=", PLUS(LETTER)))
// Begin tag: "<" NAME ">"
val BTAG = SEQS("<", NAME, ">")
// End tag: "</" followed by a run of LETTERs and ">"
val ETAG = SEQS("</", PLUS(LETTER), ">")

// A word is a non-empty run of LETTERs and DIGITs.
val WORD = PLUS(ALT(LETTER, DIGIT))
// Whitespace is a non-empty run of the characters listed in the literal.
val WHITESPACE = PLUS(RANGE(" \n"))
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 17 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 18 | // for classifying the strings that have been recognised | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
/** Common supertype of everything the lexer can produce. */
abstract class Token

/** A run of whitespace; the matched text itself is not kept. */
case object T_WHITESPACE extends Token

/** A word, carrying the matched text. */
case class T_WORD(s: String) extends Token

/** An end tag, carrying the matched text (the renderer compares it against e.g. "</p>"). */
case class T_ETAG(s: String) extends Token

/** A begin tag, carrying the matched text (the renderer compares it against e.g. "<b>"). */
case class T_BTAG(s: String) extends Token

/** A named token holding a list of further tokens.
  * NOTE(review): not constructed anywhere in the visible code; presumably a
  * non-terminal with its right-hand side -- confirm against other users.
  */
case class T_NT(s: String, rhs: List[Token]) extends Token
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 25 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
/** Tokenises the string `s` with the rules `rs`.
  *
  * The string is turned into a list of characters and handed to `tokenize`
  * (defined outside this file), whose result is returned unchanged.
  */
def tokenizer(rs: List[Rule[Token]], s: String) : List[Token] = {
  val chars = s.toList
  tokenize(rs, chars)
}
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 28 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 29 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 30 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 31 | // lexing rules for HTML-like markup (tags, words, whitespace) | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
// Each rule pairs a regular expression with a function that builds the token
// from the matched input. The rules are listed in the order: begin tag, end
// tag, word, whitespace.
// NOTE(review): the order presumably decides priority between rules -- confirm
// against `tokenize` before reordering.
val lexing_rules: List[Rule[Token]] = List(
  (BTAG,       chars => T_BTAG(chars.mkString)),
  (ETAG,       chars => T_ETAG(chars.mkString)),
  (WORD,       chars => T_WORD(chars.mkString)),
  (WHITESPACE, _     => T_WHITESPACE)   // matched text is discarded
)
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 37 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
// Token stream obtained by running the lexing rules over the file "test.html"
// (`tokenize_file` is defined outside this file).
val ts = tokenize_file(lexing_rules, "test.html")

// Column limit used by `interpret` to decide when a word moves to a new line.
val WIDTH: Int = 60
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 42 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
/** Tells whether `t` is a begin tag or an end tag.
  *
  * @param t the token to classify
  * @return `true` for `T_BTAG` and `T_ETAG`, `false` for every other token
  */
def is_tag(t: Token): Boolean = t match {
  case T_BTAG(_) | T_ETAG(_) => true
  case _                     => false
}
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 48 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
/** Prints a token stream to the console, wrapping lines and applying
  * console styles for a handful of recognised tags.
  *
  * @param ts  the tokens still to be printed
  * @param c   number of characters already printed on the current line
  * @param ctr stack of active `Console` style codes, most recently opened
  *            first; it is reversed and concatenated in front of every word
  *
  * Behaviour per token:
  *  - whitespace prints a single space and advances the column by one;
  *  - a word is printed after `Console.RESET` and the active style codes;
  *    if it would reach `WIDTH` it is moved to a fresh line first;
  *  - `<p>` and `</p>` start a new line and reset the column to 0;
  *  - `<b>`, `<A>`, `<cyan>`, `<red>`, `<blink>` push a style code;
  *  - any other end tag pops the most recent style code;
  *  - everything else is skipped.
  *
  * When the input is exhausted, `Console.RESET` is printed with a newline.
  */
def interpret(ts: List[Token], c: Int, ctr: List[String]) : Unit = ts match {
  case Nil => println(Console.RESET)
  case T_WHITESPACE::rest => print(" "); interpret(rest, c + 1, ctr)
  case T_WORD(s)::rest => {
    val newc = c + s.length
    // reset first so that styles closed since the last word no longer apply
    val newstr = Console.RESET + ctr.reverse.mkString + s
    if (newc < WIDTH) {
      print(newstr)
      interpret(rest, newc, ctr)
    }
    else {
      // word does not fit: break the line; the column restarts at the word's length
      print("\n" + newstr)
      interpret(rest, s.length, ctr)
    }
  }
  case T_BTAG("<p>")::rest => print("\n"); interpret(rest, 0, ctr)
  case T_ETAG("</p>")::rest => print("\n"); interpret(rest, 0, ctr)
  case T_BTAG("<b>")::rest => interpret(rest, c, Console.BOLD :: ctr)
  case T_BTAG("<A>")::rest => interpret(rest, c, Console.UNDERLINED :: ctr)
  case T_BTAG("<cyan>")::rest => interpret(rest, c, Console.CYAN :: ctr)
  case T_BTAG("<red>")::rest => interpret(rest, c, Console.RED :: ctr)
  case T_BTAG("<blink>")::rest => interpret(rest, c, Console.BLINK :: ctr)
  // drop(1) instead of tail: an end tag with no open style (for example the
  // closing tag of a begin tag that was skipped below) must not throw on an
  // empty stack.
  // NOTE(review): an end tag whose begin tag was skipped still pops whatever
  // style is on top; confirm whether that is intended for nested markup.
  case T_ETAG(_)::rest => interpret(rest, c, ctr.drop(1))
  // unrecognised tokens (other begin tags, T_NT) are ignored
  case _::rest => interpret(rest, c, ctr)
}
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 74 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
// Render the token stream, starting at column 0 with no active styles.
interpret(ts, c = 0, ctr = Nil)