// progs/html1.scala
// author Christian Urban <christian dot urban at kcl dot ac dot uk>
// Fri, 26 Sep 2014 14:06:55 +0100
// changeset 258 1e4da6d2490c
// parent 93 4794759139ea
// permissions -rw-r--r--
// updated programs



//:load matcher.scala


// some regular expressions
// LETTER covers the full upper- and lower-case alphabet plus the
// punctuation characters that may occur inside words and tag names
// (the upper-case range previously skipped 'W')
val LETTER = RANGE("""ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,!?-{[()]}':;%""")
val DIGIT = RANGE("0123456789")
// NOTE(review): NONZERODIGIT is not referenced in the visible part of this file
val NONZERODIGIT = RANGE("123456789")

// NAME: either a run of LETTERs, or a run of LETTERs, "=", and another
// run of LETTERs (presumably an attribute such as key=value; ALT, SEQS
// and PLUS are defined outside this file -- confirm in matcher.scala)
val NAME = ALT(PLUS(LETTER), SEQS(PLUS(LETTER),"=", PLUS(LETTER)))
// begin tag: a NAME enclosed in "<" and ">"
val BTAG = SEQS("<", NAME, ">") 
// end tag: a run of LETTERs enclosed in "</" and ">"
val ETAG = SEQS("</", PLUS(LETTER), ">")

// WORD: a non-empty run of LETTERs and DIGITs
val WORD = PLUS(ALT(LETTER, DIGIT))
// WHITESPACE: a non-empty run of spaces and newlines
val WHITESPACE = PLUS(RANGE(" \n"))

// for classifying the strings that have been recognised
abstract class Token
// a run of whitespace; the matched text itself is not kept
case object T_WHITESPACE extends Token
// a word, carrying the matched text
case class T_WORD(s: String) extends Token
// an end tag, carrying the matched text (e.g. "</p>")
case class T_ETAG(s: String) extends Token
// a begin tag, carrying the matched text (e.g. "<p>")
case class T_BTAG(s: String) extends Token
// NOTE(review): T_NT is not used in the visible part of this file;
// presumably a named node with a list of child tokens -- confirm
case class T_NT(s: String, rhs: List[Token]) extends Token

// Convenience wrapper around tokenize: converts the input string into
// a list of characters and lexes it with the given rules.
def tokenizer(rs: List[Rule[Token]], s: String) : List[Token] = {
  val chars = s.toList
  tokenize(rs, chars)
}



// lexing rules for the HTML-like input: every rule pairs a regular
// expression with a function that builds a Token from what was matched
// (the order of the rules is kept as is, since tokenize may depend on it)
val lexing_rules: List[Rule[Token]]= 
  List(
    (BTAG, cs => T_BTAG(cs.mkString)),
    (ETAG, cs => T_ETAG(cs.mkString)),
    (WORD, cs => T_WORD(cs.mkString)),
    (WHITESPACE, _ => T_WHITESPACE))

// tokens obtained by running lexing_rules over the file "test.html"
// (tokenize_file is not defined in this file; presumably it comes from
// matcher.scala and reads the file relative to the working directory)
val ts = tokenize_file(lexing_rules, "test.html")


// column limit used by interpret: a word that would bring the current
// column to WIDTH or beyond is printed on a new line instead
val WIDTH = 60

// Tells whether a token is a tag, that is either a begin tag or an
// end tag; every other kind of token yields false.
def is_tag(t: Token) = t match {
  case T_BTAG(_) | T_ETAG(_) => true
  case _ => false
}

/** Prints a token list to the console, wrapping lines at WIDTH columns
  * and turning a few known tags into console formatting.
  *
  * @param ts  the tokens still to be printed
  * @param c   the column reached so far on the current output line
  * @param ctr stack of console control strings currently in effect,
  *            most recently opened first
  *
  * Known begin tags push a control string onto `ctr`; any end tag other
  * than `</p>` pops the most recent one. Unknown begin tags and all
  * other tokens are skipped. An end tag that arrives while the stack is
  * empty is ignored instead of failing on the tail of an empty list.
  */
def interpret(ts: List[Token], c: Int, ctr: List[String]) : Unit= ts match {
  // end of input: switch all formatting off and finish the line
  case Nil => println(Console.RESET)
  // whitespace of any length is rendered as a single blank
  case T_WHITESPACE::rest => print(" "); interpret(rest, c + 1, ctr)
  case T_WORD(s)::rest => {
    val newc = c + s.length
    // reset first, then re-apply the active controls, oldest first
    val newstr = Console.RESET + ctr.reverse.mkString + s
    if (newc < WIDTH) {
      print(newstr);
      interpret(rest, newc, ctr)
    }
    else {
      // the word does not fit any more: continue on a fresh line
      print("\n" + newstr)
      interpret(rest, s.length, ctr)
    } 
  }
  // paragraph tags only break the line; they do not touch the stack
  case T_BTAG("<p>")::rest => print("\n"); interpret(rest, 0, ctr)
  case T_ETAG("</p>")::rest => print("\n"); interpret(rest, 0, ctr)
  case T_BTAG("<b>")::rest => interpret(rest, c, Console.BOLD :: ctr)
  case T_BTAG("<A>")::rest => interpret(rest, c, Console.UNDERLINED :: ctr)
  case T_BTAG("<cyan>")::rest => interpret(rest, c, Console.CYAN :: ctr)
  case T_BTAG("<red>")::rest => interpret(rest, c, Console.RED :: ctr)
  case T_BTAG("<blink>")::rest => interpret(rest, c, Console.BLINK :: ctr)
  // drop(1) rather than tail: tail throws when the stack is empty,
  // e.g. for a stray end tag or the end tag of an ignored begin tag
  case T_ETAG(_)::rest => interpret(rest, c, ctr.drop(1))
  // anything else (e.g. an unknown begin tag) is skipped
  case _::rest => interpret(rest, c, ctr)
}
 
// print the tokens in ts, starting at column 0 with no formatting active
interpret(ts, 0, Nil)