// progs/html1.scala
//
// A small console renderer for a simplified HTML dialect: the input file is
// split into tags, words and whitespace, and the token stream is printed with
// line wrapping and ANSI styling taken from scala.Console.

//:load matcher.scala


// some regular expressions
// LETTER also contains common punctuation, so punctuation becomes part of a WORD.
val LETTER = RANGE("""ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,!?-{[()]}':;%""")
val DIGIT = RANGE("0123456789")
val NONZERODIGIT = RANGE("123456789")

// a tag name, optionally followed by a single attr=value pair (no spaces)
val NAME = ALT(PLUS(LETTER), SEQS(PLUS(LETTER),"=", PLUS(LETTER)))
val BTAG = SEQS("<", NAME, ">")
val ETAG = SEQS("</", NAME, ">")

val WORD = PLUS(ALT(LETTER, DIGIT))
val WHITESPACE = PLUS(RANGE(" \n"))

// for classifying the strings that have been recognised
abstract class Token
case object T_WHITESPACE extends Token
case class T_WORD(s: String) extends Token
case class T_ETAG(s: String) extends Token
case class T_BTAG(s: String) extends Token
case class T_NT(s: String, rhs: List[Token]) extends Token

// Tokenises the string s with the given rules by delegating to tokenize
// (expected to come from matcher.scala).
def tokenizer(rs: List[Rule[Token]], s: String) : List[Token] =
  tokenize(rs, s.toList)



// lexing rules for the simplified HTML input
val lexing_rules: List[Rule[Token]] =
  List((BTAG, (s) => T_BTAG(s.mkString)),
       (ETAG, (s) => T_ETAG(s.mkString)),
       (WORD, (s) => T_WORD(s.mkString)),
       (WHITESPACE, (s) => T_WHITESPACE))

val ts = tokenize_file(lexing_rules, "test.html")


// column at which a word is moved to the next output line
val WIDTH = 60

// true for begin and end tags, false for every other token
def is_tag(t: Token) = t match {
  case T_BTAG(_) => true
  case T_ETAG(_) => true
  case _ => false
}

// Prints the token list ts to the console.
//   ts  - tokens still to be printed
//   c   - number of characters already printed on the current line
//   ctr - stack of Console escape codes for the currently open styling tags,
//         most recently opened first
// NOTE(review): the literal tag names in the patterns below were missing from
// the source this file was recovered from; <p>, </p>, <b>, <a>, <cyan>, <red>
// and <blink> are reconstructions - confirm against the intended input format.
def interpret(ts: List[Token], c: Int, ctr: List[String]) : Unit = ts match {
  case Nil => println(Console.RESET)
  // a whole run of blanks/newlines is one WHITESPACE token and prints as one space
  case T_WHITESPACE::rest => print(" "); interpret(rest, c + 1, ctr)
  case T_WORD(s)::rest => {
    val newc = c + s.length
    // reset first, then re-apply the open styles from outermost to innermost
    val newstr = Console.RESET + ctr.reverse.mkString + s
    if (newc < WIDTH) {
      print(newstr);
      interpret(rest, newc, ctr)
    }
    else {
      // the word does not fit any more: start a new line with it
      print("\n" + newstr)
      interpret(rest, s.length, ctr)
    }
  }
  // paragraph tags only break the line; they do not touch the style stack
  case T_BTAG("<p>")::rest => print("\n"); interpret(rest, 0, ctr)
  case T_ETAG("</p>")::rest => print("\n"); interpret(rest, 0, ctr)
  case T_BTAG("<b>")::rest => interpret(rest, c, Console.BOLD :: ctr)
  case T_BTAG("<a>")::rest => interpret(rest, c, Console.UNDERLINED :: ctr)
  case T_BTAG("<cyan>")::rest => interpret(rest, c, Console.CYAN :: ctr)
  case T_BTAG("<red>")::rest => interpret(rest, c, Console.RED :: ctr)
  case T_BTAG("<blink>")::rest => interpret(rest, c, Console.BLINK :: ctr)
  // any other end tag closes the innermost style; drop(1) instead of tail so
  // that an end tag arriving with an empty stack is ignored rather than throwing
  case T_ETAG(_)::rest => interpret(rest, c, ctr.drop(1))
  // everything else (e.g. unknown begin tags) is skipped
  case _::rest => interpret(rest, c, ctr)
}

interpret(ts, 0, Nil)