diff -r e85600529ca5 -r 4794759139ea scala/html1.scala --- a/scala/html1.scala Sat Jun 15 09:11:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,75 +0,0 @@ - - -//:load matcher.scala - - -// some regular expressions -val LETTER = RANGE("""ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz.,!?-{[()]}':;%""") -val DIGIT = RANGE("0123456789") -val NONZERODIGIT = RANGE("123456789") - -val NAME = ALT(PLUS(LETTER), SEQS(PLUS(LETTER),"=", PLUS(LETTER))) -val BTAG = SEQS("<", NAME, ">") -val ETAG = SEQS("", PLUS(LETTER), ">") - -val WORD = PLUS(ALT(LETTER, DIGIT)) -val WHITESPACE = PLUS(RANGE(" \n")) - -// for classifying the strings that have been recognised -abstract class Token -case object T_WHITESPACE extends Token -case class T_WORD(s: String) extends Token -case class T_ETAG(s: String) extends Token -case class T_BTAG(s: String) extends Token -case class T_NT(s: String, rhs: List[Token]) extends Token - -def tokenizer(rs: List[Rule[Token]], s: String) : List[Token] = - tokenize(rs, s.toList) - - - -// lexing rules for arithmetic expressions -val lexing_rules: List[Rule[Token]]= - List((BTAG, (s) => T_BTAG(s.mkString)), - (ETAG, (s) => T_ETAG(s.mkString)), - (WORD, (s) => T_WORD(s.mkString)), - (WHITESPACE, (s) => T_WHITESPACE)) - -val ts = tokenize_file(lexing_rules, "test.html") - - -val WIDTH = 60 - -def is_tag(t: Token) = t match { - case T_BTAG(_) => true - case T_ETAG(_) => true - case _ => false -} - -def interpret(ts: List[Token], c: Int, ctr: List[String]) : Unit= ts match { - case Nil => println(Console.RESET) - case T_WHITESPACE::rest => print(" "); interpret(rest, c + 1, ctr) - case T_WORD(s)::rest => { - val newc = c + s.length - val newstr = Console.RESET + ctr.reverse.mkString + s - if (newc < WIDTH) { - print(newstr); - interpret(rest, newc, ctr) - } - else { - print("\n" + newstr) - interpret(rest, s.length, ctr) - } - } - case T_BTAG("
")::rest => print("\n"); interpret(rest, 0, ctr) - case T_ETAG("
")::rest => print("\n"); interpret(rest, 0, ctr) - case T_BTAG("")::rest => interpret(rest, c, Console.BOLD :: ctr) - case T_BTAG("")::rest => interpret(rest, c, Console.UNDERLINED :: ctr) - case T_BTAG("