progs/html1.scala
changeset 93 4794759139ea
parent 92 e85600529ca5
equal deleted inserted replaced
92:e85600529ca5 93:4794759139ea
       
     1 
       
     2 
       
     3 //:load matcher.scala
       
     4 
       
     5 
       
     6 // some regular expressions
       
     7 val LETTER = RANGE("""ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz.,!?-{[()]}':;%""")
       
     8 val DIGIT = RANGE("0123456789")
       
     9 val NONZERODIGIT = RANGE("123456789")
       
    10 
       
    11 val NAME = ALT(PLUS(LETTER), SEQS(PLUS(LETTER),"=", PLUS(LETTER)))
       
    12 val BTAG = SEQS("<", NAME, ">") 
       
    13 val ETAG = SEQS("</", PLUS(LETTER), ">")
       
    14 
       
    15 val WORD = PLUS(ALT(LETTER, DIGIT))
       
    16 val WHITESPACE = PLUS(RANGE(" \n"))
       
    17 
       
    18 // for classifying the strings that have been recognised
       
    19 abstract class Token
       
    20 case object T_WHITESPACE extends Token
       
    21 case class T_WORD(s: String) extends Token
       
    22 case class T_ETAG(s: String) extends Token
       
    23 case class T_BTAG(s: String) extends Token
       
    24 case class T_NT(s: String, rhs: List[Token]) extends Token
       
    25 
       
    26 def tokenizer(rs: List[Rule[Token]], s: String) : List[Token] = 
       
    27   tokenize(rs, s.toList)
       
    28 
       
    29 
       
    30 
       
    31 // lexing rules for arithmetic expressions
       
    32 val lexing_rules: List[Rule[Token]]= 
       
    33   List((BTAG, (s) => T_BTAG(s.mkString)),
       
    34        (ETAG, (s) => T_ETAG(s.mkString)),
       
    35        (WORD, (s) => T_WORD(s.mkString)),
       
    36        (WHITESPACE, (s) => T_WHITESPACE))
       
    37 
       
// lex the test document into a token stream (side effect: reads "test.html")
val ts = tokenize_file(lexing_rules, "test.html")
       
    39 
       
    40 
       
// maximum number of characters printed per line before wrapping
val WIDTH = 60
       
    42 
       
    43 def is_tag(t: Token) = t match {
       
    44   case T_BTAG(_) => true
       
    45   case T_ETAG(_) => true
       
    46   case _ => false
       
    47 }
       
    48 
       
    49 def interpret(ts: List[Token], c: Int, ctr: List[String]) : Unit= ts match {
       
    50   case Nil => println(Console.RESET)
       
    51   case T_WHITESPACE::rest => print(" "); interpret(rest, c + 1, ctr)
       
    52   case T_WORD(s)::rest => {
       
    53     val newc = c + s.length
       
    54     val newstr = Console.RESET + ctr.reverse.mkString + s
       
    55     if (newc < WIDTH) {
       
    56       print(newstr);
       
    57       interpret(rest, newc, ctr)
       
    58     }
       
    59     else {
       
    60       print("\n" + newstr)
       
    61       interpret(rest, s.length, ctr)
       
    62     } 
       
    63   }
       
    64   case T_BTAG("<p>")::rest => print("\n"); interpret(rest, 0, ctr)
       
    65   case T_ETAG("</p>")::rest => print("\n"); interpret(rest, 0, ctr)
       
    66   case T_BTAG("<b>")::rest => interpret(rest, c, Console.BOLD :: ctr)
       
    67   case T_BTAG("<A>")::rest => interpret(rest, c, Console.UNDERLINED :: ctr)
       
    68   case T_BTAG("<cyan>")::rest => interpret(rest, c, Console.CYAN :: ctr)
       
    69   case T_BTAG("<red>")::rest => interpret(rest, c, Console.RED :: ctr)
       
    70   case T_BTAG("<blink>")::rest => interpret(rest, c, Console.BLINK :: ctr)
       
    71   case T_ETAG(_)::rest => interpret(rest, c, ctr.tail)
       
    72   case _::rest => interpret(rest, c, ctr)
       
    73 }
       
    74  
       
// render the lexed test.html: start at column 0 with no active styles
interpret(ts, 0, Nil)