diff -r 47f86885d481 -r e85600529ca5 html.scala
--- a/html.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,99 +0,0 @@
-
-//:load matcher.scala
-
-// some regular expressions
-val SYM = RANGE("""ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,!?-{[()]}':;%0123456789""")
-val WORD = PLUS(SYM)
-
-val BTAG = SEQS("<", WORD, ">")
-val ETAG = SEQS("</", WORD, ">")
-
-val WHITESPACE = PLUS(RANGE(" \n"))
-
-// for classifying the strings that have been recognised
-abstract class Token
-case object T_WHITESPACE extends Token
-case class T_WORD(s: String) extends Token
-case class T_ETAG(s: String) extends Token
-case class T_BTAG(s: String) extends Token
-case class T_NT(s: String, rhs: List[Token]) extends Token
-
-val lexing_rules: List[Rule[Token]] =
-  List((BTAG, (s) => T_BTAG(s.mkString)),
-       (ETAG, (s) => T_ETAG(s.mkString)),
-       (WORD, (s) => T_WORD(s.mkString)),
-       (WHITESPACE, (s) => T_WHITESPACE))
-
-// the tokenizer
-val T = Tokenizer(lexing_rules)
-
-// width for printing
-val WIDTH = 60
-
-
-def interpret(ts: List[Token], c: Int, ctr: List[String]) : Unit = ts match {
-  case Nil => println(Console.RESET)
-  case T_WHITESPACE::rest => print(Console.RESET + " "); interpret(rest, c + 1, ctr)
-  case T_WORD(s)::rest => {
-    val newstr = Console.RESET + ctr.reverse.mkString + s
-    if (c + s.length < WIDTH) {
-      print(newstr)
-      interpret(rest, c + s.length, ctr)
-    }
-    else {
-      print("\n" + newstr)
-      interpret(rest, s.length, ctr)
-    }
-  }
-  case T_BTAG("<p>")::rest => print("\n"); interpret(rest, 0, ctr)
-  case T_ETAG("</p>")::rest => print("\n"); interpret(rest, 0, ctr)
-  case T_BTAG("<b>")::rest => interpret(rest, c, Console.BOLD :: ctr)
-  case T_BTAG("<a>")::rest => interpret(rest, c, Console.UNDERLINED :: ctr)
-  case T_BTAG("<cyan>")::rest => interpret(rest, c, Console.CYAN :: ctr)
-  case T_BTAG("<red>")::rest => interpret(rest, c, Console.RED :: ctr)
-  case T_BTAG("<blink>")::rest => interpret(rest, c, Console.BLINK :: ctr)
-  case T_ETAG(_)::rest => interpret(rest, c, ctr.tail)
-  case _::rest => interpret(rest, c, ctr)
-}
-
-val test_string = """
-<b>MSc Projects</b>
-
-<p>
-start of paragraph. <cyan>a cyan word</cyan> normal again something longer.
-</p>
-
-<p><b>Description:</b>
-Regular expressions are extremely useful for many text-processing tasks, such as finding patterns in texts,
-lexing programs, syntax highlighting and so on. Given that regular expressions were
-introduced in 1950 by Stephen Kleene, you might think
-regular expressions have since been studied and implemented to death. But you would definitely be mistaken: in fact they are still
-an active research area. For example,
-<a>this paper</a>
-about regular expression matching and partial derivatives was presented this summer at the international
-PPDP'12 conference. The task in this project is to implement the results from this paper.</p>
-
-<p>The background for this project is that some regular expressions are
-<a>evil</a>
-and can stab you in the back, according to
-this <a>blog post</a>.
-For example, if you use, in Python or
-in Ruby (probably also in other mainstream programming languages), the
-innocent-looking regular expression a?{28}a{28} and match it, say, against the string
-aaaaaaaaaaaaaaaaaaaaaaaaaaaa (that is 28 a's), you will soon notice that your CPU usage goes to 100%. In fact,
-Python and Ruby need approximately 30 seconds of hard work to match this string. You can try it for yourself:
-<a>re.py</a> (Python version) and
-<a>re.rb</a>
-(Ruby version). You can imagine an attacker
-mounting a nice DoS attack against
-your program if it contains such an evil regular expression. Actually,
-Scala (and also Java) are almost immune to such
-attacks, as they can deal with strings of up to 4,300 a's in less than a second. But if you scale
-the regular expression and string further to, say, 4,600 a's, then you get a
-StackOverflowError,
-potentially crashing your program.
-</p>
-"""
-
-interpret(T.fromString(test_string), 0, Nil)
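Note on the removed description: the paragraph in the deleted test_string claims that the regular expression a?{28}a{28}, matched against a string of 28 a's, keeps Python and Ruby busy for about 30 seconds, while the JVM copes with a few thousand a's before failing with a StackOverflowError. The stand-alone Scala sketch below (not part of this changeset) can be used to try the JVM side of that claim; it spells the pattern out for java.util.regex via String.matches. The object name EvilRegexDemo and the chosen input sizes are illustrative assumptions, and actual timings depend on the JVM version and stack size.

object EvilRegexDemo {

  // builds a?...a? a...a (n copies of each) and matches it against n a's,
  // i.e. the a?{n}a{n} example from the deleted description
  def evilMatch(n: Int): Boolean =
    ("a" * n).matches(("a?" * n) + ("a" * n))

  // crude wall-clock timing in seconds
  def timed[A](block: => A): (A, Double) = {
    val start  = System.nanoTime()
    val result = block
    (result, (System.nanoTime() - start) / 1e9)
  }

  def main(args: Array[String]): Unit =
    // sizes are illustrative; per the description above, pushing n towards
    // 4,600 may abort with a StackOverflowError instead of a slow match
    for (n <- List(28, 1000, 4000)) {
      val (res, secs) = timed(evilMatch(n))
      println(f"n = $n%5d  matched = $res  time = $secs%.3f s")
    }
}

Compiled with scalac and run with scala EvilRegexDemo, it prints one timing line per input size; it is only a sanity check of the figures quoted in the deleted text, not part of the build.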