html.scala
changeset 92 e85600529ca5
parent 91 47f86885d481
child 93 4794759139ea
--- a/html.scala	Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,99 +0,0 @@
-
-//:load matcher.scala
-
-// some regular expressions
-val SYM = RANGE("""ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz.,!?-{[()]}':;%0123456789""")
-val WORD = PLUS(SYM)
-
-val BTAG = SEQS("<", WORD, ">") 
-val ETAG = SEQS("</", WORD, ">")
-
-val WHITESPACE = PLUS(RANGE(" \n"))
-
-// for classifying the strings that have been recognised
-abstract class Token
-case object T_WHITESPACE extends Token
-case class T_WORD(s: String) extends Token
-case class T_ETAG(s: String) extends Token
-case class T_BTAG(s: String) extends Token
-case class T_NT(s: String, rhs: List[Token]) extends Token
-
-val lexing_rules: List[Rule[Token]] = 
-  List((BTAG, (s) => T_BTAG(s.mkString)),
-       (ETAG, (s) => T_ETAG(s.mkString)),
-       (WORD, (s) => T_WORD(s.mkString)),
-       (WHITESPACE, (s) => T_WHITESPACE))
-
-// the tokenizer
-val T = Tokenizer(lexing_rules)
-
-// width for printing
-val WIDTH = 60
-
-
-def interpret(ts: List[Token], c: Int, ctr: List[String]) : Unit= ts match {
-  case Nil => println(Console.RESET)
-  case T_WHITESPACE::rest => print(Console.RESET + " "); interpret(rest, c + 1, ctr)
-  case T_WORD(s)::rest => {
-    val newstr = Console.RESET + ctr.reverse.mkString + s
-    if (c + s.length < WIDTH) {
-      print(newstr);
-      interpret(rest, c + s.length, ctr)
-    }
-    else {
-      print("\n" + newstr)
-      interpret(rest, s.length, ctr)
-    } 
-  }
-  case T_BTAG("<p>")::rest => print("\n"); interpret(rest, 0, ctr)
-  case T_ETAG("</p>")::rest => print("\n"); interpret(rest, 0, ctr)
-  case T_BTAG("<b>")::rest => interpret(rest, c, Console.BOLD :: ctr)
-  case T_BTAG("<a>")::rest => interpret(rest, c, Console.UNDERLINED :: ctr)
-  case T_BTAG("<cyan>")::rest => interpret(rest, c, Console.CYAN :: ctr)
-  case T_BTAG("<red>")::rest => interpret(rest, c, Console.RED :: ctr)
-  case T_BTAG("<blink>")::rest => interpret(rest, c, Console.BLINK :: ctr)
-  case T_ETAG(_)::rest => interpret(rest, c, ctr.tail)
-  case _::rest => interpret(rest, c, ctr)
-}
- 
-val test_string = """
-<b>MSc Projects</b>
-
-<p>
-start of paragraph. <cyan> a <red>cyan</red> word</cyan> normal again something longer.
-</p> 
-
-
- <p><b>Description:</b>  
-  <a>Regular expressions</a> are extremely useful for many text-processing tasks such as finding patterns in texts,
-  lexing programs, syntax highlighting and so on. Given that regular expressions were
-  introduced in 1950 by <a>Stephen Kleene</a>, you might think 
-  regular expressions have since been studied and implemented to death. But you would definitely be mistaken: in fact they are still
-  an active research area. For example
-  <a>this paper</a> 
-  about regular expression matching and partial derivatives was presented this summer at the international 
-  PPDP'12 conference. The task in this project is to implement the results from this paper.</p>
-
-  <p>The background for this project is that some regular expressions are 
-  <a>evil</a>
-  and can stab you in the back; according to
-  this <a>blog post</a>.
-  For example, if you use in <a>Python</a> or 
-  in <a>Ruby</a> (probably also in other mainstream programming languages) the 
-  innocently looking regular expression a?{28}a{28} and match it, say, against the string 
-  <red>aaaaaaaaaa<cyan>aaaaaaa</cyan>aaaaaaaaaaa</red> (that is 28 as), you will soon notice that your CPU usage goes to 100%. In fact,
-  Python and Ruby need approximately 30 seconds of hard work for matching this string. You can try it for yourself:
-  <a>re.py</a> (Python version) and 
-  <a>re.rb</a> 
-  (Ruby version). You can imagine an attacker
-  mounting a nice <a>DoS attack</a> against 
-  your program if it contains such an evil regular expression. Actually 
-  <a>Scala</a> (and also Java) are almost immune from such
-  attacks as they can deal with strings of up to 4,300 as in less than a second. But if you scale
-  the regular expression and string further to, say, 4,600 as, then you get a 
-  StackOverflowError 
-  potentially crashing your program.
-  </p>
-"""
-
-interpret(T.fromString(test_string), 0, Nil)