updated
authorChristian Urban <christian dot urban at kcl dot ac dot uk>
Sun, 21 Aug 2016 00:37:27 +0100
changeset 407 4b454a6d1814
parent 406 0a42d73e795b
child 408 5322e1c46890
updated
handouts/ho01.pdf
handouts/ho01.tex
handouts/ho02.pdf
handouts/ho03.pdf
handouts/ho04.pdf
handouts/ho05.pdf
handouts/ho06.pdf
handouts/ho07.pdf
handouts/ho08.pdf
handouts/notation.pdf
handouts/scala-ho.pdf
progs/collatz.scala
progs/re4.scala
Binary file handouts/ho01.pdf has changed
--- a/handouts/ho01.tex	Fri Jul 15 10:54:09 2016 +0100
+++ b/handouts/ho01.tex	Sun Aug 21 00:37:27 2016 +0100
@@ -220,8 +220,17 @@
 found at \url{http://www.computerbytesman.com/redos/}.}
 Admittedly, this regular expression is carefully chosen to
 exhibit this exponential behaviour, but similar ones occur
-more often than one wants in ``real life''. They are sometimes
-called \emph{evil regular expressions} because they have the
+more often than one wants in ``real life''. For example, on 20
+July 2016 a similar regular expression brought the webpage
+\href{http://stackexchange.com}{Stack Exchange} to its knees:
+
+\begin{center}
+\url{http://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016}
+\end{center}
+
+\noindent
+Such troublesome regular expressions are sometimes called
+\emph{evil regular expressions} because they have the
 potential to make regular expression matching engines to
 topple over, like in Python and Ruby. The problem with evil
 regular expressions is that they can have some serious
@@ -229,8 +238,8 @@
 web-application. The reason is that hackers can look for these
 instances where the matching engine behaves badly and then
 mount a nice DoS-attack against your application. These
-attacks are already have their own name: 
-\emph{Regular Expression Denial of Service Attacks (ReDoS)}.
+attacks are already have their own name: \emph{Regular
+Expression Denial of Service Attacks (ReDoS)}.
 
 It will be instructive to look behind the ``scenes'' to find
 out why Python and Ruby (and others) behave so badly when
Binary file handouts/ho02.pdf has changed
Binary file handouts/ho03.pdf has changed
Binary file handouts/ho04.pdf has changed
Binary file handouts/ho05.pdf has changed
Binary file handouts/ho06.pdf has changed
Binary file handouts/ho07.pdf has changed
Binary file handouts/ho08.pdf has changed
Binary file handouts/notation.pdf has changed
Binary file handouts/scala-ho.pdf has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/collatz.scala	Sun Aug 21 00:37:27 2016 +0100
@@ -0,0 +1,5 @@
+def collatz(n: BigInt) : Boolean = {
+  if (n == 1) true else 
+  if (n % 2 == 0) collatz(n / 2) else 
+  collatz(3 * n + 1)
+}
--- a/progs/re4.scala	Fri Jul 15 10:54:09 2016 +0100
+++ b/progs/re4.scala	Sun Aug 21 00:37:27 2016 +0100
@@ -4,46 +4,46 @@
   def simp : Rexp = this
 }
 
-case object NULL extends Rexp
-case object EMPTY extends Rexp
+case object ZERO extends Rexp
+case object ONE extends Rexp
 case class CHAR(c: Char) extends Rexp
 case class ALT(r1: Rexp, r2: Rexp) extends Rexp {
   override def simp = (r1.simp, r2.simp) match {
-    case (NULL, r) => r
-    case (r, NULL) => r
-    case (r, EMPTY) => if (nullable(r)) r else ALT(r, EMPTY)
-    case (EMPTY, r) => if (nullable(r)) r else ALT(r, EMPTY)
+    case (ZERO, r) => r
+    case (r, ZERO) => r
+    case (r, ONE) => if (nullable(r)) r else ALT(r, ONE)
+    case (ONE, r) => if (nullable(r)) r else ALT(r, ONE)
     case (r1, r2) => if (r1 == r2) r1 else ALT(r1, r2)
   }
 }
 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp {
   override def simp = (r1.simp, r2.simp) match {
-    case (NULL, _) => NULL
-    case (_, NULL) => NULL
-    case (EMPTY, r) => r
-    case (r, EMPTY) => r
+    case (ZERO, _) => ZERO
+    case (_, ZERO) => ZERO
+    case (ONE, r) => r
+    case (r, ONE) => r
     case (r1, r2) => SEQ(r1, r2)
   }
 }
 case class STAR(r: Rexp) extends Rexp {
   override def simp = r.simp match {
-    case NULL => EMPTY
-    case EMPTY => EMPTY
+    case ZERO => ONE
+    case ONE => ONE
     case r => STAR(r)
   }
 }
 case class NTIMES(r: Rexp, n: Int) extends Rexp {
-  override def simp = if (n == 0) EMPTY else 
+  override def simp = if (n == 0) ONE else 
     r.simp match {
-      case NULL => NULL
-      case EMPTY => EMPTY
+      case ZERO => ZERO
+      case ONE => ONE
       case r => NTIMES(r, n)
     }
 }
 
 // some convenience for typing in regular expressions
 def charlist2rexp(s : List[Char]) : Rexp = s match {
-  case Nil => EMPTY
+  case Nil => ONE
   case c::Nil => CHAR(c)
   case c::s => SEQ(CHAR(c), charlist2rexp(s))
 }
@@ -53,8 +53,8 @@
 // nullable function: tests whether the regular 
 // expression can recognise the empty string
 def nullable (r: Rexp) : Boolean = r match {
-  case NULL => false
-  case EMPTY => true
+  case ZERO => false
+  case ONE => true
   case CHAR(_) => false
   case ALT(r1, r2) => nullable(r1) || nullable(r2)
   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
@@ -64,16 +64,16 @@
 
 // derivative of a regular expression w.r.t. a character
 def der (c: Char, r: Rexp) : Rexp = r match {
-  case NULL => NULL
-  case EMPTY => NULL
-  case CHAR(d) => if (c == d) EMPTY else NULL
+  case ZERO => ZERO
+  case ONE => ZERO
+  case CHAR(d) => if (c == d) ONE else ZERO
   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
   case SEQ(r1, r2) => 
     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
     else SEQ(der(c, r1), r2)
   case STAR(r) => SEQ(der(c, r), STAR(r))
   case NTIMES(r, i) => 
-    if (i == 0) NULL else SEQ(der(c, r), NTIMES(r, i - 1))
+    if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1))
 }
 
 // derivative w.r.t. a string (iterates der)
@@ -83,12 +83,23 @@
   case c::s => ders(s, der(c, r).simp)
 }
 
+
+def ders2 (s: List[Char], r: Rexp) : Rexp = (s, r) match {
+  case (Nil, r) => r
+  case (s, ZERO) => ZERO
+  case (s, ONE) => if (s == Nil) ONE else ZERO
+  case (s, CHAR(c)) => if (s == List(c)) ONE else 
+                       if (s == Nil) CHAR(c) else ZERO
+  case (s, ALT(r1, r2)) => ALT(ders2(s, r2), ders2(s, r2))
+  case (c::s, r) => ders2(s, der(c, r).simp)
+}
+
 // main matcher function
-def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
+def matcher(r: Rexp, s: String) : Boolean = nullable(ders2(s.toList, r))
 
 
 //one or zero
-def OPT(r: Rexp) = ALT(r, EMPTY)
+def OPT(r: Rexp) = ALT(r, ONE)
 
 def EVIL(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n))
 
@@ -99,9 +110,11 @@
   (end - start)/(i * 1.0e9)
 }
 
+val i = 5000000
+println(i + " " + "%.5f".format(time_needed(10, matcher(EVIL(i), "a" * i))))
 
-for (i <- 1 to 12001 by 500) {
-  println(i + " " + "%.5f".format(time_needed(1, matcher(EVIL(i), "a" * i))))
+for (i <- 1 to 800001 by 10000) {
+  println(i + " " + "%.5f".format(time_needed(10, matcher(EVIL(i), "a" * i))))
 }