# HG changeset patch # User Christian Urban # Date 1471736247 -3600 # Node ID 4b454a6d1814fa193bf08747492564fe5436ffa8 # Parent 0a42d73e795b1e33b5cb2d361b2a92d9bf91457c updated diff -r 0a42d73e795b -r 4b454a6d1814 handouts/ho01.pdf Binary file handouts/ho01.pdf has changed diff -r 0a42d73e795b -r 4b454a6d1814 handouts/ho01.tex --- a/handouts/ho01.tex Fri Jul 15 10:54:09 2016 +0100 +++ b/handouts/ho01.tex Sun Aug 21 00:37:27 2016 +0100 @@ -220,8 +220,17 @@ found at \url{http://www.computerbytesman.com/redos/}.} Admittedly, this regular expression is carefully chosen to exhibit this exponential behaviour, but similar ones occur -more often than one wants in ``real life''. They are sometimes -called \emph{evil regular expressions} because they have the +more often than one wants in ``real life''. For example, on 20 +July 2016 a similar regular expression brought the webpage +\href{http://stackexchange.com}{Stack Exchange} to its knees: + +\begin{center} +\url{http://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016} +\end{center} + +\noindent +Such troublesome regular expressions are sometimes called +\emph{evil regular expressions} because they have the potential to make regular expression matching engines to topple over, like in Python and Ruby. The problem with evil regular expressions is that they can have some serious @@ -229,8 +238,8 @@ web-application. The reason is that hackers can look for these instances where the matching engine behaves badly and then mount a nice DoS-attack against your application. These -attacks are already have their own name: -\emph{Regular Expression Denial of Service Attacks (ReDoS)}. +attacks are already have their own name: \emph{Regular +Expression Denial of Service Attacks (ReDoS)}. It will be instructive to look behind the ``scenes'' to find out why Python and Ruby (and others) behave so badly when diff -r 0a42d73e795b -r 4b454a6d1814 handouts/ho02.pdf Binary file handouts/ho02.pdf has changed diff -r 0a42d73e795b -r 4b454a6d1814 handouts/ho03.pdf Binary file handouts/ho03.pdf has changed diff -r 0a42d73e795b -r 4b454a6d1814 handouts/ho04.pdf Binary file handouts/ho04.pdf has changed diff -r 0a42d73e795b -r 4b454a6d1814 handouts/ho05.pdf Binary file handouts/ho05.pdf has changed diff -r 0a42d73e795b -r 4b454a6d1814 handouts/ho06.pdf Binary file handouts/ho06.pdf has changed diff -r 0a42d73e795b -r 4b454a6d1814 handouts/ho07.pdf Binary file handouts/ho07.pdf has changed diff -r 0a42d73e795b -r 4b454a6d1814 handouts/ho08.pdf Binary file handouts/ho08.pdf has changed diff -r 0a42d73e795b -r 4b454a6d1814 handouts/notation.pdf Binary file handouts/notation.pdf has changed diff -r 0a42d73e795b -r 4b454a6d1814 handouts/scala-ho.pdf Binary file handouts/scala-ho.pdf has changed diff -r 0a42d73e795b -r 4b454a6d1814 progs/collatz.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/collatz.scala Sun Aug 21 00:37:27 2016 +0100 @@ -0,0 +1,5 @@ +def collatz(n: BigInt) : Boolean = { + if (n == 1) true else + if (n % 2 == 0) collatz(n / 2) else + collatz(3 * n + 1) +} diff -r 0a42d73e795b -r 4b454a6d1814 progs/re4.scala --- a/progs/re4.scala Fri Jul 15 10:54:09 2016 +0100 +++ b/progs/re4.scala Sun Aug 21 00:37:27 2016 +0100 @@ -4,46 +4,46 @@ def simp : Rexp = this } -case object NULL extends Rexp -case object EMPTY extends Rexp +case object ZERO extends Rexp +case object ONE extends Rexp case class CHAR(c: Char) extends Rexp case class ALT(r1: Rexp, r2: Rexp) extends Rexp { override def simp = (r1.simp, r2.simp) match { - case (NULL, r) => r - case (r, NULL) => r - case (r, EMPTY) => if (nullable(r)) r else ALT(r, EMPTY) - case (EMPTY, r) => if (nullable(r)) r else ALT(r, EMPTY) + case (ZERO, r) => r + case (r, ZERO) => r + case (r, ONE) => if (nullable(r)) r else ALT(r, ONE) + case (ONE, r) => if (nullable(r)) r else ALT(r, ONE) case (r1, r2) => if (r1 == r2) r1 else ALT(r1, r2) } } case class SEQ(r1: Rexp, r2: Rexp) extends Rexp { override def simp = (r1.simp, r2.simp) match { - case (NULL, _) => NULL - case (_, NULL) => NULL - case (EMPTY, r) => r - case (r, EMPTY) => r + case (ZERO, _) => ZERO + case (_, ZERO) => ZERO + case (ONE, r) => r + case (r, ONE) => r case (r1, r2) => SEQ(r1, r2) } } case class STAR(r: Rexp) extends Rexp { override def simp = r.simp match { - case NULL => EMPTY - case EMPTY => EMPTY + case ZERO => ONE + case ONE => ONE case r => STAR(r) } } case class NTIMES(r: Rexp, n: Int) extends Rexp { - override def simp = if (n == 0) EMPTY else + override def simp = if (n == 0) ONE else r.simp match { - case NULL => NULL - case EMPTY => EMPTY + case ZERO => ZERO + case ONE => ONE case r => NTIMES(r, n) } } // some convenience for typing in regular expressions def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY + case Nil => ONE case c::Nil => CHAR(c) case c::s => SEQ(CHAR(c), charlist2rexp(s)) } @@ -53,8 +53,8 @@ // nullable function: tests whether the regular // expression can recognise the empty string def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true + case ZERO => false + case ONE => true case CHAR(_) => false case ALT(r1, r2) => nullable(r1) || nullable(r2) case SEQ(r1, r2) => nullable(r1) && nullable(r2) @@ -64,16 +64,16 @@ // derivative of a regular expression w.r.t. a character def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL + case ZERO => ZERO + case ONE => ZERO + case CHAR(d) => if (c == d) ONE else ZERO case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) case SEQ(r1, r2) => if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) else SEQ(der(c, r1), r2) case STAR(r) => SEQ(der(c, r), STAR(r)) case NTIMES(r, i) => - if (i == 0) NULL else SEQ(der(c, r), NTIMES(r, i - 1)) + if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1)) } // derivative w.r.t. a string (iterates der) @@ -83,12 +83,23 @@ case c::s => ders(s, der(c, r).simp) } + +def ders2 (s: List[Char], r: Rexp) : Rexp = (s, r) match { + case (Nil, r) => r + case (s, ZERO) => ZERO + case (s, ONE) => if (s == Nil) ONE else ZERO + case (s, CHAR(c)) => if (s == List(c)) ONE else + if (s == Nil) CHAR(c) else ZERO + case (s, ALT(r1, r2)) => ALT(ders2(s, r2), ders2(s, r2)) + case (c::s, r) => ders2(s, der(c, r).simp) +} + // main matcher function -def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) +def matcher(r: Rexp, s: String) : Boolean = nullable(ders2(s.toList, r)) //one or zero -def OPT(r: Rexp) = ALT(r, EMPTY) +def OPT(r: Rexp) = ALT(r, ONE) def EVIL(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n)) @@ -99,9 +110,11 @@ (end - start)/(i * 1.0e9) } +val i = 5000000 +println(i + " " + "%.5f".format(time_needed(10, matcher(EVIL(i), "a" * i)))) -for (i <- 1 to 12001 by 500) { - println(i + " " + "%.5f".format(time_needed(1, matcher(EVIL(i), "a" * i)))) +for (i <- 1 to 800001 by 10000) { + println(i + " " + "%.5f".format(time_needed(10, matcher(EVIL(i), "a" * i)))) }