# HG changeset patch # User Christian Urban # Date 1476107883 -3600 # Node ID 96129128d0f178c29dfdcd2e81e39b96c5042649 # Parent 16742bf623650c0a3a46bdb09949961f9d8d57dd updated diff -r 16742bf62365 -r 96129128d0f1 data.sty --- a/data.sty Sat Oct 08 16:44:11 2016 +0100 +++ b/data.sty Mon Oct 10 14:58:03 2016 +0100 @@ -21,6 +21,25 @@ #29 58.886 \end{filecontents} +\begin{filecontents}{re-python2.data} +1 0.033 +5 0.036 +10 0.034 +15 0.036 +18 0.059 +19 0.084 +20 0.141 +21 0.248 +22 0.485 +23 0.878 +24 1.71 +25 3.40 +26 7.08 +27 14.12 +28 26.69 +\end{filecontents} + + \begin{filecontents}{re-ruby.data} 1 0.00006 #2 0.00003 diff -r 16742bf62365 -r 96129128d0f1 handouts/ho01.pdf Binary file handouts/ho01.pdf has changed diff -r 16742bf62365 -r 96129128d0f1 handouts/ho01.tex --- a/handouts/ho01.tex Sat Oct 08 16:44:11 2016 +0100 +++ b/handouts/ho01.tex Mon Oct 10 14:58:03 2016 +0100 @@ -225,10 +225,8 @@ legend entries={Python,Ruby}, legend pos=north west, legend cell align=left] -\addplot[blue,mark=*, mark options={fill=white}] - table {re-python.data}; -\addplot[brown,mark=triangle*, mark options={fill=white}] - table {re-ruby.data}; +\addplot[blue,mark=*, mark options={fill=white}] table {re-python.data}; +\addplot[brown,mark=triangle*, mark options={fill=white}] table {re-ruby.data}; \end{axis} \end{tikzpicture} & @@ -248,9 +246,10 @@ axis lines=left, width=5.5cm, height=4.5cm, - legend entries={Java}, + legend entries={Python, Java}, legend pos=north west, legend cell align=left] +\addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data}; \addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data}; \end{axis} \end{tikzpicture} @@ -258,19 +257,18 @@ \end{center} \noindent This first graph shows that Python needs approximately 29 -seconds for finding out whether a string of 28 \texttt{a}s -matches the regular expression \texttt{a?\{28\}\,a\{28\}}. -Ruby is even slightly worse.\footnote{In this example Ruby -uses the slightly different regular expression -\texttt{a?a?a?...a?a?aaa...aa}, where the \texttt{a?} and -\texttt{a} each occur $n$ times. More such test cases can be -found at \url{http://www.computerbytesman.com/redos/}.} Simlarly, Java needs approximately -30 seconds to find out that the regular expression $\texttt{(a*)*\,b}$ -does not match strings of 28 \texttt{a}s. -Admittedly, these regular expressions are carefully chosen to -exhibit this exponential behaviour, but similar ones occur -more often than one wants in ``real life''. For example, on 20 -July 2016 a similar regular expression brought the webpage +seconds for finding out whether a string of 28 \texttt{a}s matches the +regular expression \texttt{a?\{28\}\,a\{28\}}. Ruby is even slightly +worse.\footnote{In this example Ruby uses the slightly different + regular expression \texttt{a?a?a?...a?a?aaa...aa}, where the + \texttt{a?} and \texttt{a} each occur $n$ times. More such test + cases can be found at \url{http://www.computerbytesman.com/redos/}.} +Simlarly, Python and Java needs approximately 30 seconds to find out +that the regular expression $\texttt{(a*)*\,b}$ does not match strings +of 28 \texttt{a}s. Admittedly, these regular expressions are +carefully chosen to exhibit this exponential behaviour, but similar +ones occur more often than one wants in ``real life''. For example, on +20 July 2016 a similar regular expression brought the webpage \href{http://stackexchange.com}{Stack Exchange} to its knees: \begin{center} diff -r 16742bf62365 -r 96129128d0f1 handouts/ho02.pdf Binary file handouts/ho02.pdf has changed diff -r 16742bf62365 -r 96129128d0f1 progs/catastrophic2.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/catastrophic2.py Mon Oct 10 14:58:03 2016 +0100 @@ -0,0 +1,20 @@ +#!/usr/bin/env python +import re +import sys + +# case of catastrophic backtracking in Python +# +# regex: (a*)*b +# strings: aa...a +# +# call with timing as: +# +# > time ./catastrophic.py 20 + +# counter n given on the command line +cn = sys.argv[1] + +# calling the matching function +m = re.match('(a*)*b' , "a" * int(cn)) + +print m.group(0) diff -r 16742bf62365 -r 96129128d0f1 slides/slides01.pdf Binary file slides/slides01.pdf has changed diff -r 16742bf62365 -r 96129128d0f1 slides/slides02.pdf Binary file slides/slides02.pdf has changed diff -r 16742bf62365 -r 96129128d0f1 slides/slides03.pdf Binary file slides/slides03.pdf has changed diff -r 16742bf62365 -r 96129128d0f1 slides/slides04.pdf Binary file slides/slides04.pdf has changed