# HG changeset patch
# User Christian Urban <urbanc@in.tum.de>
# Date 1471947973 -7200
# Node ID 4ae59fd3b17495d7726f8481b6f9ac877caf95fa
# Parent  065ca01b62aeb362aae9e9c3d55d6dd7801c1c4e
updated

diff -r 065ca01b62ae -r 4ae59fd3b174 data.sty
--- a/data.sty	Mon Aug 22 23:05:43 2016 +0200
+++ b/data.sty	Tue Aug 23 12:26:13 2016 +0200
@@ -71,7 +71,7 @@
 28  29.81185
 \end{filecontents}
 
-
+%% Scala V1, example a?{n} a{n}
 \begin{filecontents}{re1.data}
 1 0.00179
 2 0.00011
@@ -96,59 +96,64 @@
 21 21.46013
 \end{filecontents}
 
-\begin{filecontents}{re2a.data}
-1 0.00227
-5 0.00027
-10 0.00075
-15 0.00178
-20 0.00102
-25 0.00028
-30 0.00040
-35 0.00052
-40 0.00075
-45 0.00125
-50 0.00112
-55 0.00099
-60 0.00113
-65 0.00137
-70 0.00170
+%% Scala V1, example (a*)* b
+\begin{filecontents}{re1a.data}
+1 0.00013
+501 0.03610
+1001 0.08543
+1501 0.06424
+2001 0.11329
+2501 0.19428
+3001 0.31500
+3501 0.37748
+4001 0.49636
+4501 0.63803
+5001 0.75320
+5501 0.92438
+6001 1.01979
+6501 1.21709
 \end{filecontents}
 
-\begin{filecontents}{re2b.data}
-1 0.00020
-51 0.00080
-101 0.00678
-151 0.01792
-201 0.04815
-251 0.09648
-301 0.23195
-351 0.52646
-401 0.96277
-451 1.57726
-501 2.00166
-551 2.98341
-601 4.81181
-651 6.57054
-701 9.73973
-751 14.25762
-801 14.80760
-851 19.60958
-901 25.43550
-951 31.96038
+%% Scala V2, example a?{n} a{n}
+\begin{filecontents}{re2.data}
+1 0.00004
+51 0.00079
+101 0.00752
+151 0.02008
+201 0.04462
+251 0.07715
+301 0.13366
+351 0.21700
+401 0.34163
+451 0.47499
+501 0.65810
+551 0.88666
+601 1.17030
+651 1.51570
+701 1.91133
+751 2.38194
+801 3.10061
+851 4.35828
+901 4.73774
+951 6.05984
+1001 6.68179
 \end{filecontents}
 
-\begin{filecontents}{re2c.data}
-1 0.00070
-501 0.02137
-1001 0.04070
-1501 0.08710
-2001 0.14441
-2501 0.21303
-3001 0.33987
-3501 0.43522
-4001 0.55873
+%% Scala V2, example (a*)* b
+\begin{filecontents}{re2a.data}
+1 0.00009
+501 0.02582
+1001 0.03406
+1501 0.07891
+2001 0.14417
+2501 0.22065
+3001 0.33145
+3501 0.44883
+4001 0.63173
+4501 0.81166
 \end{filecontents}
 
+%% Scala V4, example a?{n} a{n}
 \begin{filecontents}{re3.data}
 1 0.001605
 501 0.131066
@@ -176,20 +181,56 @@
 11501 7.95864
 \end{filecontents}
 
+%% Scala V4, example (a*)* b
 \begin{filecontents}{re3a.data}
-1 0.00006
-500001 0.12975
-1000001 0.38887
-1500001 0.61373
-2000001 0.78268
-2500001 0.98069
-3000001 1.41858
-3500001 1.68450
-4000001 1.84533
-4500001 1.91059
-5000001 1.98881
-5500001 2.66755
-6000001 3.16683
+1 0.00015
+500001 0.16143
+1000001 0.39022
+1500001 0.69623
+2000001 0.92665
+2500001 1.04422
+3000001 1.31200
+3500001 1.51645
+4000001 1.85757
+4500001 1.85903
+5000001 2.01376
+5500001 2.57703
+6000001 4.70017
+6500001 8.62478
+7000001 13.10504
+7500001 17.97648
+\end{filecontents}
+
+%% Scala V5, example a?{n} a{n}
+\begin{filecontents}{re4.data}
+1 0.00007
+1000001 0.65112
+2000001 1.25011
+3000001 1.34581
+4000001 1.68338
+5000001 4.15276
+6000001 13.86084
+7000001 26.39926
+\end{filecontents}
+
+%% Scala V5, example (a*)* b
+\begin{filecontents}{re4a.data}
+1 0.00015
+500001 0.18391
+1000001 0.53715
+1500001 0.72122
+2000001 1.00681
+2500001 1.46824
+3000001 1.77297
+3500001 2.09406
+4000001 2.32992
+4500001 3.54669
+5000001 4.40195
+5500001 3.41926
+6000001 7.32595
+6500001 12.27531
+7000001 19.29151
+7500001 26.28967
 \end{filecontents}
  
 \begin{filecontents}{nfa.data}
diff -r 065ca01b62ae -r 4ae59fd3b174 handouts/ho01.pdf
Binary file handouts/ho01.pdf has changed
diff -r 065ca01b62ae -r 4ae59fd3b174 handouts/ho01.tex
--- a/handouts/ho01.tex	Mon Aug 22 23:05:43 2016 +0200
+++ b/handouts/ho01.tex	Tue Aug 23 12:26:13 2016 +0200
@@ -24,7 +24,7 @@
 %% emacs regexes
 %% https://www.gnu.org/software/emacs/manual/html_node/elisp/Regular-Expressions.html
 
-%% reasons for a new prgramming language
+%%  reasons for a new prgramming language
 %% http://beautifulracket.com
 
 \begin{document}
@@ -182,13 +182,17 @@
 are many libraries implementing regular expressions. I am sure
 you have come across them before (remember PRA?). Why on earth
 then is there any interest in studying them again in depth in
-this module? Well, one answer is in the following graph about
-regular expression matching in Python and in Ruby.
+this module? Well, one answer is in the following two graphs about
+regular expression matching in Python, Ruby and Java.
 
 \begin{center}
+\begin{tabular}{@{}cc@{}}
 \begin{tikzpicture}
 \begin{axis}[
-    xlabel={strings of {\tt a}s},
+    title={Graph: $\texttt{a?\{n\}\,a{\{n\}}}$ and strings 
+           $\underbrace{\texttt{a}\ldots \texttt{a}}_{n}$},
+    xlabel={$n$},
+    x label style={at={(1.05,0.0)}},
     ylabel={time in secs},
     enlargelimits=false,
     xtick={0,5,...,30},
@@ -197,7 +201,7 @@
     ytick={0,5,...,30},
     scaled ticks=false,
     axis lines=left,
-    width=7cm,
+    width=5.5cm,
     height=4.5cm, 
     legend entries={Python,Ruby},  
     legend pos=north west,
@@ -208,17 +212,43 @@
   table {re-ruby.data};  
 \end{axis}
 \end{tikzpicture}
+&
+\begin{tikzpicture}
+\begin{axis}[
+    title={Graph: $\texttt{(a*)*\,b}$ and strings 
+           $\underbrace{\texttt{a}\ldots \texttt{a}}_{n}$},
+    xlabel={$n$},
+    x label style={at={(1.05,0.0)}},
+    ylabel={time in secs},
+    enlargelimits=false,
+    xtick={0,5,...,30},
+    xmax=33,
+    ymax=35,
+    ytick={0,5,...,30},
+    scaled ticks=false,
+    axis lines=left,
+    width=5.5cm,
+    height=4.5cm, 
+    legend entries={Java},  
+    legend pos=north west,
+    legend cell align=left]
+\addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
+\end{axis}
+\end{tikzpicture}
+\end{tabular}
 \end{center}
 
-\noindent This graph shows that Python needs approximately 29
+\noindent This first graph shows that Python needs approximately 29
 seconds for finding out whether a string of 28 \texttt{a}s
-matches the regular expression \texttt{[a?]\{28\}[a]\{28\}}.
+matches the regular expression \texttt{a?\{28\}\,a\{28\}}.
 Ruby is even slightly worse.\footnote{In this example Ruby
 uses the slightly different regular expression
 \texttt{a?a?a?...a?a?aaa...aa}, where the \texttt{a?} and
 \texttt{a} each occur $n$ times. More such test cases can be
-found at \url{http://www.computerbytesman.com/redos/}.}
-Admittedly, this regular expression is carefully chosen to
+found at \url{http://www.computerbytesman.com/redos/}.} Simlarly, Java needs approximately
+30 seconds to find out that the regular expression $\texttt{(a*)*\,b}$
+does not match strings of 28 \texttt{a}s.
+Admittedly, these regular expressions are carefully chosen to
 exhibit this exponential behaviour, but similar ones occur
 more often than one wants in ``real life''. For example, on 20
 July 2016 a similar regular expression brought the webpage
@@ -229,17 +259,17 @@
 \end{center}
 
 \noindent
-Such troublesome regular expressions are sometimes called
-\emph{evil regular expressions} because they have the
-potential to make regular expression matching engines to
-topple over, like in Python and Ruby. The problem with evil
-regular expressions is that they can have some serious
-consequences, for example, if you use them in your
-web-application. The reason is that hackers can look for these
-instances where the matching engine behaves badly and then
-mount a nice DoS-attack against your application. These
-attacks are already have their own name: \emph{Regular
-Expression Denial of Service Attacks (ReDoS)}.
+Such troublesome regular expressions are sometimes called \emph{evil
+  regular expressions} because they have the potential to make regular
+expression matching engines to topple over, like in Python, Ruby and
+Java. This `toopling over' is also sometimes called \emph{catastrophic
+  backtracking}.  The problem with evil regular expressions is that
+they can have some serious consequences, for example, if you use them
+in your web-application. The reason is that hackers can look for these
+instances where the matching engine behaves badly and then mount a
+nice DoS-attack against your application. These attacks are already
+have their own name: \emph{Regular Expression Denial of Service
+  Attacks (ReDoS)}.
 
 It will be instructive to look behind the ``scenes'' to find
 out why Python and Ruby (and others) behave so badly when
@@ -254,23 +284,53 @@
 \begin{center}
 \begin{tikzpicture}
   \begin{axis}[
-    xlabel={strings of \pcode{a}s},
+    title={Graph: $\texttt{a?\{n\}\,a{\{n\}}}$ and strings 
+           $\underbrace{\texttt{a}\ldots \texttt{a}}_{n}$},
+    xlabel={$n$},
+    x label style={at={(1.05,0.0)}},
     ylabel={time in secs},
     enlargelimits=false,
     xtick={0,3000,...,12000},
-    xmax=12500,
-    ymax=35,
-    ytick={0,5,...,30},
+    xmax=13000,
+    ymax=12,
+    ytick={0,5,10},
     scaled ticks=false,
     axis lines=left,
-    width=9cm,
-    height=4.5cm]
-\addplot[green,mark=square*,mark options={fill=white}] table {re2b.data};
+    width=7cm,
+    height=4.5cm,
+    legend entries={Our Algorithm V1, Our Algorithm V2},
+    legend pos=outer north east]
+\addplot[green,mark=square*,mark options={fill=white}] table {re2.data};
 \addplot[black,mark=square*,mark options={fill=white}] table {re3.data};
 \end{axis}
 \end{tikzpicture}
 \end{center}
 
+\noindent And in the case of the regular expression $\texttt{(a*)*\,b}$
+and strings of \texttt{a}s we will beat Java by factor of
+approximately 1,000,000 (note the scale on the $x$-axis). 
+
+\begin{center}
+\begin{tikzpicture}
+  \begin{axis}[
+    title={Graph: $\texttt{(a*)*\,b}$ and strings 
+           $\underbrace{\texttt{a}\ldots \texttt{a}}_{n}$},
+    xlabel={$n$},
+    x label style={at={(1.05,0.0)}},
+    ylabel={time in secs},
+    enlargelimits=false,
+    ymax=12,
+    ytick={0,5,10},
+    axis lines=left,
+    width=7cm,
+    height=4.5cm,
+    legend entries={Our Algorithm V2},
+    legend pos=outer north east]
+\addplot[black,mark=square*,mark options={fill=white}] table {re3a.data};
+\end{axis}
+\end{tikzpicture}
+\end{center}
+
 \subsection*{Basic Regular Expressions}
 
 The regular expressions shown earlier for Scala, we
diff -r 065ca01b62ae -r 4ae59fd3b174 handouts/ho02.pdf
Binary file handouts/ho02.pdf has changed
diff -r 065ca01b62ae -r 4ae59fd3b174 handouts/ho02.tex
--- a/handouts/ho02.tex	Mon Aug 22 23:05:43 2016 +0200
+++ b/handouts/ho02.tex	Tue Aug 23 12:26:13 2016 +0200
@@ -24,7 +24,7 @@
 
 
 \begin{center}
-Plots: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$\\
+Graphs: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$\\
 \begin{tabular}{@{}cc@{}}
 \begin{tikzpicture}
 \begin{axis}[
@@ -64,7 +64,7 @@
     axis lines=left,
     width=6.5cm,
     height=5cm]
-\addplot[green,mark=square*,mark options={fill=white}] table {re2b.data};
+\addplot[green,mark=square*,mark options={fill=white}] table {re2.data};
 \addplot[black,mark=square*,mark options={fill=white}] table {re3.data};
 \end{axis}
 \end{tikzpicture}
@@ -72,7 +72,7 @@
 \end{center}
 
 \begin{center}
-Plots: $(a^*)^* \cdot b$ and strings $\underbrace{a\ldots a}_{n}$
+Graphs: $(a^*)^* \cdot b$ and strings $\underbrace{a\ldots a}_{n}$
 \begin{tabular}{@{}cc@{}}
 \begin{tikzpicture}
 \begin{axis}[
@@ -91,7 +91,7 @@
     legend entries={Java},  
     legend pos=north west,
     legend cell align=left]
-\addplot[blue,mark=*, mark options={fill=white}] table {re-java.data};
+\addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
 \end{axis}
 \end{tikzpicture}
 &
@@ -109,7 +109,7 @@
     axis lines=left,
     width=6.5cm,
     height=5cm]
-\addplot[green,mark=square*,mark options={fill=white}] table {re2b.data};
+\addplot[green,mark=square*,mark options={fill=white}] table {re2.data};
 \addplot[black,mark=square*,mark options={fill=white}] table {re3.data};
 \end{axis}
 \end{tikzpicture}
@@ -482,32 +482,33 @@
 \begin{center}
 \begin{tikzpicture}
 \begin{axis}[
-    title={Plot: $(a^*)^* \cdot b$ and strings $\underbrace{a\ldots a}_{n}$},
+    title={Graph: $(a^*)^* \cdot b$ and strings $\underbrace{a\ldots a}_{n}$},
     xlabel={$n$},
     x label style={at={(1.05,0.0)}},
     ylabel={time in secs},
     enlargelimits=false,
-    xtick={0,500,...,4000},
-    xmax=4100,
+    xtick={0,1000,...,6500},
+    xmax=6800,
     ytick={0,5,...,30},
+    ymax=34,
     scaled ticks=false,
     axis lines=left,
-    width=10cm,
+    width=8cm,
     height=4.5cm, 
     legend entries={Java,Scala V1},  
     legend pos=north east,
     legend cell align=left]
-\addplot[blue,mark=*, mark options={fill=white}] 
+\addplot[cyan,mark=*, mark options={fill=white}] 
   table {re-java.data};
 \addplot[red,mark=triangle*,mark options={fill=white}]
-  table {re2c.data};
+  table {re1a.data};
 \end{axis}
 \end{tikzpicture}
 \end{center}
 
 \noindent
-This is no error: it hardly takes more than half a second for
-strings up to the length of 4000. After that we receive a
+This is not an error: it hardly takes more than half a second for
+strings up to the length of 6500. After that we receive a
 StackOverflow exception, but still\ldots
 
 For running the algorithm with our first example, the evil
@@ -524,13 +525,13 @@
 \begin{center}
 \begin{tikzpicture}
 \begin{axis}[    
-    title={Plot: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$},
+    title={Graph: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$},
     xlabel={$n$},
     x label style={at={(1.05,0.0)}},
     ylabel={time in secs},
     enlargelimits=false,
     xtick={0,5,...,30},
-    xmax=30,
+    xmax=32,
     ytick={0,5,...,30},
     scaled ticks=false,
     axis lines=left,
@@ -538,7 +539,7 @@
     height=5cm, 
     legend entries={Python,Ruby,Scala V1},  
     legend pos=outer north east,
-    legend cell align=left  ]
+    legend cell align=left]
 \addplot[blue,mark=*, mark options={fill=white}] 
   table {re-python.data};
 \addplot[brown,mark=pentagon*, mark options={fill=white}] 
@@ -584,7 +585,7 @@
 \begin{center}
 \begin{tikzpicture}
 \begin{axis}[
-    title={Plot: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$},
+    title={Graph: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$},
     xlabel={$n$},
     x label style={at={(1.01,0.0)}},
     ylabel={time in secs},
@@ -606,7 +607,7 @@
 \addplot[red,mark=triangle*,mark options={fill=white}] 
   table {re1.data};  
 \addplot[green,mark=square*,mark options={fill=white}] 
-  table {re2b.data};
+  table {re2.data};
 \end{axis}
 \end{tikzpicture}
 \end{center}
@@ -676,7 +677,7 @@
 \begin{center}
 \begin{tikzpicture}
 \begin{axis}[
-    title={Plot: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$},
+    title={Graph: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$},
     xlabel={$n$},
     x label style={at={(1.04,0.0)}},
     ylabel={time in secs},
@@ -684,42 +685,121 @@
     xtick={0,2000,...,12000},
     xmax=13000,
     ytick={0,5,...,30},
+    ymax=12,
     scaled ticks=false,
     axis lines=left,
     width=9cm,
     height=5cm, 
-    legend entries={Scala V2,Scala V3}]
-\addplot[green,mark=square*,mark options={fill=white}] table {re2b.data};
+    legend entries={Scala V2,Scala V3},
+    legend pos=outer north east,
+    legend cell align=left]
+\addplot[green,mark=square*,mark options={fill=white}] table {re2.data};
 \addplot[black,mark=square*,mark options={fill=white}] table {re3.data};
 \end{axis}
 \end{tikzpicture}
 \end{center}
 
+\noindent
+To reacap, Python and Ruby needed approximately 30 seconds to match
+a string of 28 \texttt{a}s and the regular expression $a^{?\{n\}} \cdot a^{\{n\}}$. 
+We need a third of this time to do the same with strings up to 12,000 \texttt{a}s. 
+Similarly, Java needed 30 seconds to find out the regular expression 
+$(a^*)^* \cdot b$ does not match the string of 28 \texttt{a}s. We can do
+the same in approximately 5 seconds for strings of 6000000 \texttt{a}s:
+
+
 \begin{center}
 \begin{tikzpicture}
 \begin{axis}[
-    title={Plot: $(a^*)^* \cdot b$ and strings $\underbrace{a\ldots a}_{n}$},
+    title={Graph: $(a^*)^* \cdot b$ and strings $\underbrace{a\ldots a}_{n}$},
     xlabel={$n$},
     x label style={at={(1.09,0.0)}},
     ylabel={time in secs},
     enlargelimits=false,
-    %xtick={0,1e+6,...,6e+6},
-    xmax=6200000,
+    xmax=7700000,
     ytick={0,5,...,30},
-    ymax=33,
-    scaled ticks=false,
+    ymax=15,
+    %scaled ticks=false,
     axis lines=left,
     width=9cm,
     height=5cm, 
-    legend entries={Scala V3}]
+    legend entries={Scala V2, Scala V3},
+    legend pos=outer north east,
+    legend cell align=left]
+\addplot[green,mark=square*,mark options={fill=white}] table {re2a.data};
 \addplot[black,mark=square*,mark options={fill=white}] table {re3a.data};
 \end{axis}
 \end{tikzpicture}
 \end{center}
 
-\section{Epilogue}
+\subsection*{Epilogue}
+
+(23/Aug/2016) I recently found another place where this algorithm can be 
+sped (this idea is not integrated with what is coming next,
+but I present it nonetheless). The idea is to define \texttt{ders}
+not such that it iterates the derivative character-by-character, but
+in bigger chunks. The resulting code for \texttt{ders2} looks as
+follows:
+
+\lstinputlisting[numbers=none]{../progs/app52.scala} 
+
+\noindent
+I have not fully understood why this version is much faster,
+but it seems it is a combination of the clauses for \texttt{ALT}
+and \texttt{SEQ}. In the latter case we call \texttt{der} with 
+a single character and this potentially produces an alternative.
+The derivative of such an alternative can then be more effeciently  
+calculated by \texttt{ders2} since it pushes a whole string
+under an \texttt{ALT}. The numbers are that in the second case  
+$(a^*)^* \cdot b$ both versions are pretty much the same, but in the 
+first case $a^{?\{n\}} \cdot a^{\{n\}}$ the improvement gives 
+another factor of 100 speedup. Nice!
 
-TBD
+\begin{center}
+\begin{tabular}{cc}
+\begin{tikzpicture}
+\begin{axis}[
+    title={Graph: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$},
+    xlabel={$n$},
+    x label style={at={(1.04,0.0)}},
+    ylabel={time in secs},
+    enlargelimits=false,
+    xmax=7100000,
+    ytick={0,5,...,30},
+    ymax=33,
+    %scaled ticks=false,
+    axis lines=left,
+    width=5cm,
+    height=5cm, 
+    legend entries={Scala V3, Scala V4},
+    legend pos=north west]
+\addplot[black,mark=square*,mark options={fill=white}] table {re3.data};
+\addplot[purple,mark=square*,mark options={fill=white}] table {re4.data};
+\end{axis}
+\end{tikzpicture}
+&
+\begin{tikzpicture}
+\begin{axis}[
+    title={Graph: $(a^*)^* \cdot b$ and strings $\underbrace{a\ldots a}_{n}$},
+    xlabel={$n$},
+    x label style={at={(1.09,0.0)}},
+    ylabel={time in secs},
+    enlargelimits=false,
+    xmax=8100000,
+    ytick={0,5,...,30},
+    ymax=33,
+    %scaled ticks=false,
+    axis lines=left,
+    width=5cm,
+    height=5cm, 
+    legend entries={Scala V3, Scala V4},
+    legend pos=north west]
+\addplot[black,mark=square*,mark options={fill=white}] table {re3a.data};
+\addplot[purple,mark=square*,mark options={fill=white}] table {re4a.data};
+\end{axis}
+\end{tikzpicture}
+\end{tabular}
+\end{center}
 
 
 \section*{Proofs}
diff -r 065ca01b62ae -r 4ae59fd3b174 progs/app52.scala
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/app52.scala	Tue Aug 23 12:26:13 2016 +0200
@@ -0,0 +1,9 @@
+def ders2(s: List[Char], r: Rexp) : Rexp = (s, r) match {
+  case (Nil, r) => r
+  case (s, ZERO) => ZERO
+  case (s, ONE) => if (s == Nil) ONE else ZERO
+  case (s, CHAR(c)) => if (s == List(c)) ONE else 
+                       if (s == Nil) CHAR(c) else ZERO
+  case (s, ALT(r1, r2)) => ALT(ders2(s, r2), ders2(s, r2))
+  case (c::s, r) => ders2(s, der(c, r).simp)
+}
diff -r 065ca01b62ae -r 4ae59fd3b174 progs/crawler2.scala
--- a/progs/crawler2.scala	Mon Aug 22 23:05:43 2016 +0200
+++ b/progs/crawler2.scala	Tue Aug 23 12:26:13 2016 +0200
@@ -33,8 +33,8 @@
 }
 
 // starting URL for the crawler
-//val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc"""
-val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/msc-projects-14.html"""
+val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc"""
+
 
 // can now deal with depth 3 and beyond
 crawl(startURL, 2)
diff -r 065ca01b62ae -r 4ae59fd3b174 progs/re1.scala
--- a/progs/re1.scala	Mon Aug 22 23:05:43 2016 +0200
+++ b/progs/re1.scala	Tue Aug 23 12:26:13 2016 +0200
@@ -55,7 +55,8 @@
 }
 
 // the evil regular expression  a?{n} a{n}
-def EVIL(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
+def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
+val EVIL2 = SEQ(STAR(CHAR('a')), CHAR('b'))
 
 //for measuring time
 def time_needed[T](i: Int, code: => T) = {
@@ -66,8 +67,11 @@
 }
 
 
-for (i <- 1 to 29) {
-  println(i + ": " + "%.5f".format(time_needed(1, matches(EVIL(i), "a" * i))))
+for (i <- 1 to 20) {
+  println(i + ": " + "%.5f".format(time_needed(2, matches(EVIL1(i), "a" * i))))
 }
 
+for (i <- 1 to 6502 by 500) {
+  println(i + " " + "%.5f".format(time_needed(2, matches(EVIL2, "a" * i))))
+}
 
diff -r 065ca01b62ae -r 4ae59fd3b174 progs/re2.scala
--- a/progs/re2.scala	Mon Aug 22 23:05:43 2016 +0200
+++ b/progs/re2.scala	Tue Aug 23 12:26:13 2016 +0200
@@ -51,18 +51,18 @@
   (end - start)/(i * 1.0e9)
 }
 
-//for (i <- 1 to 100) {
-//  println(i + ": " + "%.5f".format(time_needed(1, matches(EVIL1(i), "a" * i))))
-//}
+for (i <- 1 to 100) {
+  println(i + ": " + "%.5f".format(time_needed(1, matches(EVIL1(i), "a" * i))))
+}
 
 //a bit bolder test
-for (i <- 1 to 1000 by 50) {
-  println(i + " " + "%.5f".format(time_needed(1, matches(EVIL1(i), "a" * i))))
+for (i <- 1 to 1001 by 50) {
+  println(i + " " + "%.5f".format(time_needed(2, matches(EVIL1(i), "a" * i))))
 }
 
 
-for (i <- 1 to 4002 by 500) {
-  println(i + " " + "%.5f".format(time_needed(4, matches(EVIL2, "a" * i))))
+for (i <- 1 to 4501 by 500) {
+  println(i + " " + "%.5f".format(time_needed(2, matches(EVIL2, "a" * i))))
 }
 
 
diff -r 065ca01b62ae -r 4ae59fd3b174 progs/re4.scala
--- a/progs/re4.scala	Mon Aug 22 23:05:43 2016 +0200
+++ b/progs/re4.scala	Tue Aug 23 12:26:13 2016 +0200
@@ -106,11 +106,11 @@
 val i = 5000
 println(i + " " + "%.5f".format(time_needed(10, matcher(EVIL1(i), "a" * i))))
 
-//for (i <- 1 to 10000 by 1000) {
-//  println(i + " " + "%.5f".format(time_needed(10, matcher(EVIL1(i), "a" * i))))
-//}
-
-for (i <- 1 to 6000001 by 500000) {
-  println(i + " " + "%.5f".format(time_needed(10, matcher(EVIL2, "a" * i))))
+for (i <- 1 to 9001 by 1000) {
+  println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL1(i), "a" * i))))
 }
 
+for (i <- 1 to 7500001 by 500000) {
+  println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL2, "a" * i))))
+}
+
diff -r 065ca01b62ae -r 4ae59fd3b174 progs/re5.scala
--- a/progs/re5.scala	Mon Aug 22 23:05:43 2016 +0200
+++ b/progs/re5.scala	Tue Aug 23 12:26:13 2016 +0200
@@ -108,11 +108,11 @@
 val i = 5000
 println(i + " " + "%.5f".format(time_needed(10, matcher(EVIL1(i), "a" * i))))
 
-for (i <- 1 to 80000 by 10000) {
+for (i <- 1 to 7000001 by 1000000) {
   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL1(i), "a" * i))))
 }
 
-for (i <- 1 to 10000001 by 1000000) {
+for (i <- 1 to 7500001 by 500000) {
   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL2, "a" * i))))
 }