diff -r 065ca01b62ae -r 4ae59fd3b174 handouts/ho02.tex --- a/handouts/ho02.tex Mon Aug 22 23:05:43 2016 +0200 +++ b/handouts/ho02.tex Tue Aug 23 12:26:13 2016 +0200 @@ -24,7 +24,7 @@ \begin{center} -Plots: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$\\ +Graphs: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$\\ \begin{tabular}{@{}cc@{}} \begin{tikzpicture} \begin{axis}[ @@ -64,7 +64,7 @@ axis lines=left, width=6.5cm, height=5cm] -\addplot[green,mark=square*,mark options={fill=white}] table {re2b.data}; +\addplot[green,mark=square*,mark options={fill=white}] table {re2.data}; \addplot[black,mark=square*,mark options={fill=white}] table {re3.data}; \end{axis} \end{tikzpicture} @@ -72,7 +72,7 @@ \end{center} \begin{center} -Plots: $(a^*)^* \cdot b$ and strings $\underbrace{a\ldots a}_{n}$ +Graphs: $(a^*)^* \cdot b$ and strings $\underbrace{a\ldots a}_{n}$ \begin{tabular}{@{}cc@{}} \begin{tikzpicture} \begin{axis}[ @@ -91,7 +91,7 @@ legend entries={Java}, legend pos=north west, legend cell align=left] -\addplot[blue,mark=*, mark options={fill=white}] table {re-java.data}; +\addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data}; \end{axis} \end{tikzpicture} & @@ -109,7 +109,7 @@ axis lines=left, width=6.5cm, height=5cm] -\addplot[green,mark=square*,mark options={fill=white}] table {re2b.data}; +\addplot[green,mark=square*,mark options={fill=white}] table {re2.data}; \addplot[black,mark=square*,mark options={fill=white}] table {re3.data}; \end{axis} \end{tikzpicture} @@ -482,32 +482,33 @@ \begin{center} \begin{tikzpicture} \begin{axis}[ - title={Plot: $(a^*)^* \cdot b$ and strings $\underbrace{a\ldots a}_{n}$}, + title={Graph: $(a^*)^* \cdot b$ and strings $\underbrace{a\ldots a}_{n}$}, xlabel={$n$}, x label style={at={(1.05,0.0)}}, ylabel={time in secs}, enlargelimits=false, - xtick={0,500,...,4000}, - xmax=4100, + xtick={0,1000,...,6500}, + xmax=6800, ytick={0,5,...,30}, + ymax=34, scaled ticks=false, axis lines=left, - width=10cm, + width=8cm, height=4.5cm, legend entries={Java,Scala V1}, legend pos=north east, legend cell align=left] -\addplot[blue,mark=*, mark options={fill=white}] +\addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data}; \addplot[red,mark=triangle*,mark options={fill=white}] - table {re2c.data}; + table {re1a.data}; \end{axis} \end{tikzpicture} \end{center} \noindent -This is no error: it hardly takes more than half a second for -strings up to the length of 4000. After that we receive a +This is not an error: it hardly takes more than half a second for +strings up to the length of 6500. After that we receive a StackOverflow exception, but still\ldots For running the algorithm with our first example, the evil @@ -524,13 +525,13 @@ \begin{center} \begin{tikzpicture} \begin{axis}[ - title={Plot: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$}, + title={Graph: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$}, xlabel={$n$}, x label style={at={(1.05,0.0)}}, ylabel={time in secs}, enlargelimits=false, xtick={0,5,...,30}, - xmax=30, + xmax=32, ytick={0,5,...,30}, scaled ticks=false, axis lines=left, @@ -538,7 +539,7 @@ height=5cm, legend entries={Python,Ruby,Scala V1}, legend pos=outer north east, - legend cell align=left ] + legend cell align=left] \addplot[blue,mark=*, mark options={fill=white}] table {re-python.data}; \addplot[brown,mark=pentagon*, mark options={fill=white}] @@ -584,7 +585,7 @@ \begin{center} \begin{tikzpicture} \begin{axis}[ - title={Plot: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$}, + title={Graph: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$}, xlabel={$n$}, x label style={at={(1.01,0.0)}}, ylabel={time in secs}, @@ -606,7 +607,7 @@ \addplot[red,mark=triangle*,mark options={fill=white}] table {re1.data}; \addplot[green,mark=square*,mark options={fill=white}] - table {re2b.data}; + table {re2.data}; \end{axis} \end{tikzpicture} \end{center} @@ -676,7 +677,7 @@ \begin{center} \begin{tikzpicture} \begin{axis}[ - title={Plot: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$}, + title={Graph: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$}, xlabel={$n$}, x label style={at={(1.04,0.0)}}, ylabel={time in secs}, @@ -684,42 +685,121 @@ xtick={0,2000,...,12000}, xmax=13000, ytick={0,5,...,30}, + ymax=12, scaled ticks=false, axis lines=left, width=9cm, height=5cm, - legend entries={Scala V2,Scala V3}] -\addplot[green,mark=square*,mark options={fill=white}] table {re2b.data}; + legend entries={Scala V2,Scala V3}, + legend pos=outer north east, + legend cell align=left] +\addplot[green,mark=square*,mark options={fill=white}] table {re2.data}; \addplot[black,mark=square*,mark options={fill=white}] table {re3.data}; \end{axis} \end{tikzpicture} \end{center} +\noindent +To reacap, Python and Ruby needed approximately 30 seconds to match +a string of 28 \texttt{a}s and the regular expression $a^{?\{n\}} \cdot a^{\{n\}}$. +We need a third of this time to do the same with strings up to 12,000 \texttt{a}s. +Similarly, Java needed 30 seconds to find out the regular expression +$(a^*)^* \cdot b$ does not match the string of 28 \texttt{a}s. We can do +the same in approximately 5 seconds for strings of 6000000 \texttt{a}s: + + \begin{center} \begin{tikzpicture} \begin{axis}[ - title={Plot: $(a^*)^* \cdot b$ and strings $\underbrace{a\ldots a}_{n}$}, + title={Graph: $(a^*)^* \cdot b$ and strings $\underbrace{a\ldots a}_{n}$}, xlabel={$n$}, x label style={at={(1.09,0.0)}}, ylabel={time in secs}, enlargelimits=false, - %xtick={0,1e+6,...,6e+6}, - xmax=6200000, + xmax=7700000, ytick={0,5,...,30}, - ymax=33, - scaled ticks=false, + ymax=15, + %scaled ticks=false, axis lines=left, width=9cm, height=5cm, - legend entries={Scala V3}] + legend entries={Scala V2, Scala V3}, + legend pos=outer north east, + legend cell align=left] +\addplot[green,mark=square*,mark options={fill=white}] table {re2a.data}; \addplot[black,mark=square*,mark options={fill=white}] table {re3a.data}; \end{axis} \end{tikzpicture} \end{center} -\section{Epilogue} +\subsection*{Epilogue} + +(23/Aug/2016) I recently found another place where this algorithm can be +sped (this idea is not integrated with what is coming next, +but I present it nonetheless). The idea is to define \texttt{ders} +not such that it iterates the derivative character-by-character, but +in bigger chunks. The resulting code for \texttt{ders2} looks as +follows: + +\lstinputlisting[numbers=none]{../progs/app52.scala} + +\noindent +I have not fully understood why this version is much faster, +but it seems it is a combination of the clauses for \texttt{ALT} +and \texttt{SEQ}. In the latter case we call \texttt{der} with +a single character and this potentially produces an alternative. +The derivative of such an alternative can then be more effeciently +calculated by \texttt{ders2} since it pushes a whole string +under an \texttt{ALT}. The numbers are that in the second case +$(a^*)^* \cdot b$ both versions are pretty much the same, but in the +first case $a^{?\{n\}} \cdot a^{\{n\}}$ the improvement gives +another factor of 100 speedup. Nice! -TBD +\begin{center} +\begin{tabular}{cc} +\begin{tikzpicture} +\begin{axis}[ + title={Graph: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$}, + xlabel={$n$}, + x label style={at={(1.04,0.0)}}, + ylabel={time in secs}, + enlargelimits=false, + xmax=7100000, + ytick={0,5,...,30}, + ymax=33, + %scaled ticks=false, + axis lines=left, + width=5cm, + height=5cm, + legend entries={Scala V3, Scala V4}, + legend pos=north west] +\addplot[black,mark=square*,mark options={fill=white}] table {re3.data}; +\addplot[purple,mark=square*,mark options={fill=white}] table {re4.data}; +\end{axis} +\end{tikzpicture} +& +\begin{tikzpicture} +\begin{axis}[ + title={Graph: $(a^*)^* \cdot b$ and strings $\underbrace{a\ldots a}_{n}$}, + xlabel={$n$}, + x label style={at={(1.09,0.0)}}, + ylabel={time in secs}, + enlargelimits=false, + xmax=8100000, + ytick={0,5,...,30}, + ymax=33, + %scaled ticks=false, + axis lines=left, + width=5cm, + height=5cm, + legend entries={Scala V3, Scala V4}, + legend pos=north west] +\addplot[black,mark=square*,mark options={fill=white}] table {re3a.data}; +\addplot[purple,mark=square*,mark options={fill=white}] table {re4a.data}; +\end{axis} +\end{tikzpicture} +\end{tabular} +\end{center} \section*{Proofs}