# HG changeset patch # User Christian Urban # Date 1538347118 -3600 # Node ID 2be8c4c77418f0b45c522186d68e5228f7af59dd # Parent b5d57d7064bbc948c13063a16c255f3c06a019b7 updated diff -r b5d57d7064bb -r 2be8c4c77418 data.sty --- a/data.sty Sun Sep 30 12:01:14 2018 +0100 +++ b/data.sty Sun Sep 30 23:38:38 2018 +0100 @@ -1,5 +1,6 @@ % The data files, written on the first run. +%% example a?{n} a{n} \begin{filecontents}{re-python.data} 1 0.029 5 0.029 @@ -21,6 +22,7 @@ #29 58.886 \end{filecontents} + \begin{filecontents}{re-python2.data} 1 0.033 5 0.036 @@ -39,7 +41,7 @@ 28 26.69 \end{filecontents} - +%% example a?{n} a{n} \begin{filecontents}{re-ruby.data} 1 0.00006 #2 0.00003 @@ -71,6 +73,7 @@ 28 34.79653 \end{filecontents} +% Java 8, example (a*)*b \begin{filecontents}{re-java.data} 5 0.00298 10 0.00418 @@ -90,6 +93,30 @@ 28 29.81185 \end{filecontents} +% Java 9+, example (a*)*b +\begin{filecontents}{re-java9.data} +1000 0.01871 +3000 0.16727 +5000 0.44669 +7000 0.87708 +9000 1.46304 +11000 2.21094 +13000 3.08650 +15000 4.23359 +17000 4.97240 +19000 6.50150 +21000 8.43740 +23000 9.66842 +25000 10.93754 +27000 13.51069 +29000 14.73643 +31000 16.69299 +33000 19.04270 +35000 21.08329 +37000 23.75398 +39000 26.15787 +\end{filecontents} + %% re1.scala: example a?{n} a{n} \begin{filecontents}{re1.data} 1 0.00179 diff -r b5d57d7064bb -r 2be8c4c77418 progs/app6.scala --- a/progs/app6.scala Sun Sep 30 12:01:14 2018 +0100 +++ b/progs/app6.scala Sun Sep 30 23:38:38 2018 +0100 @@ -16,7 +16,6 @@ case (r1s, r2s) => SEQ(r1s, r2s) } } - case NTIMES(r, n) => NTIMES(simp(r), n) case r => r } diff -r b5d57d7064bb -r 2be8c4c77418 progs/app60.scala --- a/progs/app60.scala Sun Sep 30 12:01:14 2018 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4 +0,0 @@ -def ders(s: List[Char], r: Rexp) : Rexp = s match { - case Nil => r - case c::s => ders(s, simp(der(c, r))) -} diff -r b5d57d7064bb -r 2be8c4c77418 slides.sty --- a/slides.sty Sun Sep 30 12:01:14 2018 +0100 +++ b/slides.sty Sun Sep 30 23:38:38 2018 +0100 @@ -40,9 +40,9 @@ \vbox{% \begin{minipage}{1.05\textwidth}% \centering% -\begin{tabular}{@{}c@{}}% +%\begin{tabular}{@{}c@{}}% \insertframetitle% -\end{tabular}% +%\end{tabular}% \end{minipage}\vspace{-10pt}}% } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff -r b5d57d7064bb -r 2be8c4c77418 slides/slides02.pdf Binary file slides/slides02.pdf has changed diff -r b5d57d7064bb -r 2be8c4c77418 slides/slides02.tex --- a/slides/slides02.tex Sun Sep 30 12:01:14 2018 +0100 +++ b/slides/slides02.tex Sun Sep 30 23:38:38 2018 +0100 @@ -44,14 +44,14 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] -\frametitle{\begin{tabular}{@ {}c@ {}} - An Efficient Regular\\[-1mm] - Expression Matcher\end{tabular}} +\begin{frame}[t] + \frametitle{\Large + Lets Implement an Efficient\\[-2mm] + Regular Expression Matcher} \footnotesize \begin{center} - {\normalsize Graphs: \bl{$a^{?\{n\}} \cdot a^{\{n\}}$} and strings \bl{$\underbrace{\,a\ldots a\,}_{n}$}}\smallskip\\ + {\normalsize Graphs: \bl{$a^{?\{n\}} \cdot a^{\{n\}}$} and strings \bl{$\underbrace{\,a\ldots a\,}_{n}$}}\medskip\\ \begin{tabular}{@{}cc@{}} \begin{tikzpicture} \begin{axis}[ @@ -90,7 +90,7 @@ width=5cm, height=4.5cm, legend entries={Scala V2,Scala V3}, - legend pos=north west, + legend pos=north east, legend cell align=left] \addplot[green,mark=square*,mark options={fill=white}] table {re2.data}; \addplot[black,mark=square*,mark options={fill=white}] table {re3.data}; @@ -100,7 +100,7 @@ \end{center} \small -In the handouts is a similar graph for \bl{$(a^*)^* \cdot b$} and Java. +In the handouts is a similar graph for \bl{$(a^*)^* \cdot b$} and Java 8. \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -113,11 +113,11 @@ \item \alert{R}egular \alert{e}xpression \alert{D}enial \alert{o}f \alert{S}ervice (ReDoS)\bigskip \item Evil regular expressions\medskip \begin{itemize} -\item \bl{$(a^{?\{n\}}) \cdot a^{\{n\}}$} +\item \bl{$a^{?\{n\}} \cdot a^{\{n\}}$} \item \bl{$(a^*)^*$} \item \bl{$([a$\,-\,$z]^+)^*$} \item \bl{$(a + a \cdot a)^*$} -\item \bl{$(a + a?)^*$} +\item \bl{$(a + a^?)^*$} \end{itemize} \item sometimes also called \alert{catastrophic backtracking} @@ -231,6 +231,8 @@ \bl{A^0 \cup A^1 \cup A^2 \cup A^3 \cup A^4 \cup \ldots} \] +or + \small \[ \bl{\{[]\} \;\cup\; A \;\cup\; A\,@\,A \;\cup\; @@ -244,8 +246,9 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] -\frametitle{\begin{tabular}{c}The Meaning of a\\[-2mm] - Regular Expression\end{tabular}} +\frametitle{ + The Meaning of a\\[-2mm] + Regular Expression} \begin{textblock}{15}(1,4) \begin{tabular}{rcl} @@ -268,17 +271,19 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] -\frametitle{Semantic Derivative} +\begin{frame}[t] +\frametitle{Semantic Derivative\\[5mm]} + + \begin{itemize} \item The \alert{\bf Semantic Derivative} of a -\underline{language}\\ wrt to a character \bl{$c$}: +\underline{language}\\ w.r.t.~to a character \bl{$c$}: \bigskip \begin{center} \bl{$\Der\,c\,A \dn \{ s \;|\; c\!::\!s \in A\}$ } -\end{center}\bigskip\bigskip\bigskip +\end{center}\bigskip For \bl{$A = \{\textit{foo}, \textit{bar}, \textit{frak}\}$} then @@ -286,7 +291,7 @@ \bl{\begin{tabular}{l@{\hspace{2mm}}c@{\hspace{2mm}}l} $\Der\,f\,A$ & $=$ & $\{\textit{oo}, \textit{rak}\}$\\ $\Der\,b\,A$ & $=$ & $\{\textit{ar}\}$\\ -$\Der\,a\,A$ & $=$ & $\{\}$\\\pause +$\Der\,a\,A$ & $=$ & $\{\}$\pause \end{tabular}} \end{center} @@ -304,7 +309,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] -\frametitle{\begin{tabular}{c}The Specification\\ of Matching\end{tabular}} +\frametitle{The Specification\\ for Matching} \begin{bubble}[10cm] \large @@ -354,9 +359,10 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] -\frametitle{\begin{tabular}{c} +\frametitle{ When Are Two Regular\\[-1mm] - Expressions Equivalent?\end{tabular}} + Expressions Equivalent?} + \large \begin{center} \bl{$r_1 \equiv r_2 \;\;\dn\;\; L(r_1) = L(r_2)$} @@ -404,7 +410,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[t] \frametitle{Simplification Rules} - + \begin{center} \bl{\begin{tabular}{rcl} $r + \ZERO$ & $\equiv$ & $r$\\ @@ -423,7 +429,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[t] -\frametitle{A Matching Algorithm} +\frametitle{A Matching Algorithm (1)} \ldots{}whether a regular expression can match the empty string: @@ -497,11 +503,11 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] -\frametitle{The Algorithm} +\frametitle{\mbox{The Brzozowski Algorithm}} \begin{center} \begin{tabular}{l} -\bl{$\textit{matches}\,r\,s \dn \textit{nullable}(\ders\,r\,s)$} +\bl{$\textit{matches}\,r\,s \dn \textit{nullable}(\ders\;s\;r)$} \end{tabular} \end{center} @@ -511,7 +517,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[t] -\frametitle{An Example} +\frametitle{Brzozowski: An Example} Does \bl{$r_1$} match \bl{$abc$}? \begin{center} @@ -521,7 +527,7 @@ Step 3: & build derivative of \bl{$c$} and \bl{$r_3$} & \bl{$(r_4 = \der\,c\,r_3)$}\smallskip\\ Step 4: & the string is exhausted: & \bl{($nullable(r_4))$}\\ & test whether \bl{$r_4$} can recognise\\ - & the empty string\smallskip\\ + & the empty string\medskip\\ Output: & result of the test\\ & $\Rightarrow \bl{\textit{true}} \,\text{or}\, \bl{\textit{false}}$\\ @@ -554,7 +560,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] -\frametitle{Oops\ldots\bl{$(a^{?\{n\}}) \cdot a^{\{n\}}$}} +\frametitle{Oops\ldots \;\bl{$a^{?\{n\}} \cdot a^{\{n\}}$}} \begin{center} \begin{tikzpicture} @@ -588,8 +594,8 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] -\frametitle{A Problem} +\begin{frame}[t] +\frametitle{A Problem\medskip} We represented the ``n-times'' \bl{$a^{\{n\}}$} as a sequence regular expression: @@ -634,7 +640,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[t] -\frametitle{\bl{$(a^{?\{n\}}) \cdot a^{\{n\}}$}} +\frametitle{Brzozowski: \bl{$a^{?\{n\}} \cdot a^{\{n\}}$}} \begin{center} \begin{tikzpicture} @@ -687,7 +693,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] -\frametitle{Simplifiaction} +\frametitle{Simplification Rules} \begin{center} \bl{\begin{tabular}{rcl} @@ -702,7 +708,7 @@ \end{center} \footnotesize -\lstinputlisting{../progs/app60.scala} +\lstinputlisting[firstline=22,lastline=26]{../progs/app6.scala} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -710,7 +716,7 @@ \begin{frame}[c] \footnotesize -\lstinputlisting{../progs/app6.scala} +\lstinputlisting[firstline=1,lastline=21]{../progs/app6.scala} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -718,7 +724,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[t] -\frametitle{\bl{$(a^{?\{n\}}) \cdot a^{\{n\}}$}} +\frametitle{Brzozowski: \bl{$a^{?\{n\}} \cdot a^{\{n\}}$}} \begin{center} \begin{tikzpicture} @@ -747,41 +753,81 @@ \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%\begin{frame}[c] -%\frametitle{\bl{$(a^*)^* \cdot b$}} -% -%\begin{center} -%\begin{tikzpicture} -% \begin{axis}[ -% xlabel={$n$}, -% x label style={at={(1.09,0.0)}}, -% ylabel={time in secs}, -% enlargelimits=false, -% xmax=27, -% xtick={0,5,...,20}, -% ytick={0,5,...,25}, -% ymax=13, -% scaled ticks=false, -% axis lines=left, -% width=9cm, -% height=5cm, -% legend entries={Scala V2}, -% legend pos=north west, -% legend cell align=left] -%\addplot[green,mark=square*,mark options={fill=white}] table {re2a.data}; -%% still needs to be done -%%\addplot[black,mark=square*,mark options={fill=white}] table {re3a.data}; -%\end{axis} -%\end{tikzpicture} -%\end{center} -% -%\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] -\frametitle{\bl{$(a^*)^* \cdot b$}} + \frametitle{ + Another Example\\[-1mm] + in Java \liningnums{8} and Python} + +\begin{center} +\begin{tikzpicture} + \begin{axis}[ + xlabel={$n$}, + x label style={at={(1.05,0.0)}}, + ylabel={time in secs}, + enlargelimits=false, + xtick={0,5,...,30}, + xmax=33, + ymax=35, + ytick={0,10,...,30}, + scaled ticks=false, + axis lines=left, + width=9cm, + height=5cm, + legend entries={Java 8, Python}, + legend pos=north west, + legend cell align=left] +\addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data}; +\addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data}; +\end{axis} +\end{tikzpicture} +\end{center} + +Regex: \bl{$(a^*)^* \cdot b$} + +Strings of the form \bl{$\underbrace{\,a\ldots a\,}_{n}$} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Same Example in Java \liningnums{9}+} + +\begin{center} +\begin{tikzpicture} + \begin{axis}[ + xlabel={$n$}, + x label style={at={(1.09,-0.15)}}, + ylabel={time in secs}, + scaled x ticks=false, + enlargelimits=false, + xtick distance=10000, + xmax=44000, + ytick={0,10,...,30}, + ymax=35, + axis lines=left, + width=9cm, + height=5cm, + legend entries={Java \liningnums{9}+}, + legend pos=north west, + legend cell align=left] +\addplot[blue,mark=square*,mark options={fill=white}] table {re-java9.data}; +\end{axis} +\end{tikzpicture} +\end{center} + +Regex: \bl{$(a^*)^* \cdot b$} + +Strings of the form \bl{$\underbrace{\,a\ldots a\,}_{n}$} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{and with Brzozowski} \begin{center} \begin{tikzpicture} @@ -789,14 +835,17 @@ xlabel={$n$}, x label style={at={(1.09,0.0)}}, ylabel={time in secs}, + scaled x ticks=false, + xtick distance=2000000, enlargelimits=false, + xmax=6400000, ytick={0,10,...,30}, ymax=35, axis lines=left, width=9cm, height=5cm, legend entries={Scala V3}, - legend pos=north east, + legend pos=north west, legend cell align=left] %\addplot[green,mark=square*,mark options={fill=white}] table {re2a.data}; \addplot[black,mark=square*,mark options={fill=white}] table {re3a.data}; @@ -804,11 +853,13 @@ \end{tikzpicture} \end{center} +Regex: \bl{$(a^*)^* \cdot b$} + +Strings of the form \bl{$\underbrace{\,a\ldots a\,}_{n}$} + \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[t] \frametitle{What is good about this Alg.}