# HG changeset patch # User Christian Urban # Date 1443733800 -3600 # Node ID 2d112a7b29eba326eea241e5bdfd5354dbca626c # Parent fd89a63e9db3796407f50218e64b6667c4df0e89 updated diff -r fd89a63e9db3 -r 2d112a7b29eb slides/slides02.pdf Binary file slides/slides02.pdf has changed diff -r fd89a63e9db3 -r 2d112a7b29eb slides/slides02.tex --- a/slides/slides02.tex Thu Oct 01 21:22:03 2015 +0100 +++ b/slides/slides02.tex Thu Oct 01 22:10:00 2015 +0100 @@ -45,7 +45,8 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] -\frametitle{\begin{tabular}{@ {}c@ {}}An Efficient Regular\\[-1mm] +\frametitle{\begin{tabular}{@ {}c@ {}} + An Efficient Regular\\[-1mm] Expression Matcher\end{tabular}} \footnotesize @@ -103,14 +104,9 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] -\frametitle{Languages, Strings} +\frametitle{Languages} \begin{itemize} -\item \alert{\bf Strings} are lists of characters, for example -\begin{center} -\bl{$[]$},\;\bl{$abc$} \hspace{2cm}(Pattern match: \bl{$c\!::\!s$}) -\end{center}\bigskip - \item A \alert{\bf language} is a set of strings, for example\medskip \begin{center} @@ -125,18 +121,110 @@ \bl{$A\;@\;B$} & \bl{$\dn$} & \bl{$\{ s_1\,@\,s_2 \;\mid\; s_1 \in A \wedge s_2 \in B\}$} \end{tabular} \end{center} +\bigskip -%\item The \alert{\bf meaning} of a regular expression is a set of -% strings, or language. +\small +\item [] For example \bl{$A = \{foo, bar\}$}, \bl{$B = \{a, b\}$} + +\[ +\bl{A \,@\, B = \{fooa, foob, bara, barb\}} +\] + +\end{itemize} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{The Power Operation} + +\begin{itemize} +\item The \alert{\bf Power} of a language: + +\begin{center} +\begin{tabular}{lcl} +\bl{$A^0$} & \bl{$\dn$} & \bl{$\{[]\}$}\\ +\bl{$A^{n+1}$} & \bl{$\dn$} & \bl{$A \,@\, A^n$} +\end{tabular} +\end{center}\bigskip + +\item[] For example + +\begin{center} +\begin{tabular}{l} +\bl{$A^4 = A \,@\, A \,@\, A \,@\, A$}\\ +\bl{$A^0 \dn \{[]\}$}\\ +\end{tabular} +\end{center} + \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{The Star Operation} + +\begin{itemize} +\item The \alert{\bf Star} of a \underline{language}: +\bigskip + +\begin{center} +\begin{tabular}{c} +\bl{$A^* \dn \bigcup_{0\le n} A^n$} +\end{tabular} +\end{center}\bigskip + +\item[] This expands to + +\[ +\bl{A^0 \cup A^1 \cup A^2 \cup A^3 \cup A^4 \cup \ldots} +\] + +\small +\[ +\bl{\{[]\} \;\cup\; A \;\cup\; A\,@\,A \;\cup\; + A\,@\,A\,@\,A \;\cup\; A\,@\,A\,@\,A\,@\,A \cup \ldots} +\] + +\end{itemize} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{Semantic Derivative} + +\begin{itemize} +\item The \alert{\bf Semantic Derivative} of a +\underline{language}\\ wrt to a character \bl{$c$}: +\bigskip + +\begin{center} +\bl{$Der\,c\,A \dn \{ s \;|\; c\!::\!s \in A\}$ } +\end{center}\bigskip\bigskip + +For \bl{$A = \{\textit{foo}, \textit{bar}, \textit{frak}\}$} then + +\begin{center} +\bl{\begin{tabular}{l@{\hspace{2mm}}c@{\hspace{2mm}}l} +$Der\,f\,A$ & $=$ & $\{\textit{oo}, \textit{rak}\}$\\ +$Der\,b\,A$ & $=$ & $\{\textit{ar}\}$\\ +$Der\,a\,A$ & $=$ & $\varnothing$\\ +\end{tabular}} +\end{center} + +\end{itemize} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[t] -\frametitle{\begin{tabular}{c}Regular Expressions\end{tabular}} +\frametitle{Regular Expressions} Their inductive definition: @@ -145,9 +233,9 @@ \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l} \bl{$r$} & \bl{$::=$} & \bl{$\varnothing$} & null\\ & \bl{$\mid$} & \bl{$\epsilon$} & empty string / \pcode{""} / $[]$\\ - & \bl{$\mid$} & \bl{$c$} & character\\ - & \bl{$\mid$} & \bl{$r_1 \cdot r_2$} & sequence\\ - & \bl{$\mid$} & \bl{$r_1 + r_2$} & alternative / choice\\ + & \bl{$\mid$} & \bl{$c$} & character\\ + & \bl{$\mid$} & \bl{$r_1 \cdot r_2$} & sequence\\ + & \bl{$\mid$} & \bl{$r_1 + r_2$} & alternative / choice\\ & \bl{$\mid$} & \bl{$r^*$} & star (zero or more)\\ \end{tabular} \end{textblock} @@ -164,23 +252,18 @@ \begin{textblock}{15}(1,4) \begin{tabular}{@ {}rcl} \bl{$L(\varnothing)$} & \bl{$\dn$} & \bl{$\varnothing$}\\ - \bl{$L(\epsilon)$} & \bl{$\dn$} & \bl{$\{$[]$\}$}\\ + \bl{$L(\epsilon)$} & \bl{$\dn$} & \bl{$\{[]\}$}\\ \bl{$L(c)$} & \bl{$\dn$} & \bl{$\{[c]\}$}\\ \bl{$L(r_1 + r_2)$} & \bl{$\dn$} & \bl{$L(r_1) \cup L(r_2)$}\\ - \bl{$L(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$L(r_1) \,@\, L(r_2)$}\\ - \bl{$L(r^*)$} & \bl{$\dn$} & \bl{$\bigcup_{n \ge 0} L(r)^n$}\\ - \end{tabular}\bigskip - -\only<2->{ -\hspace{5mm}\textcolor{blue}{$L(r)^0 \;\dn\; \{[]\}$}\\ -\textcolor{blue}{$L(r)^{n+1} \;\dn\; L(r) \,@\, L(r)^n$}} + \bl{$L(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$L(r_1) \,@\, L(r_2)$}\\ + \bl{$L(r^*)$} & \bl{$\dn$} & \bl{$(L(r))^*$}\\ + \end{tabular} \end{textblock} -\only<1->{ \begin{textblock}{6}(9,12)\small \bl{$L$} is a function from regular expressions to sets of strings\\ \bl{$L$ : Rexp $\Rightarrow$ Set$[$String$]$} -\end{textblock}} +\end{textblock} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -199,7 +282,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] \frametitle{\begin{tabular}{c} - When Are Two Regular\\ + When Are Two Regular\\[-1mm] Expressions Equivalent?\end{tabular}} \large \begin{center} @@ -264,9 +347,9 @@ \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\newcommand{\YES}{\textcolor{gray}{yes}} -\newcommand{\NO}{\textcolor{gray}{no}} -\newcommand{\FORALLR}{\textcolor{gray}{$\forall$ r.}} +%\newcommand{\YES}{\textcolor{gray}{yes}} +%\newcommand{\NO}{\textcolor{gray}{no}} +%\newcommand{\FORALLR}{\textcolor{gray}{$\forall$ r.}} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] @@ -283,9 +366,8 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ \begin{frame}[c] -\frametitle{\begin{tabular}{c}\bl{$(a?\{n\}) \cdot a\{n\}$}\end{tabular}} +\frametitle{\bl{$(a?\{n\}) \cdot a\{n\}$}} \begin{center} \begin{tikzpicture} @@ -313,13 +395,12 @@ \end{tikzpicture} \end{center} -\end{frame}} +\end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ \begin{frame}[c] -\frametitle{\begin{tabular}{c}Evil Regular Expressions\end{tabular}} +\frametitle{Evil Regular Expressions} \begin{itemize} \item \alert{R}egular \alert{e}xpression \alert{D}enial \alert{o}f \alert{S}ervice (ReDoS)\bigskip @@ -333,49 +414,47 @@ \end{itemize} \end{itemize} -\end{frame}} +\end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ \begin{frame}[t] -\frametitle{\begin{tabular}{c}A Matching Algorithm\end{tabular}} +\frametitle{A Matching Algorithm} \ldots{}whether a regular expression can match the empty string: \begin{center} \begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}} -\bl{$nullable(\varnothing)$} & \bl{$\dn$} & \bl{$f\!\/alse$}\\ -\bl{$nullable(\epsilon)$} & \bl{$\dn$} & \bl{$true$}\\ -\bl{$nullable (c)$} & \bl{$\dn$} & \bl{$f\!alse$}\\ -\bl{$nullable (r_1 + r_2)$} & \bl{$\dn$} & \bl{$nullable(r_1) \vee nullable(r_2)$} \\ -\bl{$nullable (r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$nullable(r_1) \wedge nullable(r_2)$} \\ -\bl{$nullable (r^*)$} & \bl{$\dn$} & \bl{$true$} \\ +\bl{$nullable(\varnothing)$} & \bl{$\dn$} & \bl{\textit{false}}\\ +\bl{$nullable(\epsilon)$} & \bl{$\dn$} & \bl{\textit{true}}\\ +\bl{$nullable (c)$} & \bl{$\dn$} & \bl{\textit{false}}\\ +\bl{$nullable (r_1 + r_2)$} & \bl{$\dn$} & \bl{$nullable(r_1) \vee nullable(r_2)$} \\ +\bl{$nullable (r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$nullable(r_1) \wedge nullable(r_2)$} \\ +\bl{$nullable (r^*)$} & \bl{$\dn$} & \bl{\textit{true}}\\ \end{tabular} \end{center} -\end{frame}} +\end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ \begin{frame}[c] -\frametitle{\begin{tabular}{c}The Derivative of a Rexp\end{tabular}} +\frametitle{The Derivative of a Rexp} \large If \bl{$r$} matches the string \bl{$c\!::\!s$}, what is a regular -expression that matches \bl{$s$}?\bigskip\bigskip\bigskip\bigskip +expression that matches just \bl{$s$}?\bigskip\bigskip\bigskip\bigskip \small \bl{$der\,c\,r$} gives the answer, Brzozowski 1964 -\end{frame}} +\end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \mode{ \begin{frame}[c] -\frametitle{\begin{tabular}{c}The Derivative of a Rexp (2)\end{tabular}} +\frametitle{The Derivative of a Rexp} \begin{center} \begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}} @@ -386,7 +465,7 @@ \bl{$der\, c\, (r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{if $nullable (r_1)$}\\ & & \bl{then $(der\,c\,r_1) \cdot r_2 + der\, c\, r_2$}\\ & & \bl{else $(der\, c\, r_1) \cdot r_2$}\\ - \bl{$der\, c\, (r^*)$} & \bl{$\dn$} & \bl{$(der\,c\,r) \cdot (r^*)$} &\medskip\\\pause + \bl{$der\, c\, (r^*)$} & \bl{$\dn$} & \bl{$(der\,c\,r) \cdot (r^*)$} &\bigskip\\\pause \bl{$\textit{ders}\, []\, r$} & \bl{$\dn$} & \bl{$r$} & \\ \bl{$\textit{ders}\, (c\!::\!s)\, r$} & \bl{$\dn$} & \bl{$\textit{ders}\,s\,(der\,c\,r)$} & \\ @@ -397,22 +476,20 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ \begin{frame}[c] -\frametitle{\begin{tabular}{c}Examples\end{tabular}} +\frametitle{Examples} Given \bl{$r \dn ((a \cdot b) + b)^*$} what is \begin{center} \begin{tabular}{l} -\bl{$der\,a\,r =?$}\\ -\bl{$der\,b\,r =?$}\\ -\bl{$der\,c\,r =?$} +\bl{$der\,a\,r =\;?$}\\ +\bl{$der\,b\,r =\;?$}\\ +\bl{$der\,c\,r =\;?$} \end{tabular} \end{center} - -\end{frame}} +\end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -430,7 +507,8 @@ & whether \bl{$r_4$} can recognise\\ & the empty string\smallskip\\ Output: & result of the test\\ - & $\Rightarrow true \,\text{or}\, \textit{false}$\\ + & $\Rightarrow \bl{\textit{true}} \,\text{or}\, + \bl{\textit{false}}$\\ \end{tabular} \end{center} @@ -439,7 +517,25 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ +\begin{frame}[c] +\frametitle{The Idea of the Algorithm} + +If we want to recognise the string \bl{$abc$} with regular +expression \bl{$r_1$} then\medskip + +\begin{enumerate} +\item \bl{$Der\,a\,(L(r_1))$}\pause +\item \bl{$Der\,b\,(Der\,a\,(L(r_1)))$}\pause +\item \bl{$Der\,c\,(Der\,b\,(Der\,a\,(L(r_1))))$}\bigskip +\item finally we test whether the empty string is in this set\medskip +\end{enumerate} + +The matching algorithm works similarly, just over regular expressions instead of sets. +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] \frametitle{\begin{tabular}{c}\bl{$(a?\{n\}) \cdot a\{n\}$}\end{tabular}} @@ -470,15 +566,15 @@ \end{tikzpicture} \end{center} -\end{frame}} +\end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ \begin{frame}[c] -\frametitle{\begin{tabular}{c}A Problem\end{tabular}} +\frametitle{A Problem} -We represented the ``n-times'' \bl{$a\{n\}$} as a sequence regular expression: +We represented the ``n-times'' \bl{$a\{n\}$} as a +sequence regular expression: \begin{center} \begin{tabular}{rl} @@ -492,14 +588,14 @@ \end{tabular} \end{center} -This problem is aggravated with \bl{$a?$} being represented as \bl{$\epsilon + a$}. -\end{frame}} +This problem is aggravated with \bl{$a?$} being represented +as \bl{$\epsilon + a$}. +\end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ \begin{frame}[c] -\frametitle{\begin{tabular}{c}Solving the Problem\end{tabular}} +\frametitle{Solving the Problem} What happens if we extend our regular expressions @@ -512,14 +608,13 @@ \end{center} What is their meaning? What are the cases for \bl{$nullable$} and \bl{$der$}? -\end{frame}} +\end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ \begin{frame}[t] -\frametitle{\begin{tabular}{c}\bl{$(a?\{n\}) \cdot a\{n\}$}\end{tabular}} +\frametitle{\bl{$(a?\{n\}) \cdot a\{n\}$}} \begin{center} \begin{tikzpicture} @@ -550,7 +645,7 @@ \end{tikzpicture} \end{center} -\end{frame}} +\end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%