updated
authorChristian Urban <christian.urban@kcl.ac.uk>
Sat, 03 Oct 2020 00:51:47 +0100
changeset 769 f9686b22db7e
parent 768 34f77b976b88
child 770 c563cf946497
updated
progs/matcher/re1.sc
progs/matcher/re2.sc
progs/matcher/re3.sc
slides/slides01.pdf
slides/slides01.tex
slides/slides02.pdf
slides/slides02.tex
videos/02-algo1.srt
videos/02-algo2.srt
--- a/progs/matcher/re1.sc	Tue Sep 29 21:52:52 2020 +0100
+++ b/progs/matcher/re1.sc	Sat Oct 03 00:51:47 2020 +0100
@@ -128,7 +128,7 @@
 }
 
 // the expicit expansion in EVIL1(n) increases
-// drastically its size
+// drastically its size - (a?){n} a{n}
 
 size(EVIL1(1))  // 5
 size(EVIL1(3))  // 17
@@ -136,6 +136,9 @@
 size(EVIL1(7))  // 41
 size(EVIL1(20)) // 119
 
+size(ders(("a" * 20).toList, EVIL1(20))) 
+
+
 // given a regular expression and building successive
 // derivatives might result into bigger and bigger
 // regular expressions...here is an example for this:
--- a/progs/matcher/re2.sc	Tue Sep 29 21:52:52 2020 +0100
+++ b/progs/matcher/re2.sc	Sat Oct 03 00:51:47 2020 +0100
@@ -28,7 +28,7 @@
   case ALT(r1, r2) => nullable(r1) || nullable(r2)
   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
   case STAR(_) => true
-  case NTIMES(r, i) => if (i == 0) true else nullable(r)
+  case NTIMES(r, n) => if (n == 0) true else nullable(r)
 }
 
 
@@ -41,8 +41,8 @@
     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
     else SEQ(der(c, r1), r2)
   case STAR(r1) => SEQ(der(c, r1), STAR(r1))
-  case NTIMES(r1, i) => 
-    if (i == 0) ZERO else SEQ(der(c, r1), NTIMES(r1, i - 1))
+  case NTIMES(r, n) => 
+    if (n == 0) ZERO else SEQ(der(c, r), NTIMES(r, n - 1))
 }
 
 def ders (s: List[Char], r: Rexp) : Rexp = s match {
@@ -80,7 +80,7 @@
   println("Test (a?{n}) (a{n})")
 
   for (i <- 0 to 1000 by 100) {
-    println(f"$i: ${time_needed(2, matcher(EVIL1(i), "a" * i))}%.5f")
+    println(f"$i: ${time_needed(1, matcher(EVIL1(i), "a" * i))}%.5f")
   }
 }  
 
@@ -91,7 +91,7 @@
   println("Test (a*)* b")
 
   for (i <- 0 to 30 by 2) {
-    println(f"$i: ${time_needed(2, matcher(EVIL2, "a" * i))}%.5f")
+    println(f"$i: ${time_needed(1, matcher(EVIL2, "a" * i))}%.5f")
   }
 }
 
@@ -124,8 +124,10 @@
 size(ders("aaaaa".toList, EVIL1(5)))  // 122
 size(ders("aaaaaa".toList, EVIL1(5))) // 151
 
+size(ders(("a" * 20).toList, EVIL1(5))) 
+
 // but the size of the derivatives can still grow 
-// quite dramatically in case of EVIL2
+// quite dramatically in case of EVIL2:  (a*)* b
 
 size(ders("".toList, EVIL2))       // 5
 size(ders("a".toList, EVIL2))      // 12
--- a/progs/matcher/re3.sc	Tue Sep 29 21:52:52 2020 +0100
+++ b/progs/matcher/re3.sc	Sat Oct 03 00:51:47 2020 +0100
@@ -102,7 +102,7 @@
 def test1() = {
   println("Test (a?{n}) (a{n})")
 
-  for (i <- 0 to 8000 by 1000) {
+  for (i <- 0 to 9000 by 1000) {
     println(f"$i: ${time_needed(3, matcher(EVIL1(i), "a" * i))}%.5f")
   }
 }  
Binary file slides/slides01.pdf has changed
--- a/slides/slides01.tex	Tue Sep 29 21:52:52 2020 +0100
+++ b/slides/slides01.tex	Sat Oct 03 00:51:47 2020 +0100
@@ -1453,8 +1453,265 @@
 \begin{frame}[c]
 \frametitle{\begin{tabular}{c}\\[3cm]\alert{Questions?}\end{tabular}}
 
+
+\begin{tabular}{lll}
+  TAs: & Anton Luca-Dorin & (took the module last year)\\
+       & Chengsong Tan    & (PhD student working on derivatives)  
+\end{tabular}  
 \mbox{}
 \end{frame}
+
+\begin{frame}[c]
+\begin{mybox3}{Coursework}
+  Do we need to provide instructions on running the coursework files
+  if we're using languages other than Scala? Thanks
+\end{mybox3}\pause
+
+\begin{mybox2}{Zip-File for Coursework}
+  Please, please submit a zipfile that generates a subdirectory
+  \begin{center}
+  \texttt{NameFamilyName}  
+  \end{center}  
+\end{mybox2}
+\end{frame}
+
+
+\begin{frame}[c]
+\begin{mybox3}{Coursework}
+  What is the purpose of the workshop session on the timetable?
+  
+  Slightly confused about how to undertake cw1 and what exactly we
+  should be implementing. This is more for clarification of the cw1
+  structure, including the implementation and questions present in
+  cw1.
+\end{mybox3}
+\end{frame}
+
+\begin{frame}[c]
+\begin{mybox3}{What is the trick?}\small
+  What was the trick to improve the evil regular expressions matcher
+  to have such good results compared to other programming languages?
+  Is it working better on casual regular expressions (the ones that
+  Python and Java handle pretty well), too? Or was it just optimised
+  for these evil ones?
+\end{mybox3}
+
+\begin{mybox3}{}\small
+  It was shown in the lectures that the pattern matching algorithms
+  currently implemented in popular programming languages (Python, JS,
+  Java, etc) are far slower than the algorithm we are going to be
+  implementing in this module. My question is why do these programming
+  languages not implement the algorithm that we are going to implement
+  in this module?
+\end{mybox3}
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+  \frametitle{Thanks to Martin Mikusovic}
+
+\bigskip    
+\begin{center}
+\begin{tikzpicture}
+  \begin{axis}[
+    xlabel={$n$},
+    x label style={at={(1.05,0.0)}},
+    ylabel={time in secs},
+    enlargelimits=false,
+    xtick={0,5,...,30},
+    xmax=33,
+    ymax=35,
+    ytick={0,10,...,30},
+    scaled ticks=false,
+    axis lines=left,
+    width=9cm,
+    height=5.5cm, 
+    legend entries={Java 8, Python, JavaScript, Swift},  
+    legend pos=north west,
+    legend cell align=left]
+\addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};
+\addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
+\addplot[red,mark=*, mark options={fill=white}] table {re-js.data};
+\addplot[magenta,mark=*, mark options={fill=white}] table {re-swift.data};
+\end{axis}
+\end{tikzpicture}
+\end{center}
+
+Regex: \bl{$(a^*)^* \cdot b$}
+
+Strings of the form \bl{$\underbrace{\,a\ldots a\,}_{n}$}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+\frametitle{Same Example in Java 9+}
+
+\begin{center}
+\begin{tikzpicture}
+  \begin{axis}[
+    xlabel={$n$},
+    x label style={at={(1.09,-0.15)}},
+    ylabel={time in secs},
+    scaled x ticks=false,
+    enlargelimits=false,
+    xtick distance=10000,
+    xmax=44000, 
+    ytick={0,10,...,30}, 
+    ymax=35, 
+    axis lines=left,
+    width=9cm,
+    height=5cm, 
+    legend entries={Java \liningnums{9}+},
+    legend pos=north west,
+    legend cell align=left]
+\addplot[blue,mark=square*,mark options={fill=white}] table {re-java9.data};
+\end{axis}
+\end{tikzpicture}
+\end{center}
+
+Regex: \bl{$(a^*)^* \cdot b$}
+
+Strings of the form \bl{$\underbrace{\,a\ldots a\,}_{n}$}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}[c]
+\begin{mybox3}{}
+  Are there any (common) languages that have a built-in regex
+  implementation matching the set of functions of a formal 'simple'
+  regular expression, as opposed to an 'extended' regular expression
+  implemented in most regex-supporting languages?
+\end{mybox3}
+\end{frame}
+
+\begin{frame}[c]
+\begin{mybox3}{Passing Mark}
+  I believe the assessment is 70\% coursework (broken into 10\% weekly
+  stuff, 15\% mid term exam and 45\% CW in any programming language)
+  and 30\% January exam. However, I would like to know if we just need
+  40\% overall to pass the module or pass the each component
+  individually?
+\end{mybox3}
+
+\hfill$\Rightarrow$ 40\% overall
+\end{frame}
+
+\begin{frame}[c]
+\begin{mybox3}{Regexes}
+  Can we determine all the possible regular expressions matching a
+  certain string? If we take into account all the possible ways to
+  combine the operations: \bl{$\ZERO$}, \bl{$\ONE$},
+  \bl{$r_1 + r_2$}, \bl{$r_1 \cdot r_2$}, \bl{$r^*$}?
+\end{mybox3}
+\end{frame}
+
+\begin{frame}[c]
+\begin{mybox3}{\bl{$L$} + Equivalence}
+  When we explain why two regular expressions are not equivalent, what
+  method is better for us, using mathematics formulas or making an
+  example? 
+\end{mybox3}
+\begin{mybox3}{}
+  Meaning of Regex and Operations
+\end{mybox3}
+\end{frame}
+
+\begin{frame}[c]
+\begin{mybox3}{\bl{$L$}}
+  Can the function L be applied to anything other than regular
+  expressions? For example would L(L(c)) return anything?
+\end{mybox3}
+
+\hfill $\Rightarrow$ No
+\end{frame} 
+
+\begin{frame}[c]
+\begin{mybox3}{\bl{$(a?)\{n\} \cdot a\{n\}$}}
+  In the evil regexes section, is there any reason why in the regex
+  \texttt{[a?]\{n\}[a]\{n\}} the square brackets are used? It is defined as a
+  single character from the square brackets, however there is just one
+  character, so it seems like it is not necessary. Maybe it is just
+  necessary for the first part, because ? is a token instead of a
+  character and we need to refer to a? as a ``unit''? Could regular
+  brackets be used instead? Is there any difference apart from the
+  fact that it would create a group? Also, are the regexes
+  \texttt{[a?]\{n\}} and
+  \texttt{a\{0,3\}} equivalent?
+\end{mybox3}
+\end{frame} 
+
+\begin{frame}[c]
+\begin{mybox3}{Python + Parser Combinators (CW3)}\small
+  Hi Christian,
+
+  I don’t see a problem: you certainly have higher order functions and
+  it is easy to implement algebraic data types using classes. As far
+  as I can see that’s all you need. You don’t get the static types but
+  that should be obvious. Basically if you can do it in LISP you can
+  do it in Python. The only problem could be stack overflows due to a
+  lack of tail recursion optimisation. On the other hand you can
+  simulate laziness using generators.
+
+  Cheers,
+  Thorsten 
+\end{mybox3}
+
+Trees \url{https://youtu.be/7tCNu4CnjVc}
+
+Laziness \url{https://youtu.be/5jwV3zxXc8E}
+
+\end{frame}
+
+\begin{frame}[c]
+\begin{mybox3}{}
+  What suggestions do you have for us to get the most out of this
+  module, especially in the online format? I.e. form discussion
+  groups, will you have office hours?
+\end{mybox3}
+
+\small
+\hfill $\Rightarrow$\mbox{} Discussion Forum on KEATS
+
+\hfill online tutorial sessions
+
+\hfill ???
+
+\hfill PL-groups for ``exotic'' langs
+\end{frame}
+
+\begin{frame}[c]
+ \small
+\begin{mybox3}{}
+Where do most students struggle with this module? What will the format
+of the exam be? What is the most efficient way of studying for the
+exam? There are plenty of resources available on KEATS, but is there
+anything else you'd recommend us to study? Although (just by skimming
+the headings) the module seems to be a combination of practical and
+theoretical matters, exactly in what field would the syllabus be
+applied? Besides these questions and the ones other students asked, is
+there anything else we should know? Thank you!
+\end{mybox3}
+\end{frame}
+
+
+\begin{frame}[c]
+\end{frame}
+
+\begin{frame}[c]
+\end{frame}
+
+\begin{frame}[c]
+\end{frame}
+
+\begin{frame}[c]
+\end{frame}
+
+\begin{frame}[c]
+\end{frame}
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
 \end{document}
 
Binary file slides/slides02.pdf has changed
--- a/slides/slides02.tex	Tue Sep 29 21:52:52 2020 +0100
+++ b/slides/slides02.tex	Sat Oct 03 00:51:47 2020 +0100
@@ -637,6 +637,118 @@
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+  \frametitle{$der$ for $n$-times}
+
+Case \bl{$n = 2$} and \bl{$r \cdot r$}: 
+
+\begin{center}
+  \begin{tabular}{lcl}
+    \bl{$der\,c\,(r\cdot r)$} & \bl{$\dn$} & \bl{if \; $nullable(r)$}\\
+                              &   & \bl{then \; \alert<3>{$(der\,c\,r)\cdot r + der\,c\,r$}}\\
+                              &   & \bl{else \; $(der\,c\,r)\cdot r$}\bigskip\pause\\
+    my claim                  &   & \bl{$\equiv\;$} \bl{$(der\,c\,r)\cdot r$}\\
+    (in this case)                                
+  \end{tabular} 
+\end{center}  
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[t]
+
+We know \bl{$nullable(r)$} holds!\pause 
+
+\begin{center}
+  \begin{tabular}{@{}lcl}
+    \bl{$(der\,c\,r)\cdot r + der\,c\,r$}\pause
+    & \bl{$\equiv$} & \bl{$(der\,c\,r)\cdot r + (der\,c\,r) \cdot \ONE$}\medskip\pause\\
+    & \bl{$\equiv$} & \bl{$(der\,c\,r)\cdot (r + \ONE)$}\medskip\pause\\
+    & \bl{$\equiv$} & \bl{$(der\,c\,r)\cdot r$}\\
+    \multicolumn{3}{r}{\small(remember \bl{$r$} is nullable)}
+  \end{tabular} 
+\end{center}\pause
+
+\rule{13cm}{0.8mm}
+
+\begin{textblock}{13}(2,10)
+\only<6>{%
+  \begin{tabular}{lcl}
+    \bl{$der\,c\,(r\cdot r)$} & \bl{$\dn$} & \bl{if \; $nullable(r)$}\\
+                          &   & \bl{then \; $(der\,c\,r)\cdot r + der\,c\,r$}\\
+                          &   & \bl{else \; $(der\,c\,r)\cdot r$}                                
+  \end{tabular}}
+\only<7>{%
+  \begin{tabular}{lcl}
+    \bl{$der\,c\,(r\cdot r)$} & \bl{$\dn$} & \bl{if \; $nullable(r)$}\\
+                          &   & \bl{then \; $(der\,c\,r)\cdot r$}\\
+                          &   & \bl{else \; $(der\,c\,r)\cdot r$}                                
+  \end{tabular}}
+\only<8>{%
+  \begin{tabular}{lcl}
+    \bl{$der\,c\,(r\cdot r)$} & \bl{$\dn$} & \bl{$(der\,c\,r)\cdot r$}                                
+  \end{tabular}}
+\end{textblock}
+
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+
+\begin{center}
+\begin{tabular}{ll@{\hspace{10mm}}l}
+& \bl{$r\{n\}$} & \bl{$der$}\hspace{20mm}\mbox{}\\\hline\\[-4mm]    
+\bl{$n = 0$} : & \bl{$\ONE$}           & \bl{$\ZERO$}\\
+\bl{$n = 1$} : & \bl{$r$}              & \bl{$(der\,c\,r)$}\\
+\bl{$n = 2$} : & \bl{$r\cdot r$}       & \bl{$(der\,c\,r)\cdot r$}\\
+\bl{$n = 3$} : & \bl{$r\cdot r\cdot r$}& \only<1>{???}\only<2->{\bl{$(der\,c\,r)\cdot r\cdot r$}}\\
+\quad\vdots  
+\end{tabular} 
+\end{center}
+
+\begin{textblock}{13}(1,11)
+\only<3->{%
+  \begin{tabular}{rcl}
+    \bl{$nullable(r\{n\})$} & \bl{$\dn$} &
+                          \bl{if\; $n = 0$\; then\; $true$\; else\; $nullable(r)$}\\
+    \bl{$der\,c\,(r\{n\})$} & \bl{$\dn$} &
+                          \bl{if\; $n = 0$\; then\; $\ZERO$\; else\; $(der\,c\,r)\cdot r\{n - 1\}$}
+  \end{tabular}}
+\end{textblock}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+
+\begin{textblock}{13}(1.5,4)
+  \only<1-6>{%
+  \begin{tabular}{@{}lcl@{}}
+    \bl{$der\,c\,(r\cdot r\cdot r)$}
+    & \bl{$\dn$} & \bl{if $nullable(r)$}\medskip\\
+    & & \only<1>{\bl{then\; $(der\,c\,r)\cdot r\cdot r + der\,c\,(r\cdot r)$}}%
+        \only<2>{\bl{then\; $(der\,c\,r)\cdot r\cdot r + (der\,c\,r)\cdot r$}}%
+        \only<3>{\bl{then\; $(der\,c\,r)\cdot (r\cdot r + r)$}}%
+        \only<4>{\bl{then\; $(der\,c\,r)\cdot (r\cdot (r + \ONE))$}}%
+        \only<5>{\bl{then\; $(der\,c\,r)\cdot (r\cdot r)$}}%
+        \only<6>{\bl{then\; $(der\,c\,r)\cdot r\cdot r$}}\medskip\\
+    & & \bl{else\; $(der\,c\,r)\cdot r\cdot r$}\\
+  \end{tabular}}
+  \only<7->{%
+  \begin{tabular}{@{}lcl@{}}
+    \bl{$der\,c\,(r\cdot r\cdot r)$}
+    & \bl{$\dn$} & \bl{$(der\,c\,r)\cdot r\cdot r$}%
+  \end{tabular}} 
+\end{textblock}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}[t]
@@ -869,12 +981,12 @@
 \item extends to most regular expressions, for example
 \bl{$\sim r$} (next slide)
 
-\item is easy to implement in a functional language (slide after)
+\item is easy to implement in a functional language
 
 \item the algorithm is already quite old; there is still
   work to be done to use it as a tokenizer (that is relatively new work)
 
-\item we can prove its correctness\ldots (several slides later)
+\item we can prove its correctness\ldots (another video)
 \end{itemize}
 
 \end{frame}
@@ -889,10 +1001,9 @@
 \item \bl{$L(\sim{}r) \dn UNIV - L(r)$}\medskip
 \item \bl{$nullable (\sim{}r) \dn not\, (nullable(r))$}\medskip
 \item \bl{$der\,c\,(\sim{}r) \dn \;\sim{}(der\,c\,r)$}
-\end{itemize}\bigskip\pause
+\end{itemize}\bigskip\bigskip\medskip\pause
 
 Used often for recognising comments:
-
 \[
 \bl{/ \cdot * \cdot (\sim{}([a\mbox{-}z]^* \cdot * \cdot / \cdot [a\mbox{-}z]^*)) \cdot * \cdot /}
 \]
--- a/videos/02-algo1.srt	Tue Sep 29 21:52:52 2020 +0100
+++ b/videos/02-algo1.srt	Sat Oct 03 00:51:47 2020 +0100
@@ -5,28 +5,28 @@
 
 2
 00:00:09,700 --> 00:00:11,500
-This slide said, What is
+This slide said what is
 
 3
 00:00:11,500 --> 00:00:14,500
-all wreck expression Metro
+our regular expression matcher
 actually supposed to do?
 
 4
 00:00:14,500 --> 00:00:16,570
-It will take two arguments and
+It will take two arguments,
 
 5
 00:00:16,570 --> 00:00:18,670
-reg expression R and a string
+a regular expression r and a string s.
 
 6
 00:00:18,670 --> 00:00:21,580
-S. And it's supposed to say yes,
+And it's supposed to say yes,
 
 7
 00:00:21,580 --> 00:00:23,440
-the wreck expression matches
+the regular expression matches
 
 8
 00:00:23,440 --> 00:00:26,920
@@ -35,12 +35,12 @@
 
 9
 00:00:26,920 --> 00:00:28,855
-the language of R.
+the language of r.
 
 10
 00:00:28,855 --> 00:00:32,410
 And if the string is not
-in the language of our,
+in the language of r,
 
 11
 00:00:32,410 --> 00:00:35,515
@@ -57,13 +57,13 @@
 
 14
 00:00:39,565 --> 00:00:43,305
-this l Sometimes
+this L sometimes
 produces infinite sets.
 
 15
 00:00:43,305 --> 00:00:47,585
-And so we can test whether a
-string is an infinite set,
+And we can't test whether a
+string is in infinite set,
 
 16
 00:00:47,585 --> 00:00:50,090
@@ -85,11 +85,11 @@
 
 20
 00:00:57,050 --> 00:00:59,284
-on Rekha expressions instead.
+on regular expressions instead,
 
 21
 00:00:59,284 --> 00:01:03,275
-Because Weka expressions
+because regular expressions
 are always finite trees.
 
 22
@@ -99,11 +99,11 @@
 
 23
 00:01:05,870 --> 00:01:08,150
-clever is called brush-off ski.
+clever is called Brzozowski.
 
 24
 00:01:08,150 --> 00:01:11,435
-It's his, I've written,
+It's his algorithm
 I'm introducing here.
 
 25
@@ -123,12 +123,12 @@
 28
 00:01:20,104 --> 00:01:24,155
 And the idea is that
-this function nullable.
+this function nullable is
 
 29
 00:01:24,155 --> 00:01:26,450
-Testing whether
-the reg expression
+testing whether
+the regular expression
 
 30
 00:01:26,450 --> 00:01:27,950
@@ -149,7 +149,7 @@
 
 34
 00:01:35,465 --> 00:01:37,775
-This reg expression,
+This regular expression,
 the whole purpose
 
 35
@@ -163,7 +163,7 @@
 
 37
 00:01:41,645 --> 00:01:44,599
-If a reg expression
+If a regular expression
 can match a character,
 
 38
@@ -182,21 +182,21 @@
 
 41
 00:01:53,180 --> 00:01:56,960
-then either or one can
-match the empty string.
+then either r1 can
+match the empty string,
 
 42
 00:01:56,960 --> 00:01:59,720
-Or R2 can match the empty string.
+or r2 can match the empty string.
 
 43
 00:01:59,720 --> 00:02:03,110
 So either nullable
-of R1 has to hold,
+of r1 has to hold,
 
 44
 00:02:03,110 --> 00:02:06,945
-or nullable of R2 has to hold.
+or nullable of r2 has to hold.
 
 45
 00:02:06,945 --> 00:02:09,925
@@ -205,17 +205,17 @@
 
 46
 00:02:09,925 --> 00:02:12,790
-If this reg expression can
+If this regular expression can
 match the empty string,
 
 47
 00:02:12,790 --> 00:02:16,615
-then R1 has to be able to
+then r1 has to be able to
 match the empty string,
 
 48
 00:02:16,615 --> 00:02:20,290
-and R2 has to be able to
+and r2 has to be able to
 match the empty string.
 
 49
@@ -224,12 +224,12 @@
 
 50
 00:02:22,555 --> 00:02:25,660
-In one case it is o and
-the other case it's end.
+In one case it is or and
+the other case it is and.
 
 51
 00:02:25,660 --> 00:02:27,970
-In the store record
+The star regular
 expression can match
 
 52
@@ -244,7 +244,7 @@
 
 54
 00:02:33,340 --> 00:02:37,179
-and should not be too
+and it should not be too
 difficult to implement.
 
 55
@@ -273,11 +273,11 @@
 
 60
 00:02:57,305 --> 00:02:58,940
-we can't expect that as
+we can't expect that 
 
 61
 00:02:58,940 --> 00:03:03,260
-simple fix will solve all
+a simple fix will solve all
 the problems in the world.
 
 62
@@ -293,11 +293,11 @@
 64
 00:03:10,085 --> 00:03:12,140
 And it also just
-chose this preserves
+shows this Brzozowski
 
 65
 00:03:12,140 --> 00:03:14,345
-the is a very clever guy.
+is a very clever guy.
 
 66
 00:03:14,345 --> 00:03:15,800
@@ -335,21 +335,21 @@
 73
 00:03:34,685 --> 00:03:38,120
 Imagine you have a
-reexpression and it can
+regular expression and it can
 
 74
 00:03:38,120 --> 00:03:41,930
 match a string of the
-form C followed by as.
+form c followed by s.
 
 75
 00:03:41,930 --> 00:03:44,810
-So the C is the first
+So the c is the first
 character of that string.
 
 76
 00:03:44,810 --> 00:03:48,605
-So I mentioned that can
+So I imagine that r can
 match this kind of string.
 
 77
@@ -358,12 +358,12 @@
 
 78
 00:03:50,330 --> 00:03:52,400
-what would a wrecker
+what would a regular
 expression look
 
 79
 00:03:52,400 --> 00:03:54,695
-like that can match chest
+like that can match just
 
 80
 00:03:54,695 --> 00:03:57,140
@@ -372,7 +372,7 @@
 81
 00:03:57,140 --> 00:03:59,300
 that from the semantic
-that every relative,
+derivative,
 
 82
 00:03:59,300 --> 00:04:00,860
@@ -386,16 +386,16 @@
 84
 00:04:02,210 --> 00:04:04,940
 Here it's the same.
-If a reg expression
+If a regular expression
 
 85
 00:04:04,940 --> 00:04:07,835
 can match a string
-starting with a C,
+starting with a c,
 
 86
 00:04:07,835 --> 00:04:11,090
-we're looking for a wreck
+we're looking for a regular
 expression which can match
 
 87
@@ -410,7 +410,7 @@
 89
 00:04:17,690 --> 00:04:21,049
 the derivative of a
-wreck expression.
+regular expression.
 
 90
 00:04:21,049 --> 00:04:22,205
@@ -419,25 +419,25 @@
 91
 00:04:22,205 --> 00:04:25,460
 a character as argument
-and the rec expression.
+and a regular expression.
 
 92
 00:04:25,460 --> 00:04:28,730
 And in contrast to
-the semantic records,
+the semantic 
 
 93
 00:04:28,730 --> 00:04:31,310
-semantic derivative, which works
+derivative, which works
 
 94
 00:04:31,310 --> 00:04:34,430
 over languages or
-sets of strings.
+sets of strings,
 
 95
 00:04:34,430 --> 00:04:39,620
-This derivative works
+this derivative works
 over regular expressions.
 
 96
@@ -450,7 +450,7 @@
 
 98
 00:04:43,970 --> 00:04:46,370
-the structure of rec expressions.
+the structure of regular expressions.
 
 99
 00:04:46,370 --> 00:04:48,814
@@ -460,7 +460,7 @@
 100
 00:04:48,814 --> 00:04:52,700
 and the second one is
-a wreck expression.
+a regular expression.
 
 101
 00:04:52,700 --> 00:04:56,510
@@ -469,17 +469,17 @@
 
 102
 00:04:56,510 --> 00:05:00,680
-a wreck expression that
-can match everything.
+a regular expression that
+can match everything
 
 103
 00:05:00,680 --> 00:05:03,125
-This reg expression
+this regular expression
 as argument can match
 
 104
 00:05:03,125 --> 00:05:07,040
-except for the C. So now let's
+except for the c. So now let's
 look at this first case.
 
 105
@@ -494,17 +494,17 @@
 
 107
 00:05:14,270 --> 00:05:16,910
-a C. So we have to look
+a c. So we have to look
 
 108
 00:05:16,910 --> 00:05:20,090
-for and reg expression which
-can not much anything.
+for a regular expression which
+can not match anything.
 
 109
 00:05:20,090 --> 00:05:22,700
 Well, that's the purpose
-of this record expression,
+of this regular expression,
 
 110
 00:05:22,700 --> 00:05:24,815
@@ -513,7 +513,7 @@
 111
 00:05:24,815 --> 00:05:28,130
 In the next case,
-this reg expression
+this regular expression
 
 112
 00:05:28,130 --> 00:05:30,440
@@ -526,12 +526,12 @@
 
 114
 00:05:33,440 --> 00:05:36,350
-with C. So also in this case,
+with c. So also in this case,
 
 115
 00:05:36,350 --> 00:05:39,560
 we just define it as
-the bracket question,
+the regular expression,
 
 116
 00:05:39,560 --> 00:05:41,615
@@ -540,12 +540,12 @@
 117
 00:05:41,615 --> 00:05:45,170
 In the next case, this
-C as the argument to
+c is the argument to
 
 118
 00:05:45,170 --> 00:05:48,335
 the derivative and this d
-is the Rekha expression.
+is the regular expression.
 
 119
 00:05:48,335 --> 00:05:50,225
@@ -553,17 +553,17 @@
 
 120
 00:05:50,225 --> 00:05:53,525
-If this C and this D
-is actually equal.
+If this c and this d
+is actually equal,
 
 121
 00:05:53,525 --> 00:05:55,970
-That means this record
+Thaat means this regular
 expression can match
 
 122
 00:05:55,970 --> 00:05:59,240
-a string which just contains C0.
+a string which just contains c.
 
 123
 00:05:59,240 --> 00:06:01,505
@@ -571,7 +571,7 @@
 
 124
 00:06:01,505 --> 00:06:04,790
-motor remains is
+what remains is
 the empty string.
 
 125
@@ -586,7 +586,7 @@
 127
 00:06:09,170 --> 00:06:11,915
 Well, that's the
-purpose of the warm.
+purpose of the 1.
 
 128
 00:06:11,915 --> 00:06:15,440
@@ -594,7 +594,7 @@
 
 129
 00:06:15,440 --> 00:06:17,630
-then this reg expression
+then this regular expression
 
 130
 00:06:17,630 --> 00:06:19,220
@@ -603,34 +603,34 @@
 
 131
 00:06:19,220 --> 00:06:23,780
-a C. So again it will
+a c. So again it will
 be defined as just 0.
 
 132
 00:06:23,780 --> 00:06:29,390
-Okay? Now, the alternative case,
+Now, the alternative case,
 
 133
 00:06:29,390 --> 00:06:31,970
-if this reg expression can
+if this regular expression can
 
 134
 00:06:31,970 --> 00:06:36,050
-match the strings
-starting with C,
+match strings
+starting with c,
 
 135
 00:06:36,050 --> 00:06:40,820
 then it can either be
-matched by the Ongwen.
+matched by the r1
 
 136
 00:06:40,820 --> 00:06:44,495
-Or it can be much by the R2.
+or it can be matched by the r2.
 
 137
 00:06:44,495 --> 00:06:50,090
-If they are one can match C
+If the r1 can match c
 and then following a string,
 
 138
@@ -640,7 +640,7 @@
 
 139
 00:06:53,975 --> 00:06:56,570
-R to get a reg expression
+r1 to get a regular expression
 
 140
 00:06:56,570 --> 00:06:59,135
@@ -649,39 +649,39 @@
 
 141
 00:06:59,135 --> 00:07:02,690
-And the same we can do with R2.
+And the same we can do with r2.
 
 142
 00:07:02,690 --> 00:07:06,110
-You can find a reg expression
-which can match everything.
+You can find a regular expression
+which can match everything
 
 143
 00:07:06,110 --> 00:07:07,850
-This R2 can match,
+this r2 can match,
 
 144
 00:07:07,850 --> 00:07:09,710
-starting with C, bad,
+starting with c, but
 
 145
 00:07:09,710 --> 00:07:12,590
-which chopping off this C. Okay?
+we are chopping off this c. 
 
 146
 00:07:12,590 --> 00:07:16,370
 So now if you have to find
-the break expression,
+the regular expression,
 
 147
 00:07:16,370 --> 00:07:20,030
 which can match all the strings
-where C is tripped off.
+where c is chopped off,
 
 148
 00:07:20,030 --> 00:07:22,295
-Then we just have to
-take the alternatives
+then we just have to
+take the alternative
 
 149
 00:07:22,295 --> 00:07:24,965
@@ -689,11 +689,11 @@
 
 150
 00:07:24,965 --> 00:07:29,390
-Okay? Now to sequence case,
+Now to sequence case.
 
 151
 00:07:29,390 --> 00:07:33,005
-this sequence case is the
+This sequence case is the
 most complicated one.
 
 152
@@ -702,18 +702,18 @@
 
 153
 00:07:35,180 --> 00:07:39,335
-the second case where
-the Earth's brush.
+the second case, in the
+else branch.
 
 154
 00:07:39,335 --> 00:07:42,830
-Okay? So if this
+So if this
 regular expression
 
 155
 00:07:42,830 --> 00:07:46,145
 can match a string
-starting with C,
+starting with c,
 
 156
 00:07:46,145 --> 00:07:48,155
@@ -722,8 +722,8 @@
 
 157
 00:07:48,155 --> 00:07:51,905
-First, the R1 must have matched
-a string starting with C
+First, the r1 must have matched
+a string starting with c
 
 158
 00:07:51,905 --> 00:07:56,420
@@ -732,40 +732,40 @@
 
 159
 00:07:56,420 --> 00:07:59,660
-Okay? So in this case I only
+So in this case I only
 
 160
 00:07:59,660 --> 00:08:03,260
 have to call this
-derivative for this r one.
+derivative for this r1,
 
 161
 00:08:03,260 --> 00:08:06,830
-And find the reg expression
+and find the regular expression
 which can match everything
 
 162
 00:08:06,830 --> 00:08:11,555
-this R one can match except
-for this C chopped off.
+this r1 can match except
+for this c chopped off.
 
 163
 00:08:11,555 --> 00:08:15,830
-And I have to build that
+And I have to build the
 sequence derivative of that.
 
 164
 00:08:15,830 --> 00:08:18,260
-So that's what the As per se.
+So that's what the else branch says:
 
 165
 00:08:18,260 --> 00:08:21,860
-So I take the
-derivative of R1 and I
+I take the
+derivative of r1 and I
 
 166
 00:08:21,860 --> 00:08:23,480
-put the R2 on
+put the r2 on
 
 167
 00:08:23,480 --> 00:08:25,865
@@ -774,37 +774,37 @@
 
 168
 00:08:25,865 --> 00:08:29,240
-Ok? So that's the only
+So that's the only
 case we have to consider
 
 169
 00:08:29,240 --> 00:08:32,750
 this sequence case
-except if not the,
+except if not the
 
 170
 00:08:32,750 --> 00:08:37,895
-how one can match the
-sea and something else.
+r1 can match the
+c and something else.
 
 171
 00:08:37,895 --> 00:08:42,965
-But if on mismatching
-actually the empty string,
+But if r1 is matching
+actually the empty string.
 
 172
 00:08:42,965 --> 00:08:48,890
-this case actually the R two
+In this case actually the r2
 is in charge of matching
 
 173
 00:08:48,890 --> 00:08:51,590
-the string starting with C. So in
+the string starting with c. So in
 
 174
 00:08:51,590 --> 00:08:55,490
 this case we have to call
-the derivative for R2.
+the derivative for r2.
 
 175
 00:08:55,490 --> 00:08:57,875
@@ -813,7 +813,7 @@
 
 176
 00:08:57,875 --> 00:09:00,455
-So if R1 is nullable,
+So if r1 is nullable,
 
 177
 00:09:00,455 --> 00:09:03,245
@@ -827,20 +827,20 @@
 
 179
 00:09:05,330 --> 00:09:08,045
-this R2 is matching a string
+this r2 is matching a string
 
 180
 00:09:08,045 --> 00:09:10,700
-starting with C. And so we have
+starting with c. And so we have
 
 181
 00:09:10,700 --> 00:09:14,210
 to call the derivative
-for this R2 in this case.
+for this r2 in this case.
 
 182
 00:09:14,210 --> 00:09:18,680
-Otherwise, the R1 will
+Otherwise, the r1 will
 be in charge of matching
 
 183
@@ -850,22 +850,22 @@
 
 184
 00:09:20,840 --> 00:09:24,695
-C. And I have to call the
-derivative on this R1.
+c. And I have to call the
+derivative on this r1.
 
 185
 00:09:24,695 --> 00:09:30,670
-Okay? I hope this makes sense.
+I hope this makes sense.
 
 186
 00:09:30,670 --> 00:09:34,150
 Go over that and
-also the handouts.
+also the handouts
 
 187
 00:09:34,150 --> 00:09:37,465
-Again. With that, that's
-it's really subtle.
+again. That case
+is really subtle.
 
 188
 00:09:37,465 --> 00:09:40,945
@@ -886,18 +886,18 @@
 
 192
 00:09:48,160 --> 00:09:53,275
-the else branch where pushes
-NG derivative over the R1.
+the else branch where it pushes
+the derivative over the r1.
 
 193
 00:09:53,275 --> 00:09:55,780
-So are usually write
-down the S punch
+So I usually write
+down the else branch first,
 
 194
 00:09:55,780 --> 00:09:59,650
-first and then construct
-the thin branch by saying,
+and then construct
+the then branch by saying,
 
 195
 00:09:59,650 --> 00:10:01,525
@@ -911,25 +911,25 @@
 197
 00:10:03,760 --> 00:10:06,580
 have to build also
-derivative of R2.
+derivative of r2.
 
 198
 00:10:06,580 --> 00:10:12,695
-Okay? Finally, the star case.
+Finally, the star case.
 
 199
 00:10:12,695 --> 00:10:15,665
-Ok. So here again
+So here again
 we're looking for
 
 200
 00:10:15,665 --> 00:10:17,300
-a reg expression which
+a regular expression which
 
 201
 00:10:17,300 --> 00:10:19,745
 can match the string
-starting with C,
+starting with c,
 
 202
 00:10:19,745 --> 00:10:22,355
@@ -938,13 +938,13 @@
 
 203
 00:10:22,355 --> 00:10:28,640
-So if r star has to match
-a string starting with C,
+So if r* has to match
+a string starting with c,
 
 204
 00:10:28,640 --> 00:10:32,735
 then at least we need one
-copy of this r. Okay?
+copy of this r. 
 
 205
 00:10:32,735 --> 00:10:34,310
@@ -957,21 +957,21 @@
 
 207
 00:10:37,010 --> 00:10:38,870
-a C and then afterwards
+a c and then afterwards
 
 208
 00:10:38,870 --> 00:10:41,570
 come 0 more copies
-of this OnStar.
+of this r*.
 
 209
 00:10:41,570 --> 00:10:45,530
-Okay? What we do there
-is we trying to find
+What we do there
+is we are trying to find
 
 210
 00:10:45,530 --> 00:10:47,960
-the Rekha expression
+the regular expression
 which can match
 
 211
@@ -981,7 +981,7 @@
 212
 00:10:50,915 --> 00:10:53,255
 However, where the
-sea is chopped off.
+c is chopped off.
 
 213
 00:10:53,255 --> 00:10:55,130
@@ -998,12 +998,12 @@
 
 216
 00:10:59,030 --> 00:11:02,600
-this R. So that's why
+this r. So that's why
 it's defined like this.
 
 217
 00:11:02,600 --> 00:11:09,050
-Okay? S8. Please take care
+Okay? ...As said, please take care
 with this definition.
 
 218
@@ -1029,8 +1029,8 @@
 
 223
 00:11:20,825 --> 00:11:24,695
-Okay, let's look
-first some examples.
+Let's look
+first at some examples.
 
 224
 00:11:24,695 --> 00:11:27,140
@@ -1038,7 +1038,7 @@
 
 225
 00:11:27,140 --> 00:11:29,390
-R. And let's have
+r. And let's have
 
 226
 00:11:29,390 --> 00:11:32,450
@@ -1047,17 +1047,17 @@
 
 227
 00:11:32,450 --> 00:11:38,405
-B, and C. And Vishal do
-with d1 for the a. Ok.
+b, and c. And we shall do
+the one for a. 
 
 228
 00:11:38,405 --> 00:11:42,379
-So here is our reg expression
+So here is our regular expression
 
 229
 00:11:42,379 --> 00:11:45,334
-and was very generous
-with dependent a thesis.
+and I was very generous
+with parentheses.
 
 230
 00:11:45,334 --> 00:11:48,140
@@ -1065,30 +1065,30 @@
 
 231
 00:11:48,140 --> 00:11:52,550
-So if people now the
+So we now build the
 derivative according to a,
 
 232
 00:11:52,550 --> 00:11:55,474
-the character a of
-that wreck expression.
+the character a, of
+that regular expression.
 
 233
 00:11:55,474 --> 00:11:57,380
-Okay? So the first thing we
+So the first thing we
 
 234
 00:11:57,380 --> 00:11:59,555
-have to analyze is the K star.
+have to analyse is the case star.
 
 235
 00:11:59,555 --> 00:12:04,370
-Ok? So here's direct expression,
+So here's the regular expression,
 which we are looking at.
 
 236
 00:12:04,370 --> 00:12:09,170
-This are the outermost
+This r and the outermost
 constructor is this star.
 
 237
@@ -1101,11 +1101,11 @@
 
 239
 00:12:13,625 --> 00:12:16,340
-then this star case is defined
+then this star-case is defined
 
 240
 00:12:16,340 --> 00:12:20,000
-as u taking just the
+as you're taking just the
 inside of the star
 
 241
@@ -1114,7 +1114,7 @@
 
 242
 00:12:23,030 --> 00:12:26,765
-leave the are on the
+leave the r on the
 outside at the end.
 
 243
@@ -1128,7 +1128,7 @@
 245
 00:12:32,030 --> 00:12:36,035
 the derivative according to
-a of this record expression,
+a of this regular expression,
 
 246
 00:12:36,035 --> 00:12:38,000
@@ -1149,38 +1149,38 @@
 
 250
 00:12:45,185 --> 00:12:47,705
-into the a, followed by b.
+into the a followed by b.
 
 251
 00:12:47,705 --> 00:12:49,145
-And in this segment,
+And into the second
 
 252
 00:12:49,145 --> 00:12:51,185
-alternative into b. Ok,
+alternative, into b. 
 
 253
 00:12:51,185 --> 00:12:56,030
-so we take the derivative
-of each according to a way.
+We take the derivative
+of each according to a.
 
 254
 00:12:56,030 --> 00:13:00,635
 Now this one is a sequence
-break expression.
+regular expression.
 
 255
 00:13:00,635 --> 00:13:02,210
-This most complicated case.
+This was the complicated case.
 
 256
 00:13:02,210 --> 00:13:04,160
-So the first of all
-you have to test is,
+First of all
+we have to test is
 
 257
 00:13:04,160 --> 00:13:07,910
-is the first component
+the first component
 nullable of this sequence?
 
 258
@@ -1194,7 +1194,7 @@
 
 260
 00:13:12,740 --> 00:13:14,210
-So vn, the easy case,
+So we are the easy case,
 
 261
 00:13:14,210 --> 00:13:17,000
@@ -1220,7 +1220,7 @@
 
 266
 00:13:29,720 --> 00:13:31,715
-in the regular expression agree.
+and the regular expression agree.
 
 267
 00:13:31,715 --> 00:13:33,890
@@ -1232,11 +1232,11 @@
 
 269
 00:13:37,550 --> 00:13:39,710
-the break expression is P,
+the regular expression is b,
 
 270
 00:13:39,710 --> 00:13:41,675
-But the characters a.
+but the characters a.
 
 271
 00:13:41,675 --> 00:13:46,385
@@ -1259,23 +1259,22 @@
 
 275
 00:13:55,280 --> 00:13:58,295
-This expression is that
-star at expression.
+This regular expression is that star.
 
 276
 00:13:58,295 --> 00:14:02,780
-Ok? So the derivative
+So the derivative
 according to a
 
 277
 00:14:02,780 --> 00:14:07,610
-up that reg expression
+of that regular expression
 is this expression.
 
 278
 00:14:07,610 --> 00:14:10,970
 We just have to
-substitute this back in.
+substitute this r back in.
 
 279
 00:14:10,970 --> 00:14:13,745
@@ -1283,7 +1282,7 @@
 
 280
 00:14:13,745 --> 00:14:16,160
-So far, they're only analyze
+So far, we only analyzes
 
 281
 00:14:16,160 --> 00:14:19,505
@@ -1307,7 +1306,7 @@
 285
 00:14:27,905 --> 00:14:30,875
 we just return the
-Rekha expression.
+regular expression.
 
 286
 00:14:30,875 --> 00:14:35,585
@@ -1317,11 +1316,11 @@
 287
 00:14:35,585 --> 00:14:37,850
 remember that can
-be any character.
+be any character,
 
 288
 00:14:37,850 --> 00:14:42,170
-Then we build first the simple
+then we build first the simple
 derivative according to
 
 289
@@ -1336,12 +1335,12 @@
 291
 00:14:46,925 --> 00:14:50,615
 So here you see again,
-my personal convention.
+my personal convention:
 
 292
 00:14:50,615 --> 00:14:54,365
-Everything which works on
-lists has this S at the end.
+everything which works on
+lists has this s at the end.
 
 293
 00:14:54,365 --> 00:14:57,125
@@ -1387,7 +1386,7 @@
 
 302
 00:15:20,600 --> 00:15:23,465
-So the pro shops ki mantra
+So the Brzozowski matcher
 
 303
 00:15:23,465 --> 00:15:24,860
@@ -1401,15 +1400,15 @@
 305
 00:15:26,915 --> 00:15:30,920
 And is supposed to say yes if
-the reg expression matches
+the regular expression matches
 
 306
 00:15:30,920 --> 00:15:33,560
-the string or No
+the string or no
 
 307
 00:15:33,560 --> 00:15:36,065
-if the reg expression does
+if the regular expression does
 not match the string.
 
 308
@@ -1419,43 +1418,43 @@
 309
 00:15:37,715 --> 00:15:42,560
 Well, it takes this string
-s And this reg expression,
+s and this regular expression,
 
 310
 00:15:42,560 --> 00:15:43,925
-and it first built
+and it first builds
 
 311
 00:15:43,925 --> 00:15:48,845
 successive derivatives until
-that string is exhaust that.
+that string is exhausted.
 
 312
 00:15:48,845 --> 00:15:52,115
-Okay? Then you have
+Then you have
 a final derivative,
 
 313
 00:15:52,115 --> 00:15:53,839
-a final reg expression.
+a final regular expression
 
 314
 00:15:53,839 --> 00:15:55,370
-And you test whether
+and you test whether
 
 315
 00:15:55,370 --> 00:15:57,920
-this reg expression can
+this regular expression can
 match the empty string.
 
 316
 00:15:57,920 --> 00:16:01,370
 If yes, then the
-original reg expression
+original regular expression,
 
 317
 00:16:01,370 --> 00:16:03,245
-is r can match the string.
+this r, can match the string.
 
 318
 00:16:03,245 --> 00:16:05,210
@@ -1468,12 +1467,12 @@
 
 320
 00:16:08,030 --> 00:16:10,280
-then know this
-regular expression,
+then no this
+regular expression r
 
 321
 00:16:10,280 --> 00:16:12,905
-R cannot match that string.
+cannot match that string.
 
 322
 00:16:12,905 --> 00:16:16,010
@@ -1491,11 +1490,11 @@
 
 325
 00:16:22,760 --> 00:16:25,340
-So how does the algorithm work?
+So how does the algorithm work
 
 326
 00:16:25,340 --> 00:16:27,634
-In a concrete example?
+in a concrete example?
 
 327
 00:16:27,634 --> 00:16:31,580
@@ -1503,13 +1502,13 @@
 
 328
 00:16:31,580 --> 00:16:34,370
-and you have a break
-expression, say R1.
+and you have a regular
+expression, say r1.
 
 329
 00:16:34,370 --> 00:16:37,520
 And you want to find out
-whether this or one can match
+whether this r1 can match
 
 330
 00:16:37,520 --> 00:16:41,300
@@ -1519,7 +1518,7 @@
 331
 00:16:41,300 --> 00:16:45,140
 Well, you would first
-take this R1 and you
+take this r1 and you
 
 332
 00:16:45,140 --> 00:16:47,150
@@ -1527,7 +1526,7 @@
 
 333
 00:16:47,150 --> 00:16:49,880
-to the first character, D-A.
+to the first character, the a.
 
 334
 00:16:49,880 --> 00:16:53,015
@@ -1535,17 +1534,17 @@
 
 335
 00:16:53,015 --> 00:16:55,294
-which I call here R2.
+which I call here r2.
 
 336
 00:16:55,294 --> 00:16:58,640
 Then you take the next
-character, the B.
+character, the b.
 
 337
 00:16:58,640 --> 00:17:04,535
 You now build the derivative
-according to b of this R2.
+according to b of this r2.
 
 338
 00:17:04,535 --> 00:17:07,460
@@ -1564,16 +1563,16 @@
 341
 00:17:11,810 --> 00:17:17,075
 Then you do this also with c.
-So you get a derivative R3,
+So you get a derivative r3,
 
 342
 00:17:17,075 --> 00:17:22,460
 and you build the derivative
-of R three according to c,
+of r3 according to c,
 
 343
 00:17:22,460 --> 00:17:24,185
-you get an R four.
+you get an r4.
 
 344
 00:17:24,185 --> 00:17:26,300
@@ -1587,12 +1586,12 @@
 346
 00:17:27,530 --> 00:17:29,570
 We build derivatives
-according to a, B,
+according to a, b,
 
 347
 00:17:29,570 --> 00:17:34,610
-and C. Now we just test whether
-this r four is nullable.
+and c. Now we just test whether
+this r4 is nullable.
 
 348
 00:17:34,610 --> 00:17:37,175
@@ -1600,16 +1599,16 @@
 
 349
 00:17:37,175 --> 00:17:41,510
-then df break expression metro
+then the regular expression matcher
 will just say true, yes,
 
 350
 00:17:41,510 --> 00:17:43,340
-this original reg expression,
+this original regular expression,
 
 351
 00:17:43,340 --> 00:17:47,270
-the R1, will be able to
+the r1, will be able to
 match that string abc.
 
 352
@@ -1622,28 +1621,28 @@
 
 354
 00:17:53,015 --> 00:17:56,975
-This reg expression will
+This regular expression will
 not match the string.
 
 355
 00:17:56,975 --> 00:18:00,260
-Ok, you might ask
-why on earth does
+Ok, you might ask:
+Why on earth does
 
 356
 00:18:00,260 --> 00:18:02,960
 that algorithm
-actually work away?
+actually work?
 
 357
 00:18:02,960 --> 00:18:06,515
-Here's an explanation
+Here's anather explanation
 for why it works.
 
 358
 00:18:06,515 --> 00:18:10,190
-Imagine you have a wreck
-expression R1, okay?
+Imagine you have a regular
+expression r1, okay?
 
 359
 00:18:10,190 --> 00:18:13,220
@@ -1655,7 +1654,7 @@
 
 361
 00:18:14,270 --> 00:18:17,180
-whether one can
+whether r1 can
 match that string.
 
 362
@@ -1673,7 +1672,7 @@
 
 365
 00:18:26,315 --> 00:18:30,185
-R will actually
+r1 will actually
 contain that string,
 
 366
@@ -1682,7 +1681,7 @@
 
 367
 00:18:31,805 --> 00:18:36,710
-Okay? So ABC is in
+Okay? So abc is in
 this language, okay?
 
 368
@@ -1693,11 +1692,11 @@
 369
 00:18:39,785 --> 00:18:43,145
 that means I look at all
-the strings in this f,
+the strings in this L(r1)
 
 370
 00:18:43,145 --> 00:18:46,820
-R1, and further out
+and filter out
 
 371
 00:18:46,820 --> 00:18:48,740
@@ -1710,7 +1709,7 @@
 
 373
 00:18:51,260 --> 00:18:54,545
-And I only look the one
+And I only look at the ones
 which start with an a.
 
 374
@@ -1724,12 +1723,12 @@
 376
 00:18:58,475 --> 00:19:01,025
 So after this
-romantic derivative,
+semantic derivative,
 
 377
 00:19:01,025 --> 00:19:05,735
 this set of strings will
-contain just B and C.
+contain just bc.
 
 378
 00:19:05,735 --> 00:19:12,830
@@ -1743,17 +1742,17 @@
 380
 00:19:14,345 --> 00:19:16,850
 all the strings which
-start with a P,
+start with a b,
 
 381
 00:19:16,850 --> 00:19:21,350
 and forget about everything
-else of the ones.
+else. Of the remaining ones
 
 382
 00:19:21,350 --> 00:19:27,905
-I know they start with B.
-I just chop of the B. Ok.
+I know they start with b.
+I just chop of the b. Ok.
 
 383
 00:19:27,905 --> 00:19:31,655
@@ -1776,7 +1775,7 @@
 387
 00:19:44,420 --> 00:19:47,300
 because I want to find out
-whether ABC is involved.
+whether abc is matched.
 
 388
 00:19:47,300 --> 00:19:50,540
@@ -1790,15 +1789,15 @@
 390
 00:19:52,820 --> 00:19:55,340
 them whether they start
-with a C. If yes,
+with a c. If yes,
 
 391
 00:19:55,340 --> 00:19:56,885
-I chop off the sea.
+I chop off the c.
 
 392
 00:19:56,885 --> 00:19:59,120
-And put in markets remaining.
+And put in what is remaining.
 
 393
 00:19:59,120 --> 00:20:00,425
@@ -1811,11 +1810,11 @@
 395
 00:20:02,510 --> 00:20:04,550
 in this language and
-I chop off this,
+I chop off this c,
 
 396
 00:20:04,550 --> 00:20:07,700
-see what is remaining
+what is remaining
 is the empty string.
 
 397
@@ -1830,11 +1829,11 @@
 399
 00:20:14,510 --> 00:20:18,800
 If yes, then the
-original R1 can match
+original r1 can match
 
 400
 00:20:18,800 --> 00:20:21,050
-this ABC because this ABC
+this abc because this abc
 
 401
 00:20:21,050 --> 00:20:24,119
@@ -1843,20 +1842,20 @@
 402
 00:20:24,130 --> 00:20:28,565
 And if in the end there wasn't
-the empty string, then,
+the empty string, then
 
 403
 00:20:28,565 --> 00:20:33,575
-then this ABC Watson in
-this language of one.
+this abs was not in
+this language of r1.
 
 404
 00:20:33,575 --> 00:20:36,260
-And so the electron must have,
+And so the lexer must have,
 
 405
 00:20:36,260 --> 00:20:38,880
-or the metro must have failed.
+or the matcher must have failed.
 
 406
 00:20:39,040 --> 00:20:42,530
@@ -1883,18 +1882,18 @@
 
 411
 00:20:55,730 --> 00:20:58,880
-Yeah, that's just
-explaining the idea with
+That's just
+explaining the idea. 
 
 412
 00:20:58,880 --> 00:21:02,525
-preserves key
+What Brzozowski
 achieved was that he
 
 413
 00:21:02,525 --> 00:21:06,440
 now works with this derivative
-America expressions and
+regular expressions and
 
 414
 00:21:06,440 --> 00:21:10,715
@@ -1904,12 +1903,12 @@
 415
 00:21:10,715 --> 00:21:14,135
 Because remember if you
-have an wreck expression,
+have a regular expression,
 
 416
 00:21:14,135 --> 00:21:17,405
-are you want to test
-whether can match APC,
+and you want to test
+whether it can match abc,
 
 417
 00:21:17,405 --> 00:21:22,550
@@ -1918,12 +1917,12 @@
 
 418
 00:21:22,550 --> 00:21:25,760
-So you will get a wreck
+So you will get a regular
 expression which can match b
 
 419
 00:21:25,760 --> 00:21:29,464
-and c If R could match abc.
+and c, if r could match abc.
 
 420
 00:21:29,464 --> 00:21:31,430
@@ -1931,17 +1930,17 @@
 
 421
 00:21:31,430 --> 00:21:33,620
-you will get a wreck expression
-which can match B and
+you will get a regular expression
+which can match b and
 
 422
 00:21:33,620 --> 00:21:37,070
-C. If you take the
+c. If you take the
 second derivative,
 
 423
 00:21:37,070 --> 00:21:41,225
-you will get a reexpression
+you will get a regular expression
 which can match c alone.
 
 424
@@ -1955,7 +1954,7 @@
 
 426
 00:21:46,070 --> 00:21:48,260
-rec expression which hopefully
+a regular expression which hopefully
 
 427
 00:21:48,260 --> 00:21:49,715
@@ -1964,7 +1963,7 @@
 428
 00:21:49,715 --> 00:21:53,780
 If it does, then this
-R can match the ABC.
+r can match the abc.
 
 429
 00:21:53,780 --> 00:21:55,655
@@ -1972,8 +1971,8 @@
 
 430
 00:21:55,655 --> 00:21:58,680
-ABC couldn't be
-matched by this on.
+abc couldn't be
+matched by this r.
 
 431
 00:21:58,900 --> 00:22:02,990
@@ -1982,11 +1981,11 @@
 
 432
 00:22:02,990 --> 00:22:06,050
-Here's defile RE1.
+Here's the file re1.sc.
 
 433
 00:22:06,050 --> 00:22:07,940
-It's also uploaded on Keith,
+It's also uploaded on KEATS,
 
 434
 00:22:07,940 --> 00:22:10,625
@@ -1995,26 +1994,26 @@
 
 435
 00:22:10,625 --> 00:22:13,970
-And actually I already saw
+And actually you already saw
 that file because I showed you
 
 436
 00:22:13,970 --> 00:22:15,710
-how my wreck expressions are
+how my regular expressions are
 
 437
 00:22:15,710 --> 00:22:17,960
-defined with the
+defined, with the
 abstract classes here.
 
 438
 00:22:17,960 --> 00:22:21,155
 And here, the six cases
-for 0-1 character,
+for 0, 1, character,
 
 439
 00:22:21,155 --> 00:22:23,540
-I turn a TIF in
+alternative, 
 sequence and star.
 
 440
@@ -2043,20 +2042,20 @@
 
 445
 00:22:37,040 --> 00:22:38,900
-are serious defined as false.
+0 is defined as false.
 
 446
 00:22:38,900 --> 00:22:43,234
-One is defined as true
-character for any character,
+One is defined as true.
+Character, for any character,
 
 447
 00:22:43,234 --> 00:22:45,455
-this null return false.
+this nullable will return false.
 
 448
 00:22:45,455 --> 00:22:47,540
-The alternative is to find here,
+The alternative is defined here,
 
 449
 00:22:47,540 --> 00:22:50,000
@@ -2065,12 +2064,12 @@
 450
 00:22:50,000 --> 00:22:52,700
 And for the sequence,
-that's the end.
+that's the and.
 
 451
 00:22:52,700 --> 00:22:56,180
 And this star, no matter
-what the reg expression is,
+what the regular expression is,
 
 452
 00:22:56,180 --> 00:22:59,540
@@ -2079,7 +2078,7 @@
 
 453
 00:22:59,540 --> 00:23:02,225
-So nanobots, very easy.
+So nullable is very easy.
 
 454
 00:23:02,225 --> 00:23:07,430
@@ -2093,11 +2092,11 @@
 456
 00:23:08,974 --> 00:23:11,810
 a character and the
-rec expression,
+regular expression,
 
 457
 00:23:11,810 --> 00:23:14,405
-and returns a wreck expression.
+and returns a regular expression.
 
 458
 00:23:14,405 --> 00:23:16,340
@@ -2105,32 +2104,32 @@
 
 459
 00:23:16,340 --> 00:23:18,890
-what's that reg
+what's that regular
 expression looks like.
 
 460
 00:23:18,890 --> 00:23:23,870
 If it's 0, we return
-01, we return 0.
+0; if it's 1 we return 0.
 
 461
 00:23:23,870 --> 00:23:26,990
-If the character is the
-wreck expressions character
+If the character... If the
+regular expressions character
 
 462
 00:23:26,990 --> 00:23:30,260
-d. We test whether it's
-equal to this character.
+d, we test whether it's
+equal to this character
 
 463
 00:23:30,260 --> 00:23:32,225
-We want to take the
+we want to take the
 derivative off.
 
 464
 00:23:32,225 --> 00:23:36,185
-If yes, return one, otherwise 0.
+If yes, return 1, otherwise 0.
 
 465
 00:23:36,185 --> 00:23:38,600
@@ -2161,17 +2160,17 @@
 471
 00:23:49,040 --> 00:23:52,160
 the derivative over
-the first DR1.
+the first, the r1.
 
 472
 00:23:52,160 --> 00:23:56,135
-And otherwise if it is null
-above we have two cases.
+And otherwise if it is nullable,
+we have two cases.
 
 473
 00:23:56,135 --> 00:24:00,605
 Either you have to push
-it over the R1 or R2.
+it over the r1 or r2.
 
 474
 00:24:00,605 --> 00:24:03,860
@@ -2184,8 +2183,8 @@
 
 476
 00:24:07,160 --> 00:24:11,600
-R1 and append the
-or as a sequence,
+r1 and append the
+r1 as a sequence,
 
 477
 00:24:11,600 --> 00:24:14,195
@@ -2193,7 +2192,7 @@
 
 478
 00:24:14,195 --> 00:24:19,430
-Okay, so here's this
+Okay, so here's the
 function for strings.
 
 479
@@ -2202,17 +2201,17 @@
 
 480
 00:24:21,740 --> 00:24:23,870
-This function takes N,
+This function takes an s,
 
 481
 00:24:23,870 --> 00:24:26,480
-S list of characters argument
-and a reg expression
+a list of characters as argument
+and a regular expression
 
 482
 00:24:26,480 --> 00:24:29,360
 as argument and returns
-a wreck expression.
+a regular expression.
 
 483
 00:24:29,360 --> 00:24:31,460
@@ -2231,12 +2230,12 @@
 486
 00:24:35,360 --> 00:24:38,030
 it just immediately
-return the R. If
+return the r. If
 
 487
 00:24:38,030 --> 00:24:42,020
 it's something of the
-form C followed by S,
+form c followed by s,
 
 488
 00:24:42,020 --> 00:24:45,050
@@ -2245,12 +2244,12 @@
 
 489
 00:24:45,050 --> 00:24:48,345
-to a C. And then
+to c. And then
 the result of that,
 
 490
 00:24:48,345 --> 00:24:50,090
-people recursively call
+we recursively call
 
 491
 00:24:50,090 --> 00:24:53,675
@@ -2259,11 +2258,11 @@
 
 492
 00:24:53,675 --> 00:24:56,060
-And now the main mantra,
+And now the main matcher,
 
 493
 00:24:56,060 --> 00:24:59,360
-it takes a reg
+it takes a regular
 expression and takes
 
 494
@@ -2283,7 +2282,7 @@
 497
 00:25:07,430 --> 00:25:09,410
 because I'm implementing
-it and scholar,
+it in Scala,
 
 498
 00:25:09,410 --> 00:25:15,064
@@ -2296,7 +2295,7 @@
 
 500
 00:25:16,580 --> 00:25:20,810
-the two list of the S that
+the toList of the s. That
 gives me a list of characters.
 
 501
@@ -2305,7 +2304,7 @@
 
 502
 00:25:23,135 --> 00:25:26,045
-is ds because I'm
+with the s, because I'm
 looking at strings.
 
 503
@@ -2314,7 +2313,7 @@
 
 504
 00:25:28,160 --> 00:25:33,050
-built-in nullable of
+built the nullable of
 the final derivative.
 
 505
@@ -2365,8 +2364,8 @@
 
 515
 00:26:00,305 --> 00:26:02,720
-B, and C of this
-record expression.
+b, and c of this
+regular expression.
 
 516
 00:26:02,720 --> 00:26:07,040
@@ -2376,16 +2375,16 @@
 517
 00:26:07,040 --> 00:26:09,260
 Now of course we want
-to see whether B
+to see whether we
 
 518
 00:26:09,260 --> 00:26:11,390
 do any better with
-this algorithm.
+this algorithm...
 
 519
 00:26:11,390 --> 00:26:13,715
-Then Python and Ruby.
+than Python and Ruby.
 
 520
 00:26:13,715 --> 00:26:16,070
@@ -2394,7 +2393,7 @@
 
 521
 00:26:16,070 --> 00:26:18,079
-So this reg expression.
+So this regular expression.
 
 522
 00:26:18,079 --> 00:26:19,880
@@ -2402,23 +2401,23 @@
 
 523
 00:26:19,880 --> 00:26:22,070
-our reg expression
-metro so far we have
+our regular expression
+matcher so far we have
 
 524
 00:26:22,070 --> 00:26:24,530
-the sequence right
-expression where we
+the sequence rregular
+expression, but we
 
 525
 00:26:24,530 --> 00:26:27,200
 don't have this optional
-wreck expression.
+regular expression.
 
 526
 00:26:27,200 --> 00:26:30,800
 And we don't have this n
-times Rekha expression,
+times regular expression,
 
 527
 00:26:30,800 --> 00:26:36,605
@@ -2428,7 +2427,7 @@
 528
 00:26:36,605 --> 00:26:40,549
 So if you want to build the
-optional reg expression,
+optional regular expression,
 
 529
 00:26:40,549 --> 00:26:41,870
@@ -2441,7 +2440,7 @@
 
 531
 00:26:44,570 --> 00:26:47,360
-returns the alternative of R one.
+returns the alternative of r and 1.
 
 532
 00:26:47,360 --> 00:26:49,624
@@ -2450,22 +2449,21 @@
 
 533
 00:26:49,624 --> 00:26:53,240
-of optional R and
+of optional r and
 replaces it by that.
 
 534
 00:26:53,240 --> 00:26:56,150
-The end times what we
-essentially do it,
+The n-times what we
+essentially do there is
 
 535
 00:26:56,150 --> 00:27:01,535
-There's beaches built n
-copies of this r. Ok?
+we built n copies of this r. Ok?
 
 536
 00:27:01,535 --> 00:27:04,745
-So if this n times was 0,
+So if this n-times was 0,
 
 537
 00:27:04,745 --> 00:27:06,245
@@ -2474,11 +2472,11 @@
 538
 00:27:06,245 --> 00:27:11,570
 If it's one, then we return
-just the reg expression.
+just the regular expression.
 
 539
 00:27:11,570 --> 00:27:15,575
-And if it's a, something
+And if it's something
 bigger than one,
 
 540
@@ -2493,25 +2491,25 @@
 542
 00:27:20,270 --> 00:27:22,925
 this function again
-with n minus one.
+with n - 1.
 
 543
 00:27:22,925 --> 00:27:26,330
-So we just now n copies of that.
+So we just build now n-copies of that.
 
 544
 00:27:26,330 --> 00:27:30,710
-Okay? Okay, so we can look
+Okay, so we can look
 at our first example.
 
 545
 00:27:30,710 --> 00:27:32,629
-This is the work expression,
+This is the regular expression,
 
 546
 00:27:32,629 --> 00:27:36,035
 and I call that here
-even rec expression1.
+evil regular expression1.
 
 547
 00:27:36,035 --> 00:27:37,670
@@ -2533,7 +2531,7 @@
 
 551
 00:27:43,640 --> 00:27:45,724
-Here's the reg expression,
+Here's the regular expression,
 
 552
 00:27:45,724 --> 00:27:49,520
@@ -2546,26 +2544,26 @@
 
 554
 00:27:53,180 --> 00:27:57,845
-this reg expression with
-strings of just containing A's.
+this regular expression with
+strings just containing a's.
 
 555
 00:27:57,845 --> 00:28:00,020
-And we first a bit cautious,
+And we are first a bit cautious,
 
 556
 00:28:00,020 --> 00:28:04,985
-be tested between 020
-and be count by two.
+we test it between 0 and 20,
+and we count by two.
 
 557
 00:28:04,985 --> 00:28:08,330
 And I actually will not
-start that with Scala,
+start that within Scala,
 
 558
 00:28:08,330 --> 00:28:12,965
-but actually the orbital online.
+but actually use Ammonite.
 
 559
 00:28:12,965 --> 00:28:15,305
@@ -2573,16 +2571,16 @@
 
 560
 00:28:15,305 --> 00:28:17,269
-And that correlates.
+And that calculates
 
 561
 00:28:17,269 --> 00:28:20,675
-So for 18 days it's pretty fast.
+for 18 a's. It's pretty fast.
 
 562
 00:28:20,675 --> 00:28:24,815
 But for 20 it already
-needs to seconds.
+needs 2 seconds.
 
 563
 00:28:24,815 --> 00:28:28,265
@@ -2592,7 +2590,7 @@
 564
 00:28:28,265 --> 00:28:32,825
 18 to 20 in the runtime
-is prepared to.
+is pretty bad too.
 
 565
 00:28:32,825 --> 00:28:37,460
@@ -2609,7 +2607,7 @@
 
 568
 00:28:41,255 --> 00:28:45,830
-we actually inverse as Python
+we are actually worse than Python
 and Ruby in this example.
 
 569
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/videos/02-algo2.srt	Sat Oct 03 00:51:47 2020 +0100
@@ -0,0 +1,1772 @@
+1
+00:00:06,020 --> 00:00:09,945
+They come back after
+this disappointment
+
+2
+00:00:09,945 --> 00:00:14,115
+and case of over promising
+and under achieving.
+
+3
+00:00:14,115 --> 00:00:17,295
+Let's have a look whether
+we can recover from that.
+
+4
+00:00:17,295 --> 00:00:20,535
+So here's one problem.
+
+5
+00:00:20,535 --> 00:00:23,790
+Then we looked at this end
+times vk expression and
+
+6
+00:00:23,790 --> 00:00:27,330
+be able to represent
+that directly.
+
+7
+00:00:27,330 --> 00:00:31,275
+We had two represented as a
+sequence record expression.
+
+8
+00:00:31,275 --> 00:00:32,850
+So we expanded it.
+
+9
+00:00:32,850 --> 00:00:36,510
+So times one would be just a,
+
+10
+00:00:36,510 --> 00:00:40,595
+t, times two would
+be a, and so on.
+
+11
+00:00:40,595 --> 00:00:43,535
+And so you can see if
+you go already to 13,
+
+12
+00:00:43,535 --> 00:00:47,960
+N equals 13, the record
+expression becomes quite large.
+
+13
+00:00:47,960 --> 00:00:52,865
+And now the functions
+nullable and also derivative.
+
+14
+00:00:52,865 --> 00:00:56,360
+They need to traverse
+this reg expression.
+
+15
+00:00:56,360 --> 00:00:59,060
+Remember this tree in sometimes
+
+16
+00:00:59,060 --> 00:01:01,820
+they have to traverse
+that even several times.
+
+17
+00:01:01,820 --> 00:01:04,535
+So that will slow
+down the algorithm.
+
+18
+00:01:04,535 --> 00:01:08,150
+And in particular, because
+our first reg expression was
+
+19
+00:01:08,150 --> 00:01:11,840
+actually not just a bot
+a plus one. So b hat.
+
+20
+00:01:11,840 --> 00:01:14,330
+In the case of 13,
+we had a plus one,
+
+21
+00:01:14,330 --> 00:01:17,330
+a plus one a plus born 13 times.
+
+22
+00:01:17,330 --> 00:01:20,150
+And this reg
+expression just grows,
+
+23
+00:01:20,150 --> 00:01:25,475
+stay in number n. Just to
+show you this also encode,
+
+24
+00:01:25,475 --> 00:01:28,115
+I'm Stern, This File rewarm
+
+25
+00:01:28,115 --> 00:01:30,920
+and D and I have a size function.
+
+26
+00:01:30,920 --> 00:01:33,140
+The size function takes
+a regular expression as
+
+27
+00:01:33,140 --> 00:01:36,215
+argument and produces in teacher.
+
+28
+00:01:36,215 --> 00:01:39,980
+And essentially it takes
+this rec expression scene as
+
+29
+00:01:39,980 --> 00:01:44,045
+a tree and count how many
+nodes are in this tree.
+
+30
+00:01:44,045 --> 00:01:49,490
+And so if I take this even
+one reg expression, this one,
+
+31
+00:01:49,490 --> 00:01:52,160
+and increase now this n. So you
+
+32
+00:01:52,160 --> 00:01:54,680
+can already see
+for any cross one,
+
+33
+00:01:54,680 --> 00:01:56,540
+the size of this
+record expression is
+
+34
+00:01:56,540 --> 00:01:59,615
+five and you see it
+slowly increases.
+
+35
+00:01:59,615 --> 00:02:02,150
+And this 20 n equals 20.
+
+36
+00:02:02,150 --> 00:02:07,100
+The size of this record
+expression is SMOW already a 119.
+
+37
+00:02:07,100 --> 00:02:10,145
+So now the interesting
+line as this one.
+
+38
+00:02:10,145 --> 00:02:13,295
+Remember it when we
+built the derivative,
+
+39
+00:02:13,295 --> 00:02:17,150
+very often parts of a reg
+expression gets copied.
+
+40
+00:02:17,150 --> 00:02:19,280
+So if you have like EVA,
+
+41
+00:02:19,280 --> 00:02:22,325
+one of 2019 nodes,
+
+42
+00:02:22,325 --> 00:02:25,475
+now this will be often copied.
+
+43
+00:02:25,475 --> 00:02:31,475
+And we want to see what's the
+resulting tree look like,
+
+44
+00:02:31,475 --> 00:02:32,780
+how many nodes it has.
+
+45
+00:02:32,780 --> 00:02:34,985
+So I take here either one of 20
+
+46
+00:02:34,985 --> 00:02:38,405
+and the derivative
+according to 20 a's.
+
+47
+00:02:38,405 --> 00:02:41,820
+And just have a look
+what the size is.
+
+48
+00:02:43,210 --> 00:02:45,680
+Okay, that takes away.
+
+49
+00:02:45,680 --> 00:02:48,410
+You see now this rec expression,
+
+50
+00:02:48,410 --> 00:02:52,280
+the derivative has already
+8 million plus nodes.
+
+51
+00:02:52,280 --> 00:02:55,400
+And now nullable and
+derivative have to
+
+52
+00:02:55,400 --> 00:02:58,430
+traverse these trees with
+a million plus nodes.
+
+53
+00:02:58,430 --> 00:03:01,400
+So it's no wonder
+that this is slow.
+
+54
+00:03:01,400 --> 00:03:03,860
+The first thing we
+can try to mitigate
+
+55
+00:03:03,860 --> 00:03:06,350
+this explosion problem is to
+
+56
+00:03:06,350 --> 00:03:09,050
+have an explicit and
+times reg expression.
+
+57
+00:03:09,050 --> 00:03:11,600
+So instead of expanding
+
+58
+00:03:11,600 --> 00:03:13,415
+it to the sequence
+reg expression,
+
+59
+00:03:13,415 --> 00:03:17,510
+let's just have a single
+wreck expression and times,
+
+60
+00:03:17,510 --> 00:03:20,540
+which takes an expression and
+
+61
+00:03:20,540 --> 00:03:24,470
+a number and keeps that
+regular expression Small.
+
+62
+00:03:24,470 --> 00:03:27,095
+I'm here in the fire V2,
+
+63
+00:03:27,095 --> 00:03:29,090
+which is also on Keats.
+
+64
+00:03:29,090 --> 00:03:32,570
+And the only change I made
+is I added explicitly
+
+65
+00:03:32,570 --> 00:03:33,860
+a wrecker expression for
+
+66
+00:03:33,860 --> 00:03:36,770
+N times the optional
+reg expression.
+
+67
+00:03:36,770 --> 00:03:39,920
+Very leaf out at the
+moment because this a
+
+68
+00:03:39,920 --> 00:03:41,975
+optional is represented as
+
+69
+00:03:41,975 --> 00:03:44,645
+a plus one and doesn't
+explain too much.
+
+70
+00:03:44,645 --> 00:03:47,375
+The really the culprit
+is this n times.
+
+71
+00:03:47,375 --> 00:03:51,095
+And instead of expanding
+it n times, which has,
+
+72
+00:03:51,095 --> 00:03:54,275
+take here a wreck expression
+and a natural number,
+
+73
+00:03:54,275 --> 00:03:56,960
+which says how many times
+it should be repeated.
+
+74
+00:03:56,960 --> 00:03:59,165
+And in this week we
+can keep it small.
+
+75
+00:03:59,165 --> 00:04:01,370
+But by adding that
+reg expression,
+
+76
+00:04:01,370 --> 00:04:05,150
+we now have to think about
+cases for nullable and
+
+77
+00:04:05,150 --> 00:04:07,340
+derivative so that we have
+
+78
+00:04:07,340 --> 00:04:10,205
+to now calculate next
+what they look like.
+
+79
+00:04:10,205 --> 00:04:14,060
+We can actually
+calculate what the rule
+
+80
+00:04:14,060 --> 00:04:17,525
+for nullable should be from
+how we defined it earlier.
+
+81
+00:04:17,525 --> 00:04:19,580
+Remember, if you have
+a regular expression,
+
+82
+00:04:19,580 --> 00:04:21,980
+R and B take in
+times of this are,
+
+83
+00:04:21,980 --> 00:04:25,220
+then indicates if n equals 0,
+
+84
+00:04:25,220 --> 00:04:28,130
+we find that as the
+one reg expression.
+
+85
+00:04:28,130 --> 00:04:30,380
+If n equals one,
+
+86
+00:04:30,380 --> 00:04:32,495
+it will be just a
+single copy of on.
+
+87
+00:04:32,495 --> 00:04:33,725
+If n equals two,
+
+88
+00:04:33,725 --> 00:04:35,270
+it will be two copies.
+
+89
+00:04:35,270 --> 00:04:39,260
+Sequence if three, we have
+three copies and so on.
+
+90
+00:04:39,260 --> 00:04:41,390
+Now we have to
+somehow characterize
+
+91
+00:04:41,390 --> 00:04:44,285
+when these cases all nullable.
+
+92
+00:04:44,285 --> 00:04:47,810
+Well, if n equals 0,
+
+93
+00:04:47,810 --> 00:04:49,850
+then this will be defined as one,
+
+94
+00:04:49,850 --> 00:04:52,100
+so one can match
+the empty string.
+
+95
+00:04:52,100 --> 00:04:54,785
+So that should be
+definitely true.
+
+96
+00:04:54,785 --> 00:04:57,725
+Okay, if any cross one,
+
+97
+00:04:57,725 --> 00:05:00,470
+when can this reg expression
+match the empty string?
+
+98
+00:05:00,470 --> 00:05:02,000
+So vent should be nullable.
+
+99
+00:05:02,000 --> 00:05:07,535
+Well, if nullable of our holes,
+
+100
+00:05:07,535 --> 00:05:09,860
+okay, so if I can match
+the empty string,
+
+101
+00:05:09,860 --> 00:05:12,110
+then we can match
+the empty string.
+
+102
+00:05:12,110 --> 00:05:14,870
+When can this regular expression
+match the empty string?
+
+103
+00:05:14,870 --> 00:05:16,265
+So these are now two copies of
+
+104
+00:05:16,265 --> 00:05:20,690
+our well-posed copies need
+to match the empty string.
+
+105
+00:05:20,690 --> 00:05:22,820
+So again, we have to ask
+
+106
+00:05:22,820 --> 00:05:25,774
+both of them need to be nullable.
+
+107
+00:05:25,774 --> 00:05:28,520
+Ok? Similarly, in the third case,
+
+108
+00:05:28,520 --> 00:05:30,710
+all three copies need to be
+
+109
+00:05:30,710 --> 00:05:33,230
+able to match the empty
+string. Men, is that the case?
+
+110
+00:05:33,230 --> 00:05:38,360
+Well, if nullable of
+our holes and so on.
+
+111
+00:05:38,360 --> 00:05:48,754
+So we can actually define that
+if n equals 0, then true.
+
+112
+00:05:48,754 --> 00:05:56,045
+Else we have to ask with
+nullable of our holes.
+
+113
+00:05:56,045 --> 00:05:58,550
+So that will be the clause we
+
+114
+00:05:58,550 --> 00:06:01,625
+have to implement for
+n times and nullable.
+
+115
+00:06:01,625 --> 00:06:04,280
+Deriving the definition for
+
+116
+00:06:04,280 --> 00:06:06,920
+the derivative of the n terms
+
+117
+00:06:06,920 --> 00:06:10,175
+reg expression.
+It's not that easy.
+
+118
+00:06:10,175 --> 00:06:12,755
+So we have to look again
+how it was defined
+
+119
+00:06:12,755 --> 00:06:16,010
+earlier and we somehow
+have to spot a pattern.
+
+120
+00:06:16,010 --> 00:06:18,380
+The voice, our
+algorithm will be again
+
+121
+00:06:18,380 --> 00:06:20,975
+quite slow if you don't
+spot that pattern.
+
+122
+00:06:20,975 --> 00:06:22,550
+So let's have a look.
+
+123
+00:06:22,550 --> 00:06:26,240
+So in the case that
+n is equal to 0,
+
+124
+00:06:26,240 --> 00:06:29,885
+then r n times most
+defined as just one.
+
+125
+00:06:29,885 --> 00:06:32,030
+What would be the
+derivative of one?
+
+126
+00:06:32,030 --> 00:06:36,140
+So the derivative of c of one.
+
+127
+00:06:36,140 --> 00:06:38,990
+Peaches defined as 0.
+
+128
+00:06:38,990 --> 00:06:41,465
+Okay, fair enough.
+
+129
+00:06:41,465 --> 00:06:44,960
+If he have any cross one,
+
+130
+00:06:44,960 --> 00:06:48,125
+then we just have one copy
+of this regular expression.
+
+131
+00:06:48,125 --> 00:06:50,120
+So there's not much as we can do.
+
+132
+00:06:50,120 --> 00:06:53,735
+We would have to calculate
+the derivative of air are.
+
+133
+00:06:53,735 --> 00:06:57,395
+Now in the n equals two case.
+
+134
+00:06:57,395 --> 00:07:00,245
+That would mean we
+have two copies of
+
+135
+00:07:00,245 --> 00:07:03,425
+R. And they would be a sequence.
+
+136
+00:07:03,425 --> 00:07:07,775
+How would be the derivative C of
+
+137
+00:07:07,775 --> 00:07:10,340
+R four by R be
+
+138
+00:07:10,340 --> 00:07:13,265
+defined where we would
+look up the definition.
+
+139
+00:07:13,265 --> 00:07:15,470
+And it would say that's
+the complicated case
+
+140
+00:07:15,470 --> 00:07:16,760
+d sequence or
+
+141
+00:07:16,760 --> 00:07:21,875
+if null a bowl of R,
+
+142
+00:07:21,875 --> 00:07:25,250
+Then the most complicated case.
+
+143
+00:07:25,250 --> 00:07:28,225
+Else, That's the easy
+case that would be
+
+144
+00:07:28,225 --> 00:07:32,660
+derivative of c of R four
+
+145
+00:07:32,660 --> 00:07:35,540
+by R. And then I
+just have to copy
+
+146
+00:07:35,540 --> 00:07:40,775
+that derivative of C
+of four by r here.
+
+147
+00:07:40,775 --> 00:07:43,130
+But in this case I have also
+
+148
+00:07:43,130 --> 00:07:51,145
+the alternative derivative
+of c of r. And from that,
+
+149
+00:07:51,145 --> 00:07:55,030
+it looks like we can
+do much better than
+
+150
+00:07:55,030 --> 00:07:58,390
+that unless we do
+
+151
+00:07:58,390 --> 00:08:02,560
+a little bit of magic and the
+magic to do with this case.
+
+152
+00:08:02,560 --> 00:08:07,420
+So let me look at this
+case a bit more closely.
+
+153
+00:08:07,420 --> 00:08:09,819
+Let me go back to slides
+
+154
+00:08:09,819 --> 00:08:12,940
+because actually the calculation
+might be a bit hairy.
+
+155
+00:08:12,940 --> 00:08:16,945
+So remember we are in this
+case where n equals two.
+
+156
+00:08:16,945 --> 00:08:18,550
+And this was defined as
+
+157
+00:08:18,550 --> 00:08:20,680
+this sequence work
+expression R followed
+
+158
+00:08:20,680 --> 00:08:23,080
+by r. And the question was,
+
+159
+00:08:23,080 --> 00:08:26,365
+can we somehow make sense
+out of this derivative
+
+160
+00:08:26,365 --> 00:08:30,370
+where we have to build the
+derivative of a sequence.
+
+161
+00:08:30,370 --> 00:08:33,020
+So features the definition.
+
+162
+00:08:33,020 --> 00:08:36,050
+We would ask if
+the r is nullable,
+
+163
+00:08:36,050 --> 00:08:39,095
+In this case, we return
+this alternative.
+
+164
+00:08:39,095 --> 00:08:40,640
+And if it's not nullable,
+
+165
+00:08:40,640 --> 00:08:44,135
+It is just this
+record expression.
+
+166
+00:08:44,135 --> 00:08:49,550
+Now my claim is that
+this whole if condition
+
+167
+00:08:49,550 --> 00:08:55,895
+can be replaced by just this
+simple derivative here.
+
+168
+00:08:55,895 --> 00:08:58,775
+And if that claim is correct,
+
+169
+00:08:58,775 --> 00:09:01,520
+there would be very nice
+because in contrast to
+
+170
+00:09:01,520 --> 00:09:04,130
+this if condition where
+
+171
+00:09:04,130 --> 00:09:06,280
+we have to calculate
+like nullable,
+
+172
+00:09:06,280 --> 00:09:08,330
+decide in which branch we are.
+
+173
+00:09:08,330 --> 00:09:10,580
+We don't have to
+calculate your now,
+
+174
+00:09:10,580 --> 00:09:13,880
+but we can just replace
+it by this expression.
+
+175
+00:09:13,880 --> 00:09:16,670
+So if we can
+substantiate that claim,
+
+176
+00:09:16,670 --> 00:09:19,860
+that will be definitely
+good file algorithm.
+
+177
+00:09:20,140 --> 00:09:22,775
+And to substantiate that,
+
+178
+00:09:22,775 --> 00:09:26,795
+I will focus on this
+record expression here.
+
+179
+00:09:26,795 --> 00:09:31,100
+And notice that this record
+expression the only be
+
+180
+00:09:31,100 --> 00:09:35,780
+called or only be generated
+if r is nullable.
+
+181
+00:09:35,780 --> 00:09:38,075
+So in any other case,
+
+182
+00:09:38,075 --> 00:09:40,060
+I will actually not go into it
+
+183
+00:09:40,060 --> 00:09:43,850
+that if branch and would
+be in the other one.
+
+184
+00:09:43,850 --> 00:09:45,260
+So if we are in this if branch,
+
+185
+00:09:45,260 --> 00:09:47,705
+we definitely know
+that R is nullable.
+
+186
+00:09:47,705 --> 00:09:52,955
+Okay? Okay, so here's
+our regular expression.
+
+187
+00:09:52,955 --> 00:09:55,940
+And we know it's nullable.
+
+188
+00:09:55,940 --> 00:09:57,920
+So we have to somehow find
+
+189
+00:09:57,920 --> 00:10:00,380
+an equivalent expression that
+
+190
+00:10:00,380 --> 00:10:04,100
+is smaller or simpler
+than that one.
+
+191
+00:10:04,100 --> 00:10:05,945
+Let's see what we can do.
+
+192
+00:10:05,945 --> 00:10:10,160
+So the first thing
+actually is we multiplying
+
+193
+00:10:10,160 --> 00:10:16,595
+this right hand side of the
+alternative is times one.
+
+194
+00:10:16,595 --> 00:10:19,700
+We can do that
+because this does not
+
+195
+00:10:19,700 --> 00:10:23,090
+change which strings this
+work expression can match.
+
+196
+00:10:23,090 --> 00:10:25,685
+Remember we even had it
+as a simplification row,
+
+197
+00:10:25,685 --> 00:10:27,425
+just in this case B,
+
+198
+00:10:27,425 --> 00:10:29,705
+don't apply it as a
+simplification will
+
+199
+00:10:29,705 --> 00:10:31,310
+actually make this
+work expression
+
+200
+00:10:31,310 --> 00:10:32,720
+a bit more complicated.
+
+201
+00:10:32,720 --> 00:10:34,910
+But times one doesn't make
+
+202
+00:10:34,910 --> 00:10:37,820
+a difference because it
+means the end of the string,
+
+203
+00:10:37,820 --> 00:10:40,070
+we still want to match
+the empty string.
+
+204
+00:10:40,070 --> 00:10:42,155
+Okay, so that is possible.
+
+205
+00:10:42,155 --> 00:10:45,740
+I can do that. Once
+we have done that,
+
+206
+00:10:45,740 --> 00:10:48,410
+you will notice that this
+factor derivative of
+
+207
+00:10:48,410 --> 00:10:51,860
+stuff are exactly the
+same as that one.
+
+208
+00:10:51,860 --> 00:10:54,650
+And in between is a plus.
+
+209
+00:10:54,650 --> 00:10:57,440
+So you probably remember the law
+
+210
+00:10:57,440 --> 00:11:00,170
+from school math
+that I can pull out
+
+211
+00:11:00,170 --> 00:11:02,735
+this factor derivative of c of r.
+
+212
+00:11:02,735 --> 00:11:06,320
+And I'm inside the parentheses
+is left with that.
+
+213
+00:11:06,320 --> 00:11:09,245
+So now I have r plus one.
+
+214
+00:11:09,245 --> 00:11:13,055
+Usually we cannot
+simplify r plus one.
+
+215
+00:11:13,055 --> 00:11:15,530
+If it had been R
+plus 0, then yes,
+
+216
+00:11:15,530 --> 00:11:18,665
+we could have got rid of the CRO.
+
+217
+00:11:18,665 --> 00:11:21,590
+Plus one often
+makes a difference,
+
+218
+00:11:21,590 --> 00:11:22,970
+but not in our case.
+
+219
+00:11:22,970 --> 00:11:25,940
+Remember, we know that
+this R is nullable,
+
+220
+00:11:25,940 --> 00:11:29,840
+so on its own can already
+match the empty string.
+
+221
+00:11:29,840 --> 00:11:33,305
+So we don't really need this
+alternative plus one there,
+
+222
+00:11:33,305 --> 00:11:35,300
+so we can just get rid of that.
+
+223
+00:11:35,300 --> 00:11:37,070
+Okay, and so now we have
+
+224
+00:11:37,070 --> 00:11:40,535
+a much simpler wound
+reg expression.
+
+225
+00:11:40,535 --> 00:11:44,285
+And this actually helps a
+lot for our if condition.
+
+226
+00:11:44,285 --> 00:11:46,925
+Look, this was the
+original if condition
+
+227
+00:11:46,925 --> 00:11:50,270
+and this is direct expression
+h. We just simplified.
+
+228
+00:11:50,270 --> 00:11:53,105
+If we replace it with this one,
+
+229
+00:11:53,105 --> 00:11:56,090
+then we just end up with this.
+
+230
+00:11:56,090 --> 00:11:59,510
+And now you will see that
+the if condition is actually
+
+231
+00:11:59,510 --> 00:12:02,750
+pointless because you
+check if it's null above,
+
+232
+00:12:02,750 --> 00:12:05,060
+we return this reg
+expression or it's
+
+233
+00:12:05,060 --> 00:12:08,210
+not nullable and we
+return exactly the same.
+
+234
+00:12:08,210 --> 00:12:10,025
+That doesn't make any difference.
+
+235
+00:12:10,025 --> 00:12:11,750
+So we can just get rid of
+
+236
+00:12:11,750 --> 00:12:14,645
+that one and can
+replace it by that.
+
+237
+00:12:14,645 --> 00:12:16,865
+And you see, this is now
+
+238
+00:12:16,865 --> 00:12:20,720
+a much simpler case than
+what we had before.
+
+239
+00:12:20,720 --> 00:12:24,170
+So let's take stock
+what we have so far.
+
+240
+00:12:24,170 --> 00:12:26,915
+So we know India CEO case,
+
+241
+00:12:26,915 --> 00:12:30,920
+the derivative needs
+to be defined as 0.
+
+242
+00:12:30,920 --> 00:12:33,095
+So because we define this
+
+243
+00:12:33,095 --> 00:12:36,785
+and times as one,
+the derivative is 0.
+
+244
+00:12:36,785 --> 00:12:39,889
+For chest r, this will
+be the derivative.
+
+245
+00:12:39,889 --> 00:12:42,170
+And we can't do any
+better than that
+
+246
+00:12:42,170 --> 00:12:45,620
+for our followed by
+RB just found out.
+
+247
+00:12:45,620 --> 00:12:47,270
+Actually it is quite simple.
+
+248
+00:12:47,270 --> 00:12:51,410
+Reg expression is equivalent
+to the derivative.
+
+249
+00:12:51,410 --> 00:12:53,870
+Now, we have to continue with
+
+250
+00:12:53,870 --> 00:12:56,090
+that case where n is
+equal to three and we
+
+251
+00:12:56,090 --> 00:12:58,099
+now have three copies
+
+252
+00:12:58,099 --> 00:13:02,390
+of this or what should
+be the derivative?
+
+253
+00:13:02,390 --> 00:13:05,330
+Well, if you look very carefully
+
+254
+00:13:05,330 --> 00:13:08,465
+at this emerging pattern,
+
+255
+00:13:08,465 --> 00:13:12,410
+I have to say then
+what would be nice if,
+
+256
+00:13:12,410 --> 00:13:16,400
+if he could show that in
+the n equals three case,
+
+257
+00:13:16,400 --> 00:13:18,275
+we end up with this.
+
+258
+00:13:18,275 --> 00:13:21,290
+Because then what we have is.
+
+259
+00:13:21,290 --> 00:13:25,370
+We can define our
+nullable for n times
+
+260
+00:13:25,370 --> 00:13:31,025
+s. If any cross 0 then
+true as nullable.
+
+261
+00:13:31,025 --> 00:13:33,875
+And for the derivative,
+
+262
+00:13:33,875 --> 00:13:37,190
+we can save if n is equal to 0,
+
+263
+00:13:37,190 --> 00:13:40,235
+then we return the
+Sierra reg expression.
+
+264
+00:13:40,235 --> 00:13:43,295
+Otherwise, as you can see
+from this pattern here,
+
+265
+00:13:43,295 --> 00:13:50,735
+it would be derivative of
+c r four by r n minus one.
+
+266
+00:13:50,735 --> 00:13:54,770
+Okay? And the only
+
+267
+00:13:54,770 --> 00:13:56,330
+thing we have to make choice that
+
+268
+00:13:56,330 --> 00:13:58,175
+this pattern actually holds.
+
+269
+00:13:58,175 --> 00:14:00,470
+So that's, I will
+show you next in
+
+270
+00:14:00,470 --> 00:14:03,735
+the case for n equals three.
+
+271
+00:14:03,735 --> 00:14:07,810
+If we have a wreck expression R
+
+272
+00:14:07,810 --> 00:14:09,820
+and it's followed
+by r and another r,
+
+273
+00:14:09,820 --> 00:14:11,275
+three copies of it.
+
+274
+00:14:11,275 --> 00:14:14,245
+We can just unfold
+again the definition.
+
+275
+00:14:14,245 --> 00:14:16,930
+So we would ask if I is nullable,
+
+276
+00:14:16,930 --> 00:14:19,765
+then we have this if branch.
+
+277
+00:14:19,765 --> 00:14:23,905
+And if i is not nullable
+or we have this as branch.
+
+278
+00:14:23,905 --> 00:14:27,010
+Okay? What can we do here?
+
+279
+00:14:27,010 --> 00:14:30,310
+Well, we notice that
+this one is just now
+
+280
+00:14:30,310 --> 00:14:34,510
+the derivative of two
+Rs, one after another.
+
+281
+00:14:34,510 --> 00:14:37,330
+But this we just
+calculated a moment ago,
+
+282
+00:14:37,330 --> 00:14:40,120
+so we can just
+replace it by this.
+
+283
+00:14:40,120 --> 00:14:43,255
+Ok. That's what we
+calculated earlier.
+
+284
+00:14:43,255 --> 00:14:46,665
+But now you will see
+again this factor,
+
+285
+00:14:46,665 --> 00:14:48,695
+and this factor is the same.
+
+286
+00:14:48,695 --> 00:14:52,700
+So I can pull that
+out to be like that.
+
+287
+00:14:52,700 --> 00:14:57,380
+And I have now followed
+by R plus R. Oh,
+
+288
+00:14:57,380 --> 00:14:59,030
+hey, man, now you probably
+
+289
+00:14:59,030 --> 00:15:00,680
+remember how we did it earlier.
+
+290
+00:15:00,680 --> 00:15:03,080
+We can now pull out one copy of
+
+291
+00:15:03,080 --> 00:15:06,020
+this are to just get
+something like this.
+
+292
+00:15:06,020 --> 00:15:08,765
+We have to add one essentially,
+
+293
+00:15:08,765 --> 00:15:11,615
+but we now get r plus one.
+
+294
+00:15:11,615 --> 00:15:15,065
+And this r here is
+now just pulled out.
+
+295
+00:15:15,065 --> 00:15:18,995
+Well, here again kicks
+in this reasoning.
+
+296
+00:15:18,995 --> 00:15:22,700
+We go in this if branch
+only if r is nullable,
+
+297
+00:15:22,700 --> 00:15:26,150
+so on its own can already
+match the empty string.
+
+298
+00:15:26,150 --> 00:15:28,895
+So I don't need
+really this plus one.
+
+299
+00:15:28,895 --> 00:15:30,695
+I can just get rid of it.
+
+300
+00:15:30,695 --> 00:15:33,140
+And so I now just have
+to rearrange the parent,
+
+301
+00:15:33,140 --> 00:15:35,450
+the thesis which we
+said we can also do.
+
+302
+00:15:35,450 --> 00:15:37,595
+So we have something like that.
+
+303
+00:15:37,595 --> 00:15:39,740
+And here again, the
+same reasoning,
+
+304
+00:15:39,740 --> 00:15:41,975
+we have a if condition
+
+305
+00:15:41,975 --> 00:15:43,310
+where it doesn't
+really matter what
+
+306
+00:15:43,310 --> 00:15:44,405
+we're going to return,
+
+307
+00:15:44,405 --> 00:15:46,205
+it's in both branches the same.
+
+308
+00:15:46,205 --> 00:15:48,470
+So we can just
+replace it by that.
+
+309
+00:15:48,470 --> 00:15:51,920
+And yes, now we can be
+pretty sure they'll
+
+310
+00:15:51,920 --> 00:15:55,310
+work for all the n times
+regular expressions.
+
+311
+00:15:55,310 --> 00:15:57,860
+And leave that to
+the calculation for
+
+312
+00:15:57,860 --> 00:16:02,570
+maybe R to the four to you.
+
+313
+00:16:02,570 --> 00:16:04,490
+And the reason why I do it
+
+314
+00:16:04,490 --> 00:16:06,605
+in such a detail,
+this calculation,
+
+315
+00:16:06,605 --> 00:16:08,960
+this is really meant
+to help you with
+
+316
+00:16:08,960 --> 00:16:13,200
+the coursework which is
+coming up this week.
+
+317
+00:16:13,210 --> 00:16:16,250
+I'm now back in our
+implementation.
+
+318
+00:16:16,250 --> 00:16:20,660
+In this Reto, said We have
+this explicit constructor now
+
+319
+00:16:20,660 --> 00:16:25,535
+for n times b can now fill
+in the cases for nullable.
+
+320
+00:16:25,535 --> 00:16:27,635
+So if we have R in times,
+
+321
+00:16:27,635 --> 00:16:30,995
+if this n is equal to
+0, we return true.
+
+322
+00:16:30,995 --> 00:16:34,190
+Otherwise we have to test
+whether eyes nullable.
+
+323
+00:16:34,190 --> 00:16:37,565
+And in the derivative case, oi,
+
+324
+00:16:37,565 --> 00:16:40,339
+if this n is equal to 0,
+
+325
+00:16:40,339 --> 00:16:43,564
+the return this 0
+wreck expression.
+
+326
+00:16:43,564 --> 00:16:46,700
+Otherwise we return the
+sequence of the derivative
+
+327
+00:16:46,700 --> 00:16:50,270
+of psi of r four by n times of r,
+
+328
+00:16:50,270 --> 00:16:54,545
+but n minus one times and
+everything else stays the same.
+
+329
+00:16:54,545 --> 00:16:56,510
+Here's the function for strings,
+
+330
+00:16:56,510 --> 00:16:58,430
+derivative function for strings.
+
+331
+00:16:58,430 --> 00:17:01,595
+In the main mantra
+function as all the same.
+
+332
+00:17:01,595 --> 00:17:04,820
+We still require this
+definition because
+
+333
+00:17:04,820 --> 00:17:06,050
+we haven't done anything about
+
+334
+00:17:06,050 --> 00:17:08,090
+the optional record
+expression yet.
+
+335
+00:17:08,090 --> 00:17:10,670
+And we have now at this
+
+336
+00:17:10,670 --> 00:17:13,250
+either warm and evil
+2-break expression.
+
+337
+00:17:13,250 --> 00:17:15,290
+And let's test it. And let's be
+
+338
+00:17:15,290 --> 00:17:17,000
+a bit more ambitious.
+Be testing it.
+
+339
+00:17:17,000 --> 00:17:20,315
+The strings between 01000
+
+340
+00:17:20,315 --> 00:17:22,580
+and let's see what the time say.
+
+341
+00:17:22,580 --> 00:17:26,210
+I'm testing this again
+inside the ammonite rebel.
+
+342
+00:17:26,210 --> 00:17:30,180
+And you'll see it should
+be now much quicker.
+
+343
+00:17:30,610 --> 00:17:34,640
+Okay, it might slow
+down also around 600.
+
+344
+00:17:34,640 --> 00:17:40,490
+700 needs two seconds,
+three seconds, 4800.
+
+345
+00:17:40,490 --> 00:17:43,940
+Let's see about it
+needs 4 thousand.
+
+346
+00:17:43,940 --> 00:17:47,435
+But you have to remember
+Ruby and Python.
+
+347
+00:17:47,435 --> 00:17:51,530
+They needed half a
+minute to just 43 TAs.
+
+348
+00:17:51,530 --> 00:17:54,485
+We need a little bit
+more than six seconds,
+
+349
+00:17:54,485 --> 00:17:57,110
+but we are processing a string of
+
+350
+00:17:57,110 --> 00:18:00,575
+1000 days so that success.
+
+351
+00:18:00,575 --> 00:18:04,775
+So this speed is also explained
+if you look at the sizes.
+
+352
+00:18:04,775 --> 00:18:08,975
+Since we now have this explicit
+and times constructor,
+
+353
+00:18:08,975 --> 00:18:11,930
+it doesn't really matter
+how big this n is.
+
+354
+00:18:11,930 --> 00:18:14,540
+This evil one reg
+expression will always be
+
+355
+00:18:14,540 --> 00:18:17,195
+of this size seven,
+the beginning.
+
+356
+00:18:17,195 --> 00:18:20,315
+And you can also see if you
+now build the derivatives,
+
+357
+00:18:20,315 --> 00:18:22,550
+they still grow in size,
+
+358
+00:18:22,550 --> 00:18:24,740
+but much more moderately.
+
+359
+00:18:24,740 --> 00:18:28,100
+And let's try out this
+example, this 20 a.
+
+360
+00:18:28,100 --> 00:18:31,685
+So we build the derivative
+according to 20 character A's.
+
+361
+00:18:31,685 --> 00:18:33,890
+In the earlier example,
+
+362
+00:18:33,890 --> 00:18:35,780
+we ended up this a
+wreck expression which
+
+363
+00:18:35,780 --> 00:18:37,895
+had like 8 million plus nodes.
+
+364
+00:18:37,895 --> 00:18:39,830
+And if we do this now,
+
+365
+00:18:39,830 --> 00:18:43,205
+then we just have a wreck
+expression with 211 nodes.
+
+366
+00:18:43,205 --> 00:18:44,750
+And that is much smaller and
+
+367
+00:18:44,750 --> 00:18:47,165
+all the calculations
+would be much quicker.
+
+368
+00:18:47,165 --> 00:18:49,550
+So yeah, there's
+
+369
+00:18:49,550 --> 00:18:52,205
+this push off CKY algorithm
+and this improvement.
+
+370
+00:18:52,205 --> 00:18:54,890
+We're now running
+circles around Ruby and
+
+371
+00:18:54,890 --> 00:18:58,445
+Python because they're just
+stuck here at the beginning.
+
+372
+00:18:58,445 --> 00:19:00,230
+Because they need already
+
+373
+00:19:00,230 --> 00:19:02,975
+like half a minute
+for just 30 a's.
+
+374
+00:19:02,975 --> 00:19:05,930
+We now can do something
+like thousand a's.
+
+375
+00:19:05,930 --> 00:19:07,580
+And in reasonable time.
+
+376
+00:19:07,580 --> 00:19:09,740
+I think this must be
+timing I obtained with
+
+377
+00:19:09,740 --> 00:19:12,635
+my older laptop some time ago.
+
+378
+00:19:12,635 --> 00:19:14,210
+Because remember we
+had something like
+
+379
+00:19:14,210 --> 00:19:16,670
+six seconds here, it says 15.
+
+380
+00:19:16,670 --> 00:19:18,080
+So you always have to take
+
+381
+00:19:18,080 --> 00:19:20,885
+these times with
+the pinch of salt.
+
+382
+00:19:20,885 --> 00:19:22,670
+It's essentially only the trend,
+
+383
+00:19:22,670 --> 00:19:25,100
+but it's clear we are
+much, much better.
+
+384
+00:19:25,100 --> 00:19:27,065
+So we have worked a lot,
+
+385
+00:19:27,065 --> 00:19:30,720
+but we also got something
+for it in return.