updated
authorChristian Urban <urbanc@in.tum.de>
Wed, 15 Mar 2017 01:24:39 +0000
changeset 477 b78664a24f5d
parent 476 d922cc83b70c
child 478 48b842c997c7
updated
LINKS
data.sty
handouts/ho01.pdf
handouts/ho01.tex
handouts/ho02.pdf
handouts/ho02.tex
progs/re1.scala
progs/re2.scala
progs/re3.scala
progs/re4.scala
--- a/LINKS	Mon Feb 13 23:22:45 2017 +0000
+++ b/LINKS	Wed Mar 15 01:24:39 2017 +0000
@@ -3,3 +3,15 @@
 https://dere.github.io/2017-02-12/beginners-assembly-part1/
 
 
+webassembly
+https://sourceware.org/ml/binutils/2017-03/msg00044.html
+https://hacks.mozilla.org/2017/02/a-cartoon-intro-to-webassembly/
+
+webassembly explorer
+https://mbebenita.github.io/WasmExplorer/
+
+
+
+free books
+https://github.com/vhf/free-programming-books/blob/master/free-programming-books.md
+https://john.cs.olemiss.edu/~hcc/csci658/notes/Free_Prog_Lang_Textbooks.html
\ No newline at end of file
--- a/data.sty	Mon Feb 13 23:22:45 2017 +0000
+++ b/data.sty	Wed Mar 15 01:24:39 2017 +0000
@@ -142,18 +142,18 @@
 
 %% re2.scala example a?{n} a{n}
 \begin{filecontents}{re2.data}
-1 0.00006
-101 0.00605
-201 0.04343
-301 0.16469
-401 0.41954
-501 0.83703
-601 1.66925
-701 2.71086
-801 4.19745
-901 7.56495
-1001 12.42103
-1101 20.41763
+1 0.00050
+101 0.02030
+201 0.10587
+301 0.31188
+401 0.32794
+501 0.64490
+601 1.16738
+701 2.10815
+801 3.47144
+901 6.80621
+1001 12.35611
+1101 23.80084
 \end{filecontents}
 
 %% re2.scala: example (a*)* b
@@ -182,36 +182,35 @@
 
 %% re3.scala: example a?{n} a{n}
 \begin{filecontents}{re3.data}
-1 0.00005
-1001 0.63505
-2001 2.53029
-3001 5.72804
-4001 9.94246
-5001 15.52770
-6001 22.44126
-7001 30.86867
-8001 39.32242
-9001 48.96998
+1 0.00003
+1001 0.03887
+2001 0.15666
+3001 0.35910
+4001 0.63950
+5001 1.00241
+6001 1.50480
+7001 2.11568
+8001 2.71208
+9001 3.41157
+10001 4.19962
+11001 5.70387
 \end{filecontents}
 
 %% re3.scala: example (a*)* b
 \begin{filecontents}{re3a.data}
-1 0.00014
-500001 2.61059
-1000001 5.42773
-1500001 8.02603
-2000001 10.49844
-2500001 13.34234
-3000001 16.17491
-3500001 19.11650
-4000001 21.66151
-4500001 24.85496
-5000001 28.52113
-5500001 28.54548
-6000001 32.39523
-6500001 45.13486
-7000001 54.15018
-7500001 71.32218
+1 0.00015
+500001 0.28337
+1000001 0.53271
+1500001 0.84478
+2000001 1.11763
+2500001 1.76656
+3000001 2.13310
+3500001 2.39576
+4000001 2.98624
+4500001 5.96529
+5000001 13.56911
+5500001 18.43089
+6000001 40.33704
 \end{filecontents}
 
 %% re4.scala example a?{n} a{n}
Binary file handouts/ho01.pdf has changed
--- a/handouts/ho01.tex	Mon Feb 13 23:22:45 2017 +0000
+++ b/handouts/ho01.tex	Wed Mar 15 01:24:39 2017 +0000
@@ -3,6 +3,8 @@
 \usepackage{../langs}
 \usepackage{../graphics}
 \usepackage{../data}
+\usepackage{lstlinebgrd}
+\definecolor{capri}{rgb}{0.0, 0.75, 1.0}
 
 %%http://regexcrossword.com/challenges/cities/puzzles/1
 %%https://jex.im/regulex/
@@ -37,24 +39,24 @@
 \section*{Handout 1}
 
 This module is about text processing, be it for web-crawlers,
-compilers, dictionaries, DNA-data and so on. When looking for a
+compilers, dictionaries, DNA-data and so on.  When looking for a
 particular string, like $abc$ in a large text we can use the
 Knuth-Morris-Pratt algorithm, which is currently the most efficient
 general string search algorithm. But often we do \emph{not} just look
-for a particular string, but for string patterns. For example in
+for a particular string, but for string patterns. For example, in
 program code we need to identify what are the keywords (if, then,
 while, etc), what are the identifiers (variable names). A pattern for
 identifiers could be stated as: they start with a letter, followed by
-zero or more letters, numbers and underscores.  Also often we face the
+zero or more letters, numbers and underscores.  Often we also face the
 problem that we are given a string (for example some user input) and
 want to know whether it matches a particular pattern---be it an email
 address, for example. In this way we can exclude user input that would
 otherwise have nasty effects on our program (crashing it or making it
-go into an infinite loop, if not worse). Scanning for computer viruses
-or filtering out spam usually involves scanning for some signature
-(essentially a pattern).  The point is that the fast
-Knuth-Morris-Pratt algorithm for strings is not good enough for such
-string \emph{patterns}.\smallskip
+go into an infinite loop, if not worse). In tools like Snort, scanning
+for computer viruses or filtering out spam usually involves scanning
+for some signature (essentially a string pattern).  The point is that
+the fast Knuth-Morris-Pratt algorithm for strings is not good enough
+for such string \emph{patterns}.\smallskip
 
 \defn{Regular expressions} help with conveniently specifying
 such patterns. The idea behind regular expressions is that
@@ -190,7 +192,31 @@
 matching in Python, Ruby and Java.
 
 \begin{center}
-\begin{tabular}{@{\hspace{-1mm}}c@{\hspace{-1mm}}c@{}}
+\begin{tabular}{@{\hspace{-1mm}}c@{\hspace{1mm}}c@{}}
+\begin{tikzpicture}
+\begin{axis}[
+    title={Graph: $\texttt{(a*)*\,b}$ and strings 
+           $\underbrace{\texttt{a}\ldots \texttt{a}}_{n}$},
+    xlabel={$n$},
+    x label style={at={(1.05,0.0)}},
+    ylabel={time in secs},
+    enlargelimits=false,
+    xtick={0,5,...,30},
+    xmax=33,
+    ymax=35,
+    ytick={0,5,...,30},
+    scaled ticks=false,
+    axis lines=left,
+    width=5.5cm,
+    height=4.5cm, 
+    legend entries={Python, Java},  
+    legend pos=north west,
+    legend cell align=left]
+\addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};
+\addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
+\end{axis}
+\end{tikzpicture}
+&
 \begin{tikzpicture}
 \begin{axis}[
     title={Graph: $\texttt{a?\{n\}\,a{\{n\}}}$ and strings 
@@ -214,47 +240,24 @@
 \addplot[brown,mark=triangle*, mark options={fill=white}] table {re-ruby.data};  
 \end{axis}
 \end{tikzpicture}
-&
-\begin{tikzpicture}
-\begin{axis}[
-    title={Graph: $\texttt{(a*)*\,b}$ and strings 
-           $\underbrace{\texttt{a}\ldots \texttt{a}}_{n}$},
-    xlabel={$n$},
-    x label style={at={(1.05,0.0)}},
-    ylabel={time in secs},
-    enlargelimits=false,
-    xtick={0,5,...,30},
-    xmax=33,
-    ymax=35,
-    ytick={0,5,...,30},
-    scaled ticks=false,
-    axis lines=left,
-    width=5.5cm,
-    height=4.5cm, 
-    legend entries={Python, Java},  
-    legend pos=north west,
-    legend cell align=left]
-\addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};
-\addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
-\end{axis}
-\end{tikzpicture}
 \end{tabular}
 \end{center}
 
-\noindent This first graph shows that Python needs approximately 29
-seconds for finding out whether a string of 28 \texttt{a}s matches the
-regular expression \texttt{a?\{28\}\,a\{28\}}.  Ruby is even slightly
+\noindent This first graph shows that Python and Java need
+approximately 30 seconds to find out that the regular expression
+$\texttt{(a*)*\,b}$ does not match strings of 28 \texttt{a}s.
+Similarly, the second shows that Python needs approximately 29 seconds
+for finding out whether a string of 28 \texttt{a}s matches the regular
+expression \texttt{a?\{28\}\,a\{28\}}.  Ruby is even slightly
 worse.\footnote{In this example Ruby uses the slightly different
   regular expression \texttt{a?a?a?...a?a?aaa...aa}, where the
   \texttt{a?} and \texttt{a} each occur $n$ times. More such test
   cases can be found at \url{http://www.computerbytesman.com/redos/}.}
-Simlarly, Python and Java needs approximately 30 seconds to find out
-that the regular expression $\texttt{(a*)*\,b}$ does not match strings
-of 28 \texttt{a}s.  Admittedly, these regular expressions are
-carefully chosen to exhibit this exponential behaviour, but similar
-ones occur more often than one wants in ``real life''. For example, on
-20 July 2016 a similar regular expression brought the webpage
-\href{http://stackexchange.com}{Stack Exchange} to its knees:
+Admittedly, these regular expressions are carefully chosen to exhibit
+this exponential behaviour, but similar ones occur more often than one
+wants in ``real life''. For example, on 20 July 2016 a similar regular
+expression brought the webpage \href{http://stackexchange.com}{Stack
+  Exchange} to its knees:
 
 \begin{center}
 \url{http://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016}
@@ -293,9 +296,9 @@
 at a relatively simple algorithm that solves this problem much
 better than Python and Ruby do\ldots actually it will be two
 versions of the algorithm: the first one will be able to
-process strings of approximately 1,000 \texttt{a}s in 30
+process strings of approximately 1,100 \texttt{a}s in 23 
 seconds, while the second version will even be able to process
-up to 7,000(!) in 30 seconds, see the graph below:
+up to 11,000(!) in 5 seconds, see the graph below:
 
 \begin{center}
 \begin{tikzpicture}
@@ -306,8 +309,8 @@
     x label style={at={(1.05,0.0)}},
     ylabel={time in secs},
     enlargelimits=false,
-    xtick={0,3000,...,9000},
-    xmax=10000,
+    xtick={0,3000,...,12000},
+    xmax=13000,
     ymax=32,
     ytick={0,5,...,30},
     scaled ticks=false,
@@ -696,8 +699,9 @@
 \end{quote}  
 
 
-\begin{figure}[p]
-\lstinputlisting{../progs/crawler1.scala}
+\begin{figure}[p]\small
+  \lstinputlisting[linebackgroundcolor={\ifodd\value{lstnumber}\color{capri!3}\fi}]
+                  {../progs/crawler1.scala}
 
 \caption{The Scala code for a simple web-crawler that checks
 for broken links in a web-page. It uses the regular expression
@@ -708,8 +712,10 @@
 \end{figure}
 
 
-\begin{figure}[p]
-\lstinputlisting{../progs/crawler2.scala}
+
+\begin{figure}[p]\small
+  \lstinputlisting[linebackgroundcolor={\ifodd\value{lstnumber}\color{capri!3}\fi}]
+                  {../progs/crawler2.scala}
 
 \caption{A version of the web-crawler that only follows links
 in ``my'' domain---since these are the ones I am interested in
@@ -722,8 +728,9 @@
 
 \end{figure}
 
-\begin{figure}[p]
-\lstinputlisting{../progs/crawler3.scala}
+\begin{figure}[p]\small
+  \lstinputlisting[linebackgroundcolor={\ifodd\value{lstnumber}\color{capri!3}\fi}]
+                  {../progs/crawler3.scala}
 
 \caption{A small email harvester---whenever we download a
 web-page, we also check whether it contains any email
Binary file handouts/ho02.pdf has changed
--- a/handouts/ho02.tex	Mon Feb 13 23:22:45 2017 +0000
+++ b/handouts/ho02.tex	Wed Mar 15 01:24:39 2017 +0000
@@ -3,7 +3,8 @@
 \usepackage{../langs}
 \usepackage{../graphics}
 \usepackage{../data}
-
+\usepackage{lstlinebgrd}
+\definecolor{capri}{rgb}{0.0, 0.75, 1.0}
 
 \begin{document}
 \fnote{\copyright{} Christian Urban, King's College London, 2014, 2015, 2016, 2017}
@@ -54,8 +55,8 @@
     x label style={at={(1.1,0.05)}},
     ylabel={\small time in secs},
     enlargelimits=false,
-    xtick={0,3000,...,9000},
-    xmax=10000,
+    xtick={0,2500,...,11000},
+    xmax=12000,
     ymax=35,
     ytick={0,5,...,30},
     scaled ticks=false,
@@ -86,9 +87,10 @@
     axis lines=left,
     width=5cm,
     height=5cm, 
-    legend entries={Java},  
+    legend entries={Java, Python},  
     legend pos=north west,
     legend cell align=left]
+\addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};
 \addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
 \end{axis}
 \end{tikzpicture}
@@ -461,7 +463,9 @@
 for \pcode{matches} are shown in Figure~\ref{scala1}.
 
 \begin{figure}[p]
-\lstinputlisting{../progs/app5.scala}
+  \lstinputlisting[numbers=left,linebackgroundcolor=
+                  {\ifodd\value{lstnumber}\color{capri!3}\fi}]
+                  {../progs/app5.scala}
 \caption{Scala implementation of the \textit{nullable} and 
   derivative functions. These functions are easy to
   implement in functional languages, because their built-in pattern 
@@ -581,8 +585,8 @@
     x label style={at={(1.01,0.0)}},
     ylabel={time in secs},
     enlargelimits=false,
-    xtick={0,100,...,1000},
-    xmax=1100,
+    xtick={0,200,...,1100},
+    xmax=1200,
     ytick={0,5,...,30},
     scaled ticks=false,
     axis lines=left,
@@ -600,8 +604,8 @@
 \end{center}
 
 \noindent Now we are talking business! The modified matcher 
-can within 30 seconds handle regular expressions up to
-$n = 950$ before a StackOverflow is raised. Recall that Python and Ruby 
+can within 25 seconds handle regular expressions up to
+$n = 1,100$ before a StackOverflow is raised. Recall that Python and Ruby 
 (and our first version, Scala V1) could only handle $n = 27$ or so in 30 
 seconds. There is no change for our second example 
 $(a^*)^* \cdot b$---so this is still good.
@@ -652,7 +656,9 @@
 function introduced. Does this improve the speed? You bet!!
 
 \begin{figure}[p]
-\lstinputlisting{../progs/app6.scala}
+\lstinputlisting[numbers=left,linebackgroundcolor=
+  {\ifodd\value{lstnumber}\color{capri!3}\fi}]
+                {../progs/app6.scala}
 \caption{The simplification function and modified 
 \texttt{ders}-function; this function now
 calls \texttt{der} first, but then simplifies
--- a/progs/re1.scala	Mon Feb 13 23:22:45 2017 +0000
+++ b/progs/re1.scala	Wed Mar 15 01:24:39 2017 +0000
@@ -1,3 +1,5 @@
+// Simple matcher for basic regular expressions
+
 
 abstract class Rexp
 case object ZERO extends Rexp                    // matches nothing
@@ -40,22 +42,27 @@
 // main matcher function
 def matches(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
 
+
 //examples from the homework
+
 val r = STAR(ALT(SEQ(CHAR('a'), CHAR('b')), CHAR('b')))
 der('a', r)
 der('b', r)
 der('c', r)
 
-//optional (one or zero times)
+//optional regular expression (one or zero times)
 def OPT(r: Rexp) = ALT(r, ONE)
 
-//n-times (explicitly expanded)
+//n-times regular expression (explicitly expanded)
 def NTIMES(r: Rexp, n: Int) : Rexp = n match {
   case 0 => ONE
   case 1 => r
   case n => SEQ(r, NTIMES(r, n - 1))
 }
 
+
+// Test Cases
+
 // the evil regular expression  a?{n} a{n}
 def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
 
@@ -88,3 +95,23 @@
   println(i + " " + "%.5f".format(time_needed(2, matches(EVIL2, "a" * i))))
 }
 
+
+
+
+// size of a regular expressions - for testing purposes 
+def size(r: Rexp) : Int = r match {
+  case ZERO => 1
+  case ONE => 1
+  case CHAR(_) => 1
+  case ALT(r1, r2) => 1 + size(r1) + size(r2)
+  case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+  case STAR(r) => 1 + size(r)
+}
+
+// the expicit expansion in EVIL1(n) increases
+// drastically its size
+
+size(EVIL1(1))  // 5
+size(EVIL1(3))  // 17
+size(EVIL1(5))  // 29
+size(EVIL1(7))  // 41
--- a/progs/re2.scala	Mon Feb 13 23:22:45 2017 +0000
+++ b/progs/re2.scala	Wed Mar 15 01:24:39 2017 +0000
@@ -1,5 +1,6 @@
-// version with explicit an n-times regular expression
-// this keeps the regular expression small
+// Version with explicit an explicit n-times regular expression;
+// this keeps the overall regular expression in the EVIL1 regular 
+// expression small
 
 abstract class Rexp 
 case object ZERO extends Rexp
@@ -8,7 +9,8 @@
 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
 case class STAR(r: Rexp) extends Rexp 
-case class NTIMES(r: Rexp, n: Int) extends Rexp //explicit constructor
+case class NTIMES(r: Rexp, n: Int) extends Rexp   //explicit constructor for n-times
+
 
 def nullable (r: Rexp) : Boolean = r match {
   case ZERO => false
@@ -20,6 +22,7 @@
   case NTIMES(r, i) => if (i == 0) true else nullable(r)
 }
 
+
 def der (c: Char, r: Rexp) : Rexp = r match {
   case ZERO => ZERO
   case ONE => ZERO
@@ -30,7 +33,7 @@
     else SEQ(der(c, r1), r2)
   case STAR(r1) => SEQ(der(c, r1), STAR(r1))
   case NTIMES(r1, i) => 
-    if (i == 0) ZERO else der(c, SEQ(r1, NTIMES(r1, i - 1)))
+    if (i == 0) ZERO else SEQ(der(c, r1), NTIMES(r1, i - 1))
 }
 
 def ders (s: List[Char], r: Rexp) : Rexp = s match {
@@ -41,9 +44,12 @@
 def matches(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
 
 
-//optional: one or zero times
+//optional regular expression: one or zero times
 def OPT(r: Rexp) = ALT(r, ONE)
 
+
+// Test Cases
+
 //evil regular expressions
 def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
 val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
@@ -57,11 +63,11 @@
 
 
 //test: (a?{n}) (a{n})
-for (i <- 1 to 1001 by 10) {
+for (i <- 1 to 1201 by 100) {
   println(i + " " + "%.5f".format(time_needed(2, matches(EVIL1(i), "a" * i))))
 }
 
-for (i <- 1 to 1001 by 10) {
+for (i <- 1 to 1201 by 100) {
   println(i + " " + "%.5f".format(time_needed(2, matches(EVIL1(i), "a" * i))))
 }
 
@@ -76,3 +82,32 @@
 }
 
 
+
+// size of a regular expressions - for testing purposes 
+def size(r: Rexp) : Int = r match {
+  case ZERO => 1
+  case ONE => 1
+  case CHAR(_) => 1
+  case ALT(r1, r2) => 1 + size(r1) + size(r2)
+  case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+  case STAR(r) => 1 + size(r)
+  case NTIMES(r, _) => 1 + size(r)
+}
+
+// EVIL1(n) has now a constant size, no matter
+// what n is 
+
+size(EVIL1(1))  // 7
+size(EVIL1(3))  // 7
+size(EVIL1(5))  // 7
+size(EVIL1(7))  // 7
+
+
+// but the size of the derivatives can grow 
+// quite dramatically
+
+size(ders("".toList, EVIL2))     // 5
+size(ders("a".toList, EVIL2))    // 12
+size(ders("aa".toList, EVIL2))   // 28
+size(ders("aaa".toList, EVIL2))  // 58
+size(ders("aaaa".toList, EVIL2)) // 116
--- a/progs/re3.scala	Mon Feb 13 23:22:45 2017 +0000
+++ b/progs/re3.scala	Wed Mar 15 01:24:39 2017 +0000
@@ -1,3 +1,6 @@
+// Version with simplification during derivatives;
+// this keeps the regular expressions small, which
+// is good for run-time
 
 abstract class Rexp
 case object ZERO extends Rexp
@@ -32,7 +35,7 @@
     else SEQ(der(c, r1), r2)
   case STAR(r1) => SEQ(der(c, r1), STAR(r1))
   case NTIMES(r, i) => 
-    if (i == 0) ZERO else der(c, SEQ(r, NTIMES(r, i - 1)))
+    if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1))
 }
 
 def simp(r: Rexp) : Rexp = r match {
@@ -63,12 +66,12 @@
 def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
 
 
-var regex = NTIMES(CHAR('a'),5)
-println(matcher(regex,"aaaaa"))
-
 //one or zero
 def OPT(r: Rexp) = ALT(r, ONE)
 
+
+// Test Cases
+
 //evil regular expressions
 def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
 val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
@@ -83,20 +86,43 @@
 
 
 //test: (a?{n}) (a{n})
-for (i <- 1 to 9001 by 10) {
+for (i <- 1 to 11001 by 1000) {
   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL1(i), "a" * i))))
 }
 
-for (i <- 1 to 9001 by 1000) {
+for (i <- 1 to 11001 by 1000) {
   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL1(i), "a" * i))))
 }
 
 //test: (a*)* b
-for (i <- 1 to 7000001 by 500000) {
+for (i <- 1 to 6000001 by 500000) {
+  println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL2, "a" * i))))
+}
+
+for (i <- 1 to 6000001 by 500000) {
   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL2, "a" * i))))
 }
 
-for (i <- 1 to 7000001 by 500000) {
-  println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL2, "a" * i))))
+
+
+// size of a regular expressions - for testing purposes 
+def size(r: Rexp) : Int = r match {
+  case ZERO => 1
+  case ONE => 1
+  case CHAR(_) => 1
+  case ALT(r1, r2) => 1 + size(r1) + size(r2)
+  case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+  case STAR(r) => 1 + size(r)
+  case NTIMES(r, _) => 1 + size(r)
 }
 
+
+// now the size of the derivatives grows 
+// much, much slower
+
+size(ders("".toList, EVIL2))      // 5
+size(ders("a".toList, EVIL2))     // 8
+size(ders("aa".toList, EVIL2))    // 8
+size(ders("aaa".toList, EVIL2))   // 8
+size(ders("aaaa".toList, EVIL2))  // 8
+size(ders("aaaaa".toList, EVIL2)) // 8
--- a/progs/re4.scala	Mon Feb 13 23:22:45 2017 +0000
+++ b/progs/re4.scala	Wed Mar 15 01:24:39 2017 +0000
@@ -1,4 +1,6 @@
-    
+// Version which attempts to move whole strings, not
+// just characters, under derivatives whenever possible
+   
 abstract class Rexp
 case object ZERO extends Rexp
 case object ONE extends Rexp
@@ -50,6 +52,7 @@
   case r => r
 }
 
+// *new*
 // derivative w.r.t. a string (iterates der)
 def ders2(s: List[Char], r: Rexp) : Rexp = (s, r) match {
   case (Nil, r) => r
@@ -68,6 +71,9 @@
 //one or zero
 def OPT(r: Rexp) = ALT(r, ONE)
 
+
+// Test Cases
+
 def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
 val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
 
@@ -79,7 +85,6 @@
   (end - start)/(i * 1.0e9)
 }
 
-// warmup
 //test: (a?{n}) (a{n})
 for (i <- 1 to 7000001 by 500000) {
   println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL1(i), "a" * i))))