handouts/ho02.tex
changeset 415 4ae59fd3b174
parent 414 065ca01b62ae
child 416 357c395ae838
equal deleted inserted replaced
414:065ca01b62ae 415:4ae59fd3b174
    22 match the strings).  To see the substantial differences in the left
    22 match the strings).  To see the substantial differences in the left
    23 and right plots below, note the different scales of the $x$-axes. 
    23 and right plots below, note the different scales of the $x$-axes. 
    24 
    24 
    25 
    25 
    26 \begin{center}
    26 \begin{center}
    27 Plots: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$\\
    27 Graphs: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$\\
    28 \begin{tabular}{@{}cc@{}}
    28 \begin{tabular}{@{}cc@{}}
    29 \begin{tikzpicture}
    29 \begin{tikzpicture}
    30 \begin{axis}[
    30 \begin{axis}[
    31     xlabel={$n$},
    31     xlabel={$n$},
    32     x label style={at={(1.05,0.0)}},
    32     x label style={at={(1.05,0.0)}},
    62     ytick={0,5,...,30},
    62     ytick={0,5,...,30},
    63     scaled ticks=false,
    63     scaled ticks=false,
    64     axis lines=left,
    64     axis lines=left,
    65     width=6.5cm,
    65     width=6.5cm,
    66     height=5cm]
    66     height=5cm]
    67 \addplot[green,mark=square*,mark options={fill=white}] table {re2b.data};
    67 \addplot[green,mark=square*,mark options={fill=white}] table {re2.data};
    68 \addplot[black,mark=square*,mark options={fill=white}] table {re3.data};
    68 \addplot[black,mark=square*,mark options={fill=white}] table {re3.data};
    69 \end{axis}
    69 \end{axis}
    70 \end{tikzpicture}
    70 \end{tikzpicture}
    71 \end{tabular}
    71 \end{tabular}
    72 \end{center}
    72 \end{center}
    73 
    73 
    74 \begin{center}
    74 \begin{center}
    75 Plots: $(a^*)^* \cdot b$ and strings $\underbrace{a\ldots a}_{n}$
    75 Graphs: $(a^*)^* \cdot b$ and strings $\underbrace{a\ldots a}_{n}$
    76 \begin{tabular}{@{}cc@{}}
    76 \begin{tabular}{@{}cc@{}}
    77 \begin{tikzpicture}
    77 \begin{tikzpicture}
    78 \begin{axis}[
    78 \begin{axis}[
    79     xlabel={$n$},
    79     xlabel={$n$},
    80     x label style={at={(1.05,0.0)}},
    80     x label style={at={(1.05,0.0)}},
    89     width=5cm,
    89     width=5cm,
    90     height=5cm, 
    90     height=5cm, 
    91     legend entries={Java},  
    91     legend entries={Java},  
    92     legend pos=north west,
    92     legend pos=north west,
    93     legend cell align=left]
    93     legend cell align=left]
    94 \addplot[blue,mark=*, mark options={fill=white}] table {re-java.data};
    94 \addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
    95 \end{axis}
    95 \end{axis}
    96 \end{tikzpicture}
    96 \end{tikzpicture}
    97 &
    97 &
    98 \begin{tikzpicture}
    98 \begin{tikzpicture}
    99   \begin{axis}[
    99   \begin{axis}[
   107     ytick={0,5,...,30},
   107     ytick={0,5,...,30},
   108     scaled ticks=false,
   108     scaled ticks=false,
   109     axis lines=left,
   109     axis lines=left,
   110     width=6.5cm,
   110     width=6.5cm,
   111     height=5cm]
   111     height=5cm]
   112 \addplot[green,mark=square*,mark options={fill=white}] table {re2b.data};
   112 \addplot[green,mark=square*,mark options={fill=white}] table {re2.data};
   113 \addplot[black,mark=square*,mark options={fill=white}] table {re3.data};
   113 \addplot[black,mark=square*,mark options={fill=white}] table {re3.data};
   114 \end{axis}
   114 \end{axis}
   115 \end{tikzpicture}
   115 \end{tikzpicture}
   116 \end{tabular}
   116 \end{tabular}
   117 \end{center}\medskip
   117 \end{center}\medskip
   480 It seems our algorithm is doing rather well in comparison:
   480 It seems our algorithm is doing rather well in comparison:
   481 
   481 
   482 \begin{center}
   482 \begin{center}
   483 \begin{tikzpicture}
   483 \begin{tikzpicture}
   484 \begin{axis}[
   484 \begin{axis}[
   485     title={Plot: $(a^*)^* \cdot b$ and strings $\underbrace{a\ldots a}_{n}$},
   485     title={Graph: $(a^*)^* \cdot b$ and strings $\underbrace{a\ldots a}_{n}$},
   486     xlabel={$n$},
   486     xlabel={$n$},
   487     x label style={at={(1.05,0.0)}},
   487     x label style={at={(1.05,0.0)}},
   488     ylabel={time in secs},
   488     ylabel={time in secs},
   489     enlargelimits=false,
   489     enlargelimits=false,
   490     xtick={0,500,...,4000},
   490     xtick={0,1000,...,6500},
   491     xmax=4100,
   491     xmax=6800,
   492     ytick={0,5,...,30},
   492     ytick={0,5,...,30},
       
   493     ymax=34,
   493     scaled ticks=false,
   494     scaled ticks=false,
   494     axis lines=left,
   495     axis lines=left,
   495     width=10cm,
   496     width=8cm,
   496     height=4.5cm, 
   497     height=4.5cm, 
   497     legend entries={Java,Scala V1},  
   498     legend entries={Java,Scala V1},  
   498     legend pos=north east,
   499     legend pos=north east,
   499     legend cell align=left]
   500     legend cell align=left]
   500 \addplot[blue,mark=*, mark options={fill=white}] 
   501 \addplot[cyan,mark=*, mark options={fill=white}] 
   501   table {re-java.data};
   502   table {re-java.data};
   502 \addplot[red,mark=triangle*,mark options={fill=white}]
   503 \addplot[red,mark=triangle*,mark options={fill=white}]
   503   table {re2c.data};
   504   table {re1a.data};
   504 \end{axis}
   505 \end{axis}
   505 \end{tikzpicture}
   506 \end{tikzpicture}
   506 \end{center}
   507 \end{center}
   507 
   508 
   508 \noindent
   509 \noindent
   509 This is no error: it hardly takes more than half a second for
   510 This is not an error: it hardly takes more than half a second for
   510 strings up to the length of 4000. After that we receive a
   511 strings up to the length of 6500. After that we receive a
   511 StackOverflow exception, but still\ldots
   512 StackOverflow exception, but still\ldots
   512 
   513 
   513 For running the algorithm with our first example, the evil
   514 For running the algorithm with our first example, the evil
   514 regular expression $a^?{}^{\{n\}}a^{\{n\}}$, we need to implement
   515 regular expression $a^?{}^{\{n\}}a^{\{n\}}$, we need to implement
   515 the optional regular expression and the exactly $n$-times
   516 the optional regular expression and the exactly $n$-times
   522 Ooops\ldots
   523 Ooops\ldots
   523 
   524 
   524 \begin{center}
   525 \begin{center}
   525 \begin{tikzpicture}
   526 \begin{tikzpicture}
   526 \begin{axis}[    
   527 \begin{axis}[    
   527     title={Plot: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$},
   528     title={Graph: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$},
   528     xlabel={$n$},
   529     xlabel={$n$},
   529     x label style={at={(1.05,0.0)}},
   530     x label style={at={(1.05,0.0)}},
   530     ylabel={time in secs},
   531     ylabel={time in secs},
   531     enlargelimits=false,
   532     enlargelimits=false,
   532     xtick={0,5,...,30},
   533     xtick={0,5,...,30},
   533     xmax=30,
   534     xmax=32,
   534     ytick={0,5,...,30},
   535     ytick={0,5,...,30},
   535     scaled ticks=false,
   536     scaled ticks=false,
   536     axis lines=left,
   537     axis lines=left,
   537     width=6cm,
   538     width=6cm,
   538     height=5cm, 
   539     height=5cm, 
   539     legend entries={Python,Ruby,Scala V1},  
   540     legend entries={Python,Ruby,Scala V1},  
   540     legend pos=outer north east,
   541     legend pos=outer north east,
   541     legend cell align=left  
   542     legend cell align=left]
   542 ]
       
   543 \addplot[blue,mark=*, mark options={fill=white}] 
   543 \addplot[blue,mark=*, mark options={fill=white}] 
   544   table {re-python.data};
   544   table {re-python.data};
   545 \addplot[brown,mark=pentagon*, mark options={fill=white}] 
   545 \addplot[brown,mark=pentagon*, mark options={fill=white}] 
   546   table {re-ruby.data};  
   546   table {re-ruby.data};  
   547 \addplot[red,mark=triangle*,mark options={fill=white}] 
   547 \addplot[red,mark=triangle*,mark options={fill=white}] 
   583 effect?
   583 effect?
   584 
   584 
   585 \begin{center}
   585 \begin{center}
   586 \begin{tikzpicture}
   586 \begin{tikzpicture}
   587 \begin{axis}[
   587 \begin{axis}[
   588     title={Plot: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$},
   588     title={Graph: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$},
   589     xlabel={$n$},
   589     xlabel={$n$},
   590     x label style={at={(1.01,0.0)}},
   590     x label style={at={(1.01,0.0)}},
   591     ylabel={time in secs},
   591     ylabel={time in secs},
   592     enlargelimits=false,
   592     enlargelimits=false,
   593     xtick={0,100,...,1000},
   593     xtick={0,100,...,1000},
   605 \addplot[brown,mark=pentagon*, mark options={fill=white}] 
   605 \addplot[brown,mark=pentagon*, mark options={fill=white}] 
   606   table {re-ruby.data};  
   606   table {re-ruby.data};  
   607 \addplot[red,mark=triangle*,mark options={fill=white}] 
   607 \addplot[red,mark=triangle*,mark options={fill=white}] 
   608   table {re1.data};  
   608   table {re1.data};  
   609 \addplot[green,mark=square*,mark options={fill=white}] 
   609 \addplot[green,mark=square*,mark options={fill=white}] 
   610   table {re2b.data};
   610   table {re2.data};
   611 \end{axis}
   611 \end{axis}
   612 \end{tikzpicture}
   612 \end{tikzpicture}
   613 \end{center}
   613 \end{center}
   614 
   614 
   615 \noindent Now we are talking business! The modified matcher 
   615 \noindent Now we are talking business! The modified matcher 
   675 \end{figure}
   675 \end{figure}
   676 
   676 
   677 \begin{center}
   677 \begin{center}
   678 \begin{tikzpicture}
   678 \begin{tikzpicture}
   679 \begin{axis}[
   679 \begin{axis}[
   680     title={Plot: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$},
   680     title={Graph: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$},
   681     xlabel={$n$},
   681     xlabel={$n$},
   682     x label style={at={(1.04,0.0)}},
   682     x label style={at={(1.04,0.0)}},
   683     ylabel={time in secs},
   683     ylabel={time in secs},
   684     enlargelimits=false,
   684     enlargelimits=false,
   685     xtick={0,2000,...,12000},
   685     xtick={0,2000,...,12000},
   686     xmax=13000,
   686     xmax=13000,
   687     ytick={0,5,...,30},
   687     ytick={0,5,...,30},
       
   688     ymax=12,
   688     scaled ticks=false,
   689     scaled ticks=false,
   689     axis lines=left,
   690     axis lines=left,
   690     width=9cm,
   691     width=9cm,
   691     height=5cm, 
   692     height=5cm, 
   692     legend entries={Scala V2,Scala V3}]
   693     legend entries={Scala V2,Scala V3},
   693 \addplot[green,mark=square*,mark options={fill=white}] table {re2b.data};
   694     legend pos=outer north east,
       
   695     legend cell align=left]
       
   696 \addplot[green,mark=square*,mark options={fill=white}] table {re2.data};
   694 \addplot[black,mark=square*,mark options={fill=white}] table {re3.data};
   697 \addplot[black,mark=square*,mark options={fill=white}] table {re3.data};
   695 \end{axis}
   698 \end{axis}
   696 \end{tikzpicture}
   699 \end{tikzpicture}
   697 \end{center}
   700 \end{center}
   698 
   701 
       
   702 \noindent
       
   703 To reacap, Python and Ruby needed approximately 30 seconds to match
       
   704 a string of 28 \texttt{a}s and the regular expression $a^{?\{n\}} \cdot a^{\{n\}}$. 
       
   705 We need a third of this time to do the same with strings up to 12,000 \texttt{a}s. 
       
   706 Similarly, Java needed 30 seconds to find out the regular expression 
       
   707 $(a^*)^* \cdot b$ does not match the string of 28 \texttt{a}s. We can do
       
   708 the same in approximately 5 seconds for strings of 6000000 \texttt{a}s:
       
   709 
       
   710 
   699 \begin{center}
   711 \begin{center}
   700 \begin{tikzpicture}
   712 \begin{tikzpicture}
   701 \begin{axis}[
   713 \begin{axis}[
   702     title={Plot: $(a^*)^* \cdot b$ and strings $\underbrace{a\ldots a}_{n}$},
   714     title={Graph: $(a^*)^* \cdot b$ and strings $\underbrace{a\ldots a}_{n}$},
   703     xlabel={$n$},
   715     xlabel={$n$},
   704     x label style={at={(1.09,0.0)}},
   716     x label style={at={(1.09,0.0)}},
   705     ylabel={time in secs},
   717     ylabel={time in secs},
   706     enlargelimits=false,
   718     enlargelimits=false,
   707     %xtick={0,1e+6,...,6e+6},
   719     xmax=7700000,
   708     xmax=6200000,
       
   709     ytick={0,5,...,30},
   720     ytick={0,5,...,30},
   710     ymax=33,
   721     ymax=15,
   711     scaled ticks=false,
   722     %scaled ticks=false,
   712     axis lines=left,
   723     axis lines=left,
   713     width=9cm,
   724     width=9cm,
   714     height=5cm, 
   725     height=5cm, 
   715     legend entries={Scala V3}]
   726     legend entries={Scala V2, Scala V3},
       
   727     legend pos=outer north east,
       
   728     legend cell align=left]
       
   729 \addplot[green,mark=square*,mark options={fill=white}] table {re2a.data};
   716 \addplot[black,mark=square*,mark options={fill=white}] table {re3a.data};
   730 \addplot[black,mark=square*,mark options={fill=white}] table {re3a.data};
   717 \end{axis}
   731 \end{axis}
   718 \end{tikzpicture}
   732 \end{tikzpicture}
   719 \end{center}
   733 \end{center}
   720 
   734 
   721 \section{Epilogue}
   735 \subsection*{Epilogue}
   722 
   736 
   723 TBD
   737 (23/Aug/2016) I recently found another place where this algorithm can be 
       
   738 sped (this idea is not integrated with what is coming next,
       
   739 but I present it nonetheless). The idea is to define \texttt{ders}
       
   740 not such that it iterates the derivative character-by-character, but
       
   741 in bigger chunks. The resulting code for \texttt{ders2} looks as
       
   742 follows:
       
   743 
       
   744 \lstinputlisting[numbers=none]{../progs/app52.scala} 
       
   745 
       
   746 \noindent
       
   747 I have not fully understood why this version is much faster,
       
   748 but it seems it is a combination of the clauses for \texttt{ALT}
       
   749 and \texttt{SEQ}. In the latter case we call \texttt{der} with 
       
   750 a single character and this potentially produces an alternative.
       
   751 The derivative of such an alternative can then be more effeciently  
       
   752 calculated by \texttt{ders2} since it pushes a whole string
       
   753 under an \texttt{ALT}. The numbers are that in the second case  
       
   754 $(a^*)^* \cdot b$ both versions are pretty much the same, but in the 
       
   755 first case $a^{?\{n\}} \cdot a^{\{n\}}$ the improvement gives 
       
   756 another factor of 100 speedup. Nice!
       
   757 
       
   758 \begin{center}
       
   759 \begin{tabular}{cc}
       
   760 \begin{tikzpicture}
       
   761 \begin{axis}[
       
   762     title={Graph: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$},
       
   763     xlabel={$n$},
       
   764     x label style={at={(1.04,0.0)}},
       
   765     ylabel={time in secs},
       
   766     enlargelimits=false,
       
   767     xmax=7100000,
       
   768     ytick={0,5,...,30},
       
   769     ymax=33,
       
   770     %scaled ticks=false,
       
   771     axis lines=left,
       
   772     width=5cm,
       
   773     height=5cm, 
       
   774     legend entries={Scala V3, Scala V4},
       
   775     legend pos=north west]
       
   776 \addplot[black,mark=square*,mark options={fill=white}] table {re3.data};
       
   777 \addplot[purple,mark=square*,mark options={fill=white}] table {re4.data};
       
   778 \end{axis}
       
   779 \end{tikzpicture}
       
   780 &
       
   781 \begin{tikzpicture}
       
   782 \begin{axis}[
       
   783     title={Graph: $(a^*)^* \cdot b$ and strings $\underbrace{a\ldots a}_{n}$},
       
   784     xlabel={$n$},
       
   785     x label style={at={(1.09,0.0)}},
       
   786     ylabel={time in secs},
       
   787     enlargelimits=false,
       
   788     xmax=8100000,
       
   789     ytick={0,5,...,30},
       
   790     ymax=33,
       
   791     %scaled ticks=false,
       
   792     axis lines=left,
       
   793     width=5cm,
       
   794     height=5cm, 
       
   795     legend entries={Scala V3, Scala V4},
       
   796     legend pos=north west]
       
   797 \addplot[black,mark=square*,mark options={fill=white}] table {re3a.data};
       
   798 \addplot[purple,mark=square*,mark options={fill=white}] table {re4a.data};
       
   799 \end{axis}
       
   800 \end{tikzpicture}
       
   801 \end{tabular}
       
   802 \end{center}
   724 
   803 
   725 
   804 
   726 \section*{Proofs}
   805 \section*{Proofs}
   727 
   806 
   728 You might not like doing proofs. But they serve a very
   807 You might not like doing proofs. But they serve a very