handouts/ho04.tex
changeset 400 e4afe3f46c29
parent 394 2f9fe225ecc8
child 417 e74c696821a2
--- a/handouts/ho04.tex	Wed Apr 06 11:51:33 2016 +0100
+++ b/handouts/ho04.tex	Wed Apr 06 12:18:54 2016 +0100
@@ -5,6 +5,7 @@
 
 
 \begin{document}
+\fnote{\copyright{} Christian Urban, King's College London, 2014, 2015, 2016}
 
 \section*{Handout 4 (Sulzmann \& Lu Algorithm)}
 
@@ -42,8 +43,8 @@
 \begin{tabular}{cc}
 \begin{tabular}{@{}rrl@{}}
 \multicolumn{3}{c}{regular expressions}\medskip\\
-  $r$ & $::=$  & $\varnothing$\\
-      & $\mid$ & $\epsilon$   \\
+  $r$ & $::=$  & $\ZERO$\\
+      & $\mid$ & $\ONE$   \\
       & $\mid$ & $c$          \\
       & $\mid$ & $r_1 \cdot r_2$\\
       & $\mid$ & $r_1 + r_2$   \\
@@ -57,7 +58,7 @@
       &        & $Empty$   \\
       & $\mid$ & $Char(c)$          \\
       & $\mid$ & $Seq(v_1,v_2)$\\
-      & $\mid$ & $Left(v)$   \\
+      & $\mid$ & $\Left(v)$   \\
       & $\mid$ & $Right(v)$  \\
       & $\mid$ & $[v_1,\ldots\,v_n]$ \\
 \end{tabular}
@@ -66,10 +67,10 @@
 
 \noindent The reason is that there is a very strong
 correspondence between them. There is no value for the
-$\varnothing$ regular expression, since it does not match any
+$\ZERO$ regular expression, since it does not match any
 string. Otherwise there is exactly one value corresponding to
 each regular expression with the exception of $r_1 + r_2$
-where there are two values, namely $Left(v)$ and $Right(v)$
+where there are two values, namely $\Left(v)$ and $Right(v)$
 corresponding to the two alternatives. Note that $r^*$ is
 associated with a list of values, one for each copy of $r$
 that was needed to match the string. This means we might also
@@ -77,19 +78,13 @@
 of $r^*$. For sequence, there is exactly one value, composed 
 of two component values ($v_1$ and $v_2$).
 
-To emphasise the connection between regular expressions and
-values, I have in my implementation the convention that
-regular expressions and values have the same name, except that
+My definition of regular expressions and values in Scala is
+shown below. I have in my implementation the convention that
 regular expressions are written entirely with upper-case
 letters, while values just start with a single upper-case
-character and the rest are lower-case letters. My definition
-of regular expressions and values in Scala is shown below. I
-use this in the REPL of Scala; when I use the Scala compiler I
-unfortunately need to rename some constructors, because Scala
-on Macs does not like classes that are called \pcode{EMPTY}
-and \pcode{Empty}.
+character and the rest are lower-case letters.
  
- {\small\lstinputlisting[language=Scala,numbers=none]
+{\small\lstinputlisting[language=Scala,numbers=none]
 {../progs/app01.scala}}
 
  
@@ -141,16 +136,16 @@
 
 \begin{center}
 \begin{tabular}{lcl}
-  $mkeps(\epsilon)$       & $\dn$ & $Empty$\\
+  $mkeps(\ONE)$       & $\dn$ & $Empty$\\
   $mkeps(r_1 + r_2)$      & $\dn$ & if $nullable(r_1)$  \\
-                          &       & then $Left(mkeps(r_1))$\\
+                          &       & then $\Left(mkeps(r_1))$\\
                           &       & else $Right(mkeps(r_2))$\\
   $mkeps(r_1 \cdot r_2)$  & $\dn$ & $Seq(mkeps(r_1),mkeps(r_2))$\\
   $mkeps(r^*)$            & $\dn$ & $[]$  \\
 \end{tabular}
 \end{center}
 
-\noindent There are no cases for $\varnothing$ and $c$, since
+\noindent There are no cases for $\ZERO$ and $c$, since
 these regular expression cannot match the empty string. Note
 also that in case of alternatives we give preference to the
 regular expression on the left-hand side. This will become
@@ -170,11 +165,11 @@
 
 \begin{center}
 \begin{tabular}{l@{\hspace{1mm}}c@{\hspace{1mm}}l}
-  $inj\,(c)\,c\,Empty$            & $\dn$  & $Char\,c$\\
-  $inj\,(r_1 + r_2)\,c\,Left(v)$  & $\dn$  & $Left(inj\,r_1\,c\,v)$\\
-  $inj\,(r_1 + r_2)\,c\,Right(v)$ & $\dn$  & $Right(inj\,r_2\,c\,v)$\\
+  $inj\,(c)\,c\,Empty$            & $\dn$ & $Char\,c$\\
+  $inj\,(r_1 + r_2)\,c\,\Left(v)$ & $\dn$ & $\Left(inj\,r_1\,c\,v)$\\
+  $inj\,(r_1 + r_2)\,c\,Right(v)$ & $\dn$ & $Right(inj\,r_2\,c\,v)$\\
   $inj\,(r_1 \cdot r_2)\,c\,Seq(v_1,v_2)$ & $\dn$  & $Seq(inj\,r_1\,c\,v_1,v_2)$\\
-  $inj\,(r_1 \cdot r_2)\,c\,Left(Seq(v_1,v_2))$ & $\dn$  & $Seq(inj\,r_1\,c\,v_1,v_2)$\\
+  $inj\,(r_1 \cdot r_2)\,c\,\Left(Seq(v_1,v_2))$ & $\dn$  & $Seq(inj\,r_1\,c\,v_1,v_2)$\\
   $inj\,(r_1 \cdot r_2)\,c\,Right(v)$ & $\dn$  & $Seq(mkeps(r_1),inj\,r_2\,c\,v)$\\
   $inj\,(r^*)\,c\,Seq(v,vs)$         & $\dn$  & $inj\,r\,c\,v\,::\,vs$\\
 \end{tabular}
@@ -199,9 +194,9 @@
 \begin{center}
 \begin{tabular}{ll}
 $r_1$: & $a \cdot (b \cdot c)$\\
-$r_2$: & $\epsilon \cdot (b \cdot c)$\\
-$r_3$: & $(\varnothing \cdot (b \cdot c)) + (\epsilon \cdot c)$\\
-$r_4$: & $(\varnothing \cdot (b \cdot c)) + ((\varnothing \cdot c) + \epsilon)$\\
+$r_2$: & $\ONE \cdot (b \cdot c)$\\
+$r_3$: & $(\ZERO \cdot (b \cdot c)) + (\ONE \cdot c)$\\
+$r_4$: & $(\ZERO \cdot (b \cdot c)) + ((\ZERO \cdot c) + \ONE)$\\
 \end{tabular}
 \end{center}
 
@@ -210,7 +205,7 @@
 This means we can use the function $mkeps$ to calculate a
 value for how $r_4$ was able to match the empty string.
 Remember that this function gives preference for alternatives
-on the left-hand side. However there is only $\epsilon$ on the
+on the left-hand side. However there is only $\ONE$ on the
 very right-hand side of $r_4$ that matches the empty string.
 Therefore $mkeps$ returns the value
 
@@ -218,19 +213,19 @@
 $v_4:\;Right(Right(Empty))$
 \end{center}
 
-\noindent If there had been a $\epsilon$ on the left, then
+\noindent If there had been a $\ONE$ on the left, then
 $mkeps$ would have returned something of the form
-$Left(\ldots)$. The point is that from this value we can
+$\Left(\ldots)$. The point is that from this value we can
 directly read off which part of $r_4$ matched the empty
 string: take the right-alternative first, and then the
 right-alternative again. Remember $r_4$ is of the form
 
 \begin{center}
-$r_4$:\;$(\varnothing \cdot (b \cdot c)) + 
-         ((\varnothing \cdot c) + \underline{\epsilon})$\\
+$r_4$:\;$(\ZERO \cdot (b \cdot c)) + 
+         ((\ZERO \cdot c) + \underline{\ONE})$\\
 \end{center}
 
-\noindent the value tells us that the underlined $\epsilon$
+\noindent the value tells us that the underlined $\ONE$
 is responsible for matching the empty string.
 
 Next we have to ``inject'' the last character, that is $c$ in
@@ -243,7 +238,7 @@
 \end{center}
 
 \noindent This is the correct result, because $r_3$ needs
-to use the right-hand alternative, and then $\epsilon$ needs
+to use the right-hand alternative, and then $\ONE$ needs
 to match the empty string and $c$ needs to match $c$.
 Next we need to inject back the letter $b$ into $v_3$. This
 gives
@@ -272,7 +267,7 @@
 \begin{tabular}{lcl}
   $|Empty|$     & $\dn$ & $[]$\\
   $|Char(c)|$   & $\dn$ & $[c]$\\
-  $|Left(v)|$   & $\dn$ & $|v|$\\
+  $|\Left(v)|$   & $\dn$ & $|v|$\\
   $|Right(v)|$  & $\dn$ & $|v|$\\
   $|Seq(v_1,v_2)|$& $\dn$ & $|v_1| \,@\, |v_2|$\\
   $|[v_1,\ldots ,v_n]|$ & $\dn$ & $|v_1| \,@\ldots @\, |v_n|$\\
@@ -315,11 +310,11 @@
 
 \begin{center}
 $r_4 = der\,c\,r_3 = 
-(\varnothing \cdot (b \cdot c)) + ((\varnothing \cdot c) + \epsilon)$
+(\ZERO \cdot (b \cdot c)) + ((\ZERO \cdot c) + \ONE)$
 \end{center}
 
 \noindent Simplifying this regular expression would just give
-us $\epsilon$. Running $mkeps$ with this regular expression as
+us $\ONE$. Running $mkeps$ with this regular expression as
 input, however, would then provide us with $Empty$ instead of
 $Right(Right(Empty))$ that was obtained without the
 simplification. The problem is we need to recreate this more
@@ -333,18 +328,18 @@
 
 \begin{center}
 \begin{tabular}{l@{\hspace{2mm}}c@{\hspace{2mm}}l}
-$r \cdot \varnothing$ & $\mapsto$ & $\varnothing$\\ 
-$\varnothing \cdot r$ & $\mapsto$ & $\varnothing$\\ 
-$r \cdot \epsilon$ & $\mapsto$ & $r$\\ 
-$\epsilon \cdot r$ & $\mapsto$ & $r$\\ 
-$r + \varnothing$ & $\mapsto$ & $r$\\ 
-$\varnothing + r$ & $\mapsto$ & $r$\\ 
+$r \cdot \ZERO$ & $\mapsto$ & $\ZERO$\\ 
+$\ZERO \cdot r$ & $\mapsto$ & $\ZERO$\\ 
+$r \cdot \ONE$ & $\mapsto$ & $r$\\ 
+$\ONE \cdot r$ & $\mapsto$ & $r$\\ 
+$r + \ZERO$ & $\mapsto$ & $r$\\ 
+$\ZERO + r$ & $\mapsto$ & $r$\\ 
 $r + r$ & $\mapsto$ & $r$\\ 
 \end{tabular}
 \end{center}
 
 \noindent Applying them to $r_4$ will require several nested
-simplifications in order end up with just $\epsilon$. However,
+simplifications in order end up with just $\ONE$. However,
 it is possible to apply them in a depth-first, or inside-out,
 manner in order to calculate this simplified regular
 expression.
@@ -358,7 +353,7 @@
 component regular expressions (if they can be simplified) but
 also two rectification functions $f_{1s}$ and $f_{2s}$. We
 need to assemble them in order to obtain a rectified value for
-$r_1 + r_2$. In case $r_{1s}$ simplified to $\varnothing$, we
+$r_1 + r_2$. In case $r_{1s}$ simplified to $\ZERO$, we
 continue the derivative calculation with $r_{2s}$. The
 Sulzmann \& Lu algorithm would return a corresponding value,
 say $v_{2s}$. But now this value needs to be ``rectified'' to
@@ -408,12 +403,12 @@
 \quad case $r = r_1 + r_2$\\
 \qquad let $(r_{1s}, f_{1s}) = simp(r_1)$\\
 \qquad \phantom{let} $(r_{2s}, f_{2s}) = simp(r_2)$\smallskip\\
-\qquad case $r_{1s} = \varnothing$: 
+\qquad case $r_{1s} = \ZERO$: 
        return $(r_{2s}, \lambda v. \,Right(f_{2s}(v)))$\\
-\qquad case $r_{2s} = \varnothing$: 
-       return $(r_{1s}, \lambda v. \,Left(f_{1s}(v)))$\\
+\qquad case $r_{2s} = \ZERO$: 
+       return $(r_{1s}, \lambda v. \,\Left(f_{1s}(v)))$\\
 \qquad case $r_{1s} = r_{2s}$:
-       return $(r_{1s}, \lambda v. \,Left(f_{1s}(v)))$\\
+       return $(r_{1s}, \lambda v. \,\Left(f_{1s}(v)))$\\
 \qquad otherwise: 
        return $(r_{1s} + r_{2s}, f_{alt}(f_{1s}, f_{2s}))$\\
 \end{tabular}
@@ -423,8 +418,8 @@
 $r_1$ and $r_2$. This gives simplified regular expressions,
 $r_{1s}$ and $r_{2s}$, as well as two rectification functions
 $f_{1s}$ and $f_{2s}$. We next need to test whether the
-simplified regular expressions are $\varnothing$ so as to make
-further simplifications. In case $r_{1s}$ is $\varnothing$,
+simplified regular expressions are $\ZERO$ so as to make
+further simplifications. In case $r_{1s}$ is $\ZERO$,
 then we can return $r_{2s}$ (the other alternative). However
 for this we need to build a corresponding rectification 
 function, which as said above is
@@ -433,21 +428,21 @@
 $\lambda v.\,Right(f_{2s}(v))$
 \end{center}
 
-\noindent The case where $r_{2s} = \varnothing$ is similar:
-We return $r_{1s}$ and rectify with $Left(\_)$ and the
+\noindent The case where $r_{2s} = \ZERO$ is similar:
+We return $r_{1s}$ and rectify with $\Left(\_)$ and the
 other calculated rectification function $f_{1s}$. This gives
 
 \begin{center}
-$\lambda v.\,Left(f_{1s}(v))$
+$\lambda v.\,\Left(f_{1s}(v))$
 \end{center}
 
 \noindent The next case where $r_{1s} = r_{2s}$ can be treated
-like the one where $r_{2s} = \varnothing$. We return $r_{1s}$
-and rectify with $Left(\_)$ and so on.
+like the one where $r_{2s} = \ZERO$. We return $r_{1s}$
+and rectify with $\Left(\_)$ and so on.
 
 
 The otherwise-case is slightly more complicated. In this case
-neither $r_{1s}$ nor $r_{2s}$ are $\varnothing$ and also
+neither $r_{1s}$ nor $r_{2s}$ are $\ZERO$ and also
 $r_{1s} \not= r_{2s}$, which means no further simplification
 can be applied. Accordingly, we return $r_{1s} + r_{2s}$ as
 the simplified regular expression. In principle we also do not
@@ -458,14 +453,14 @@
 rectification functions $f_{1s}$ and $f_{2s}$. We can do this
 by defining a rectification function $f_{alt}$ which takes two
 rectification functions as arguments and applies them
-according to whether the value is of the form $Left(\_)$ or
+according to whether the value is of the form $\Left(\_)$ or
 $Right(\_)$:
 
 \begin{center}
 \begin{tabular}{l@{\hspace{1mm}}l}
 $f_{alt}(f_1, f_2) \dn$\\
-\qquad $\lambda v.\,$ case $v = Left(v')$: 
-      & return $Left(f_1(v'))$\\
+\qquad $\lambda v.\,$ case $v = \Left(v')$: 
+      & return $\Left(f_1(v'))$\\
 \qquad \phantom{$\lambda v.\,$} case $v = Right(v')$: 
       & return $Right(f_2(v'))$\\      
 \end{tabular}
@@ -481,13 +476,13 @@
 \quad case $r = r_1 \cdot r_2$\\
 \qquad let $(r_{1s}, f_{1s}) = simp(r_1)$\\
 \qquad \phantom{let} $(r_{2s}, f_{2s}) = simp(r_2)$\smallskip\\
-\qquad case $r_{1s} = \varnothing$: 
-       return $(\varnothing, f_{error})$\\
-\qquad case $r_{2s} = \varnothing$: 
-       return $(\varnothing, f_{error})$\\
-\qquad case $r_{1s} = \epsilon$: 
+\qquad case $r_{1s} = \ZERO$: 
+       return $(\ZERO, f_{error})$\\
+\qquad case $r_{2s} = \ZERO$: 
+       return $(\ZERO, f_{error})$\\
+\qquad case $r_{1s} = \ONE$: 
 return $(r_{2s}, \lambda v. \,Seq(f_{1s}(Empty), f_{2s}(v)))$\\
-\qquad case $r_{2s} = \epsilon$: 
+\qquad case $r_{2s} = \ONE$: 
 return $(r_{1s}, \lambda v. \,Seq(f_{1s}(v), f_{2s}(Empty)))$\\
 \qquad otherwise: 
        return $(r_{1s} \cdot r_{2s}, f_{seq}(f_{1s}, f_{2s}))$\\
@@ -506,16 +501,16 @@
 \end{tabular}
 \end{center}
 
-\noindent Note that in the case of $r_{1s} = \varnothing$
+\noindent Note that in the case of $r_{1s} = \ZERO$
 (similarly $r_{2s}$) we use the function $f_{error}$ for
 rectification. If you think carefully, then you will realise
 that this function will actually never been called. This is
-because a sequence with $\varnothing$ will never recognise any
+because a sequence with $\ZERO$ will never recognise any
 string and therefore the second phase of the algorithm would
 never been called. The simplification function still expects
 us to give a function. So in my own implementation I just
 returned a function that raises an error. In the case
-where $r_{1s} = \epsilon$ (similarly $r_{2s}$) we have
+where $r_{1s} = \ONE$ (similarly $r_{2s}$) we have
 to create a sequence where the first component is a rectified
 version of $Empty$. Therefore we call $f_{1s}$ with $Empty$.
 
@@ -541,12 +536,12 @@
 \quad case $r = r_1 + r_2$\\
 \qquad let $(r_{1s}, f_{1s}) = simp(r_1)$\\
 \qquad \phantom{let} $(r_{2s}, f_{2s}) = simp(r_2)$\smallskip\\
-\qquad case $r_{1s} = \varnothing$: 
+\qquad case $r_{1s} = \ZERO$: 
        return $(r_{2s}, \lambda v. \,Right(f_{2s}(v)))$\\
-\qquad case $r_{2s} = \varnothing$: 
-       return $(r_{1s}, \lambda v. \,Left(f_{1s}(v)))$\\
+\qquad case $r_{2s} = \ZERO$: 
+       return $(r_{1s}, \lambda v. \,\Left(f_{1s}(v)))$\\
 \qquad case $r_{1s} = r_{2s}$:
-       return $(r_{1s}, \lambda v. \,Left(f_{1s}(v)))$\\
+       return $(r_{1s}, \lambda v. \,\Left(f_{1s}(v)))$\\
 \qquad otherwise: 
        return $(r_{1s} + r_{2s}, f_{alt}(f_{1s}, f_{2s}))$
        \medskip\\
@@ -554,13 +549,13 @@
 \quad case $r = r_1 \cdot r_2$\\
 \qquad let $(r_{1s}, f_{1s}) = simp(r_1)$\\
 \qquad \phantom{let} $(r_{2s}, f_{2s}) = simp(r_2)$\smallskip\\
-\qquad case $r_{1s} = \varnothing$: 
-       return $(\varnothing, f_{error})$\\
-\qquad case $r_{2s} = \varnothing$: 
-       return $(\varnothing, f_{error})$\\
-\qquad case $r_{1s} = \epsilon$: 
+\qquad case $r_{1s} = \ZERO$: 
+       return $(\ZERO, f_{error})$\\
+\qquad case $r_{2s} = \ZERO$: 
+       return $(\ZERO, f_{error})$\\
+\qquad case $r_{1s} = \ONE$: 
 return $(r_{2s}, \lambda v. \,Seq(f_{1s}(Empty), f_{2s}(v)))$\\
-\qquad case $r_{2s} = \epsilon$: 
+\qquad case $r_{2s} = \ONE$: 
 return $(r_{1s}, \lambda v. \,Seq(f_{1s}(v), f_{2s}(Empty)))$\\
 \qquad otherwise: 
        return $(r_{1s} \cdot r_{2s}, f_{seq}(f_{1s}, f_{2s}))$
@@ -655,7 +650,7 @@
 \begin{tabular}{lcl}
   $env(Empty)$     & $\dn$ & $[]$\\
   $env(Char(c))$   & $\dn$ & $[]$\\
-  $env(Left(v))$   & $\dn$ & $env(v)$\\
+  $env(\Left(v))$   & $\dn$ & $env(v)$\\
   $env(Right(v))$  & $\dn$ & $env(v)$\\
   $env(Seq(v_1,v_2))$& $\dn$ & $env(v_1) \,@\, env(v_2)$\\
   $env([v_1,\ldots ,v_n])$ & $\dn$ &