# HG changeset patch # User Christian Urban # Date 1459941534 -3600 # Node ID e4afe3f46c295d806d8441d65a2688c23fbd9f00 # Parent 5c1fbb39c93e0bfdc3d00c5cb4982335091a1a5d updated diff -r 5c1fbb39c93e -r e4afe3f46c29 handouts/ho04.pdf Binary file handouts/ho04.pdf has changed diff -r 5c1fbb39c93e -r e4afe3f46c29 handouts/ho04.tex --- a/handouts/ho04.tex Wed Apr 06 11:51:33 2016 +0100 +++ b/handouts/ho04.tex Wed Apr 06 12:18:54 2016 +0100 @@ -5,6 +5,7 @@ \begin{document} +\fnote{\copyright{} Christian Urban, King's College London, 2014, 2015, 2016} \section*{Handout 4 (Sulzmann \& Lu Algorithm)} @@ -42,8 +43,8 @@ \begin{tabular}{cc} \begin{tabular}{@{}rrl@{}} \multicolumn{3}{c}{regular expressions}\medskip\\ - $r$ & $::=$ & $\varnothing$\\ - & $\mid$ & $\epsilon$ \\ + $r$ & $::=$ & $\ZERO$\\ + & $\mid$ & $\ONE$ \\ & $\mid$ & $c$ \\ & $\mid$ & $r_1 \cdot r_2$\\ & $\mid$ & $r_1 + r_2$ \\ @@ -57,7 +58,7 @@ & & $Empty$ \\ & $\mid$ & $Char(c)$ \\ & $\mid$ & $Seq(v_1,v_2)$\\ - & $\mid$ & $Left(v)$ \\ + & $\mid$ & $\Left(v)$ \\ & $\mid$ & $Right(v)$ \\ & $\mid$ & $[v_1,\ldots\,v_n]$ \\ \end{tabular} @@ -66,10 +67,10 @@ \noindent The reason is that there is a very strong correspondence between them. There is no value for the -$\varnothing$ regular expression, since it does not match any +$\ZERO$ regular expression, since it does not match any string. Otherwise there is exactly one value corresponding to each regular expression with the exception of $r_1 + r_2$ -where there are two values, namely $Left(v)$ and $Right(v)$ +where there are two values, namely $\Left(v)$ and $Right(v)$ corresponding to the two alternatives. Note that $r^*$ is associated with a list of values, one for each copy of $r$ that was needed to match the string. This means we might also @@ -77,19 +78,13 @@ of $r^*$. For sequence, there is exactly one value, composed of two component values ($v_1$ and $v_2$). -To emphasise the connection between regular expressions and -values, I have in my implementation the convention that -regular expressions and values have the same name, except that +My definition of regular expressions and values in Scala is +shown below. I have in my implementation the convention that regular expressions are written entirely with upper-case letters, while values just start with a single upper-case -character and the rest are lower-case letters. My definition -of regular expressions and values in Scala is shown below. I -use this in the REPL of Scala; when I use the Scala compiler I -unfortunately need to rename some constructors, because Scala -on Macs does not like classes that are called \pcode{EMPTY} -and \pcode{Empty}. +character and the rest are lower-case letters. - {\small\lstinputlisting[language=Scala,numbers=none] +{\small\lstinputlisting[language=Scala,numbers=none] {../progs/app01.scala}} @@ -141,16 +136,16 @@ \begin{center} \begin{tabular}{lcl} - $mkeps(\epsilon)$ & $\dn$ & $Empty$\\ + $mkeps(\ONE)$ & $\dn$ & $Empty$\\ $mkeps(r_1 + r_2)$ & $\dn$ & if $nullable(r_1)$ \\ - & & then $Left(mkeps(r_1))$\\ + & & then $\Left(mkeps(r_1))$\\ & & else $Right(mkeps(r_2))$\\ $mkeps(r_1 \cdot r_2)$ & $\dn$ & $Seq(mkeps(r_1),mkeps(r_2))$\\ $mkeps(r^*)$ & $\dn$ & $[]$ \\ \end{tabular} \end{center} -\noindent There are no cases for $\varnothing$ and $c$, since +\noindent There are no cases for $\ZERO$ and $c$, since these regular expression cannot match the empty string. Note also that in case of alternatives we give preference to the regular expression on the left-hand side. This will become @@ -170,11 +165,11 @@ \begin{center} \begin{tabular}{l@{\hspace{1mm}}c@{\hspace{1mm}}l} - $inj\,(c)\,c\,Empty$ & $\dn$ & $Char\,c$\\ - $inj\,(r_1 + r_2)\,c\,Left(v)$ & $\dn$ & $Left(inj\,r_1\,c\,v)$\\ - $inj\,(r_1 + r_2)\,c\,Right(v)$ & $\dn$ & $Right(inj\,r_2\,c\,v)$\\ + $inj\,(c)\,c\,Empty$ & $\dn$ & $Char\,c$\\ + $inj\,(r_1 + r_2)\,c\,\Left(v)$ & $\dn$ & $\Left(inj\,r_1\,c\,v)$\\ + $inj\,(r_1 + r_2)\,c\,Right(v)$ & $\dn$ & $Right(inj\,r_2\,c\,v)$\\ $inj\,(r_1 \cdot r_2)\,c\,Seq(v_1,v_2)$ & $\dn$ & $Seq(inj\,r_1\,c\,v_1,v_2)$\\ - $inj\,(r_1 \cdot r_2)\,c\,Left(Seq(v_1,v_2))$ & $\dn$ & $Seq(inj\,r_1\,c\,v_1,v_2)$\\ + $inj\,(r_1 \cdot r_2)\,c\,\Left(Seq(v_1,v_2))$ & $\dn$ & $Seq(inj\,r_1\,c\,v_1,v_2)$\\ $inj\,(r_1 \cdot r_2)\,c\,Right(v)$ & $\dn$ & $Seq(mkeps(r_1),inj\,r_2\,c\,v)$\\ $inj\,(r^*)\,c\,Seq(v,vs)$ & $\dn$ & $inj\,r\,c\,v\,::\,vs$\\ \end{tabular} @@ -199,9 +194,9 @@ \begin{center} \begin{tabular}{ll} $r_1$: & $a \cdot (b \cdot c)$\\ -$r_2$: & $\epsilon \cdot (b \cdot c)$\\ -$r_3$: & $(\varnothing \cdot (b \cdot c)) + (\epsilon \cdot c)$\\ -$r_4$: & $(\varnothing \cdot (b \cdot c)) + ((\varnothing \cdot c) + \epsilon)$\\ +$r_2$: & $\ONE \cdot (b \cdot c)$\\ +$r_3$: & $(\ZERO \cdot (b \cdot c)) + (\ONE \cdot c)$\\ +$r_4$: & $(\ZERO \cdot (b \cdot c)) + ((\ZERO \cdot c) + \ONE)$\\ \end{tabular} \end{center} @@ -210,7 +205,7 @@ This means we can use the function $mkeps$ to calculate a value for how $r_4$ was able to match the empty string. Remember that this function gives preference for alternatives -on the left-hand side. However there is only $\epsilon$ on the +on the left-hand side. However there is only $\ONE$ on the very right-hand side of $r_4$ that matches the empty string. Therefore $mkeps$ returns the value @@ -218,19 +213,19 @@ $v_4:\;Right(Right(Empty))$ \end{center} -\noindent If there had been a $\epsilon$ on the left, then +\noindent If there had been a $\ONE$ on the left, then $mkeps$ would have returned something of the form -$Left(\ldots)$. The point is that from this value we can +$\Left(\ldots)$. The point is that from this value we can directly read off which part of $r_4$ matched the empty string: take the right-alternative first, and then the right-alternative again. Remember $r_4$ is of the form \begin{center} -$r_4$:\;$(\varnothing \cdot (b \cdot c)) + - ((\varnothing \cdot c) + \underline{\epsilon})$\\ +$r_4$:\;$(\ZERO \cdot (b \cdot c)) + + ((\ZERO \cdot c) + \underline{\ONE})$\\ \end{center} -\noindent the value tells us that the underlined $\epsilon$ +\noindent the value tells us that the underlined $\ONE$ is responsible for matching the empty string. Next we have to ``inject'' the last character, that is $c$ in @@ -243,7 +238,7 @@ \end{center} \noindent This is the correct result, because $r_3$ needs -to use the right-hand alternative, and then $\epsilon$ needs +to use the right-hand alternative, and then $\ONE$ needs to match the empty string and $c$ needs to match $c$. Next we need to inject back the letter $b$ into $v_3$. This gives @@ -272,7 +267,7 @@ \begin{tabular}{lcl} $|Empty|$ & $\dn$ & $[]$\\ $|Char(c)|$ & $\dn$ & $[c]$\\ - $|Left(v)|$ & $\dn$ & $|v|$\\ + $|\Left(v)|$ & $\dn$ & $|v|$\\ $|Right(v)|$ & $\dn$ & $|v|$\\ $|Seq(v_1,v_2)|$& $\dn$ & $|v_1| \,@\, |v_2|$\\ $|[v_1,\ldots ,v_n]|$ & $\dn$ & $|v_1| \,@\ldots @\, |v_n|$\\ @@ -315,11 +310,11 @@ \begin{center} $r_4 = der\,c\,r_3 = -(\varnothing \cdot (b \cdot c)) + ((\varnothing \cdot c) + \epsilon)$ +(\ZERO \cdot (b \cdot c)) + ((\ZERO \cdot c) + \ONE)$ \end{center} \noindent Simplifying this regular expression would just give -us $\epsilon$. Running $mkeps$ with this regular expression as +us $\ONE$. Running $mkeps$ with this regular expression as input, however, would then provide us with $Empty$ instead of $Right(Right(Empty))$ that was obtained without the simplification. The problem is we need to recreate this more @@ -333,18 +328,18 @@ \begin{center} \begin{tabular}{l@{\hspace{2mm}}c@{\hspace{2mm}}l} -$r \cdot \varnothing$ & $\mapsto$ & $\varnothing$\\ -$\varnothing \cdot r$ & $\mapsto$ & $\varnothing$\\ -$r \cdot \epsilon$ & $\mapsto$ & $r$\\ -$\epsilon \cdot r$ & $\mapsto$ & $r$\\ -$r + \varnothing$ & $\mapsto$ & $r$\\ -$\varnothing + r$ & $\mapsto$ & $r$\\ +$r \cdot \ZERO$ & $\mapsto$ & $\ZERO$\\ +$\ZERO \cdot r$ & $\mapsto$ & $\ZERO$\\ +$r \cdot \ONE$ & $\mapsto$ & $r$\\ +$\ONE \cdot r$ & $\mapsto$ & $r$\\ +$r + \ZERO$ & $\mapsto$ & $r$\\ +$\ZERO + r$ & $\mapsto$ & $r$\\ $r + r$ & $\mapsto$ & $r$\\ \end{tabular} \end{center} \noindent Applying them to $r_4$ will require several nested -simplifications in order end up with just $\epsilon$. However, +simplifications in order end up with just $\ONE$. However, it is possible to apply them in a depth-first, or inside-out, manner in order to calculate this simplified regular expression. @@ -358,7 +353,7 @@ component regular expressions (if they can be simplified) but also two rectification functions $f_{1s}$ and $f_{2s}$. We need to assemble them in order to obtain a rectified value for -$r_1 + r_2$. In case $r_{1s}$ simplified to $\varnothing$, we +$r_1 + r_2$. In case $r_{1s}$ simplified to $\ZERO$, we continue the derivative calculation with $r_{2s}$. The Sulzmann \& Lu algorithm would return a corresponding value, say $v_{2s}$. But now this value needs to be ``rectified'' to @@ -408,12 +403,12 @@ \quad case $r = r_1 + r_2$\\ \qquad let $(r_{1s}, f_{1s}) = simp(r_1)$\\ \qquad \phantom{let} $(r_{2s}, f_{2s}) = simp(r_2)$\smallskip\\ -\qquad case $r_{1s} = \varnothing$: +\qquad case $r_{1s} = \ZERO$: return $(r_{2s}, \lambda v. \,Right(f_{2s}(v)))$\\ -\qquad case $r_{2s} = \varnothing$: - return $(r_{1s}, \lambda v. \,Left(f_{1s}(v)))$\\ +\qquad case $r_{2s} = \ZERO$: + return $(r_{1s}, \lambda v. \,\Left(f_{1s}(v)))$\\ \qquad case $r_{1s} = r_{2s}$: - return $(r_{1s}, \lambda v. \,Left(f_{1s}(v)))$\\ + return $(r_{1s}, \lambda v. \,\Left(f_{1s}(v)))$\\ \qquad otherwise: return $(r_{1s} + r_{2s}, f_{alt}(f_{1s}, f_{2s}))$\\ \end{tabular} @@ -423,8 +418,8 @@ $r_1$ and $r_2$. This gives simplified regular expressions, $r_{1s}$ and $r_{2s}$, as well as two rectification functions $f_{1s}$ and $f_{2s}$. We next need to test whether the -simplified regular expressions are $\varnothing$ so as to make -further simplifications. In case $r_{1s}$ is $\varnothing$, +simplified regular expressions are $\ZERO$ so as to make +further simplifications. In case $r_{1s}$ is $\ZERO$, then we can return $r_{2s}$ (the other alternative). However for this we need to build a corresponding rectification function, which as said above is @@ -433,21 +428,21 @@ $\lambda v.\,Right(f_{2s}(v))$ \end{center} -\noindent The case where $r_{2s} = \varnothing$ is similar: -We return $r_{1s}$ and rectify with $Left(\_)$ and the +\noindent The case where $r_{2s} = \ZERO$ is similar: +We return $r_{1s}$ and rectify with $\Left(\_)$ and the other calculated rectification function $f_{1s}$. This gives \begin{center} -$\lambda v.\,Left(f_{1s}(v))$ +$\lambda v.\,\Left(f_{1s}(v))$ \end{center} \noindent The next case where $r_{1s} = r_{2s}$ can be treated -like the one where $r_{2s} = \varnothing$. We return $r_{1s}$ -and rectify with $Left(\_)$ and so on. +like the one where $r_{2s} = \ZERO$. We return $r_{1s}$ +and rectify with $\Left(\_)$ and so on. The otherwise-case is slightly more complicated. In this case -neither $r_{1s}$ nor $r_{2s}$ are $\varnothing$ and also +neither $r_{1s}$ nor $r_{2s}$ are $\ZERO$ and also $r_{1s} \not= r_{2s}$, which means no further simplification can be applied. Accordingly, we return $r_{1s} + r_{2s}$ as the simplified regular expression. In principle we also do not @@ -458,14 +453,14 @@ rectification functions $f_{1s}$ and $f_{2s}$. We can do this by defining a rectification function $f_{alt}$ which takes two rectification functions as arguments and applies them -according to whether the value is of the form $Left(\_)$ or +according to whether the value is of the form $\Left(\_)$ or $Right(\_)$: \begin{center} \begin{tabular}{l@{\hspace{1mm}}l} $f_{alt}(f_1, f_2) \dn$\\ -\qquad $\lambda v.\,$ case $v = Left(v')$: - & return $Left(f_1(v'))$\\ +\qquad $\lambda v.\,$ case $v = \Left(v')$: + & return $\Left(f_1(v'))$\\ \qquad \phantom{$\lambda v.\,$} case $v = Right(v')$: & return $Right(f_2(v'))$\\ \end{tabular} @@ -481,13 +476,13 @@ \quad case $r = r_1 \cdot r_2$\\ \qquad let $(r_{1s}, f_{1s}) = simp(r_1)$\\ \qquad \phantom{let} $(r_{2s}, f_{2s}) = simp(r_2)$\smallskip\\ -\qquad case $r_{1s} = \varnothing$: - return $(\varnothing, f_{error})$\\ -\qquad case $r_{2s} = \varnothing$: - return $(\varnothing, f_{error})$\\ -\qquad case $r_{1s} = \epsilon$: +\qquad case $r_{1s} = \ZERO$: + return $(\ZERO, f_{error})$\\ +\qquad case $r_{2s} = \ZERO$: + return $(\ZERO, f_{error})$\\ +\qquad case $r_{1s} = \ONE$: return $(r_{2s}, \lambda v. \,Seq(f_{1s}(Empty), f_{2s}(v)))$\\ -\qquad case $r_{2s} = \epsilon$: +\qquad case $r_{2s} = \ONE$: return $(r_{1s}, \lambda v. \,Seq(f_{1s}(v), f_{2s}(Empty)))$\\ \qquad otherwise: return $(r_{1s} \cdot r_{2s}, f_{seq}(f_{1s}, f_{2s}))$\\ @@ -506,16 +501,16 @@ \end{tabular} \end{center} -\noindent Note that in the case of $r_{1s} = \varnothing$ +\noindent Note that in the case of $r_{1s} = \ZERO$ (similarly $r_{2s}$) we use the function $f_{error}$ for rectification. If you think carefully, then you will realise that this function will actually never been called. This is -because a sequence with $\varnothing$ will never recognise any +because a sequence with $\ZERO$ will never recognise any string and therefore the second phase of the algorithm would never been called. The simplification function still expects us to give a function. So in my own implementation I just returned a function that raises an error. In the case -where $r_{1s} = \epsilon$ (similarly $r_{2s}$) we have +where $r_{1s} = \ONE$ (similarly $r_{2s}$) we have to create a sequence where the first component is a rectified version of $Empty$. Therefore we call $f_{1s}$ with $Empty$. @@ -541,12 +536,12 @@ \quad case $r = r_1 + r_2$\\ \qquad let $(r_{1s}, f_{1s}) = simp(r_1)$\\ \qquad \phantom{let} $(r_{2s}, f_{2s}) = simp(r_2)$\smallskip\\ -\qquad case $r_{1s} = \varnothing$: +\qquad case $r_{1s} = \ZERO$: return $(r_{2s}, \lambda v. \,Right(f_{2s}(v)))$\\ -\qquad case $r_{2s} = \varnothing$: - return $(r_{1s}, \lambda v. \,Left(f_{1s}(v)))$\\ +\qquad case $r_{2s} = \ZERO$: + return $(r_{1s}, \lambda v. \,\Left(f_{1s}(v)))$\\ \qquad case $r_{1s} = r_{2s}$: - return $(r_{1s}, \lambda v. \,Left(f_{1s}(v)))$\\ + return $(r_{1s}, \lambda v. \,\Left(f_{1s}(v)))$\\ \qquad otherwise: return $(r_{1s} + r_{2s}, f_{alt}(f_{1s}, f_{2s}))$ \medskip\\ @@ -554,13 +549,13 @@ \quad case $r = r_1 \cdot r_2$\\ \qquad let $(r_{1s}, f_{1s}) = simp(r_1)$\\ \qquad \phantom{let} $(r_{2s}, f_{2s}) = simp(r_2)$\smallskip\\ -\qquad case $r_{1s} = \varnothing$: - return $(\varnothing, f_{error})$\\ -\qquad case $r_{2s} = \varnothing$: - return $(\varnothing, f_{error})$\\ -\qquad case $r_{1s} = \epsilon$: +\qquad case $r_{1s} = \ZERO$: + return $(\ZERO, f_{error})$\\ +\qquad case $r_{2s} = \ZERO$: + return $(\ZERO, f_{error})$\\ +\qquad case $r_{1s} = \ONE$: return $(r_{2s}, \lambda v. \,Seq(f_{1s}(Empty), f_{2s}(v)))$\\ -\qquad case $r_{2s} = \epsilon$: +\qquad case $r_{2s} = \ONE$: return $(r_{1s}, \lambda v. \,Seq(f_{1s}(v), f_{2s}(Empty)))$\\ \qquad otherwise: return $(r_{1s} \cdot r_{2s}, f_{seq}(f_{1s}, f_{2s}))$ @@ -655,7 +650,7 @@ \begin{tabular}{lcl} $env(Empty)$ & $\dn$ & $[]$\\ $env(Char(c))$ & $\dn$ & $[]$\\ - $env(Left(v))$ & $\dn$ & $env(v)$\\ + $env(\Left(v))$ & $\dn$ & $env(v)$\\ $env(Right(v))$ & $\dn$ & $env(v)$\\ $env(Seq(v_1,v_2))$& $\dn$ & $env(v_1) \,@\, env(v_2)$\\ $env([v_1,\ldots ,v_n])$ & $\dn$ & diff -r 5c1fbb39c93e -r e4afe3f46c29 progs/app01.scala --- a/progs/app01.scala Wed Apr 06 11:51:33 2016 +0100 +++ b/progs/app01.scala Wed Apr 06 12:18:54 2016 +0100 @@ -1,6 +1,6 @@ abstract class Rexp -case object NULL extends Rexp -case object EMPTY extends Rexp +case object ZERO extends Rexp +case object ONE extends Rexp case class CHAR(c: Char) extends Rexp case class ALT(r1: Rexp, r2: Rexp) extends Rexp case class SEQ(r1: Rexp, r2: Rexp) extends Rexp diff -r 5c1fbb39c93e -r e4afe3f46c29 progs/app03.scala --- a/progs/app03.scala Wed Apr 06 11:51:33 2016 +0100 +++ b/progs/app03.scala Wed Apr 06 12:18:54 2016 +0100 @@ -1,6 +1,6 @@ abstract class Rexp -case object NULL extends Rexp -case object EMPTY extends Rexp +case object ZERO extends Rexp +case object ONE extends Rexp case class CHAR(c: Char) extends Rexp case class ALT(r1: Rexp, r2: Rexp) extends Rexp case class SEQ(r1: Rexp, r2: Rexp) extends Rexp diff -r 5c1fbb39c93e -r e4afe3f46c29 style.sty --- a/style.sty Wed Apr 06 11:51:33 2016 +0100 +++ b/style.sty Wed Apr 06 12:18:54 2016 +0100 @@ -9,9 +9,10 @@ \definecolor{darkblue}{rgb}{0,0,0.6} \usepackage[colorlinks=true,urlcolor=darkblue,linkcolor=darkblue]{hyperref} -%%% fo regular expressions +%%% for regular expressions and values \newcommand{\ZERO}{\mbox{\bf 0}} \newcommand{\ONE}{\mbox{\bf 1}} +\newcommand{\Left}{\textit{Left}} %%% for trees