# HG changeset patch # User Christian Urban # Date 1757087988 -3600 # Node ID 14e5ae1fb54135f6f4a890014a34984e3c40de34 # Parent 0c491eff5b01d992af41afcc8fcff993caa5c5dd updated diff -r 0c491eff5b01 -r 14e5ae1fb541 LINKS --- a/LINKS Mon Feb 03 13:25:59 2025 +0000 +++ b/LINKS Fri Sep 05 16:59:48 2025 +0100 @@ -3,6 +3,11 @@ http://aalhour.com/awesome-compilers https://github.com/aalhour/awesome-compilers +---------------- +https://cs3110.github.io/textbook/cover.html + +book and youtube videos about programming in ocaml + ---------------- Video about Language design @@ -244,4 +249,4 @@ ========================= How Clang compiles a function -https://blog.regehr.org/archives/1605 \ No newline at end of file +https://blog.regehr.org/archives/1605 diff -r 0c491eff5b01 -r 14e5ae1fb541 cw2_marking/cw2_test.sh --- a/cw2_marking/cw2_test.sh Mon Feb 03 13:25:59 2025 +0000 +++ b/cw2_marking/cw2_test.sh Fri Sep 05 16:59:48 2025 +0100 @@ -30,7 +30,7 @@ # functional tests function scala_assert { - rm -rf /Users/cu/.ammonite/cache/3.0.0-M2/* 2> /dev/null ; + rm -rf /Users/cu/.ammonite/cache/3.*/* 2> /dev/null ; (ulimit -t 30; JAVA_OPTS="-Xmx1g -XX:MaxJavaStackTraceDepth=10" amm -s "$1" 1>> $out 2>> $out) } diff -r 0c491eff5b01 -r 14e5ae1fb541 cw3_marking/cw3_test.sh --- a/cw3_marking/cw3_test.sh Mon Feb 03 13:25:59 2025 +0000 +++ b/cw3_marking/cw3_test.sh Fri Sep 05 16:59:48 2025 +0100 @@ -30,7 +30,7 @@ # functional tests function scala_assert { - rm -rf /Users/cu/.ammonite/cache/3.0.0-M2/* 2> /dev/null ; + rm -rf /Users/cu/.ammonite/cache/3.*/* 2> /dev/null ; (ulimit -t 30; JAVA_OPTS="-Xmx1g -XX:MaxJavaStackTraceDepth=10" amm -s "$1" 1>> $out 2>> $out) } diff -r 0c491eff5b01 -r 14e5ae1fb541 cw4_marking/cw4_test.sh --- a/cw4_marking/cw4_test.sh Mon Feb 03 13:25:59 2025 +0000 +++ b/cw4_marking/cw4_test.sh Fri Sep 05 16:59:48 2025 +0100 @@ -30,7 +30,7 @@ # functional tests function scala_assert { - rm -rf /Users/cu/.ammonite/cache/3.0.0-M2/* 2> /dev/null ; + rm -rf /Users/cu/.ammonite/cache/3.*/* 2> /dev/null ; (ulimit -t 30; JAVA_OPTS="-Xmx1g -XX:MaxJavaStackTraceDepth=10" amm -s "$1" 1>> $out 2>> $out) } diff -r 0c491eff5b01 -r 14e5ae1fb541 handouts/antimirov.pdf Binary file handouts/antimirov.pdf has changed diff -r 0c491eff5b01 -r 14e5ae1fb541 handouts/antimirov.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/handouts/antimirov.tex Fri Sep 05 16:59:48 2025 +0100 @@ -0,0 +1,477 @@ +\documentclass[12pt]{article} +\usepackage{../style} +\usepackage{../langs} +\usepackage{graphicx} + +\newtheorem{thm}{Theorem} +\newtheorem{lem}[thm]{Lemma} +\newtheorem{cor}[thm]{Corollary} +\newenvironment{proof}{\paragraph{Proof:}\it}{\hfill$\square$} + + +\begin{document} + + +\section*{Antimirov's Proof about Pders} + +These are some rough notes about the result by Antimirov establishing +a bound on the number of regular expressions in a partial +derivative. From this bound on the number of partial derivatives one +can easily construct an NFA for a regular expression, but one can also +derive a bound on the size of the partial derivatives. This is what we +do first. 
We start with the following definitions:
+
+\begin{itemize}
+\item $pder\,c\,r$ --- partial derivative according to a character; this can be defined
+  inductively as follows:
+
+  \begin{center}
+  \begin{tabular}{l@ {\hspace{2mm}}c@ {\hspace{2mm}}l}
+  $\textit{pder}\, c\, (\ZERO)$ & $\dn$ & $\emptyset$\\
+  $\textit{pder}\, c\, (\ONE)$  & $\dn$ & $\emptyset$ \\
+  $\textit{pder}\, c\, (d)$     & $\dn$ & if $c = d$ then $\{\ONE\}$ else $\emptyset$\\
+  $\textit{pder}\, c\, (r_1 + r_2)$ & $\dn$ & $\textit{pder}\, c\, r_1 \;\cup\; \textit{pder}\, c\, r_2$\\
+  $\textit{pder}\, c\, (r_1 \cdot r_2)$ & $\dn$ & if $\textit{nullable} (r_1)$\\
+  & & then $\prod\,(\textit{pder}\,c\,r_1)\,r_2 \;\cup\; \textit{pder}\, c\, r_2$\\
+  & & else $\prod\,(\textit{pder}\, c\, r_1)\,r_2$\\
+  $\textit{pder}\, c\, (r^*)$ & $\dn$ & $\prod\,(\textit{pder}\,c\,r)\, (r^*)$
+  \end{tabular}
+\end{center}
+
+\item $pder^+\,c\,rs$ --- partial derivatives for a set of regular expressions $rs$
+\item $pders\,s\,r$ --- partial derivatives of a regular expression
+  according to a string
+
+\item $Pders\,A\,r \dn \bigcup_{s\in A}. pders\,s\,r$ --- partial
+  derivatives according to a language (a set of strings)
+\item $|rs|$ is the size of a set of regular expressions $rs$, that is
+  the number of elements in this
+  set (also known as the cardinality of the set)
+\item $\prod\,rs\;r \dn \{r_1 \cdot r \;|\; r_1 \in rs\}$ --- this is
+  a convenient notation for writing sets of sequence regular
+  expressions. It essentially ``appends'' the regular expression $r$ to all
+  regular expressions in the set $rs$. As a result one can write
+  the sequence case for partial derivatives (see above) more conveniently
+  as
+  \[
+  pder\,c\,(r_1\cdot r_2) \dn
+  \begin{cases}
+  \prod\,(pder\,c\,r_1)\,r_2\;\cup\;pder\,c\,r_2 &
+  \!\!\textit{provided}\,r_1\, \textit{is nullable}\\
+  \prod\,(pder\,c\,r_1)\,r_2 & \!\!\textit{otherwise}\\
+  \end{cases}
+  \]
+\item $\textit{PSuf}\,s$ is the set of all non-empty suffixes of $s$, defined as
+  \[
+  \textit{PSuf}\, s \dn \{v.\;v \not= [] \wedge \exists u. u \,@\, v = s \}
+\]
+
+So for the string $abc$ the non-empty suffixes are $c$, $bc$ and
+$abc$. Also we have that
+$\textit{PSuf}\,(s\,@\,[c]) = ((\textit{PSuf}\,s)\,@@\,[c]) \cup
+\{[c]\}$. Here $@@$ means to concatenate $[c]$ to the end of
+all strings in $\textit{PSuf}\,s$; in this equation we also
+need to add $\{[c]\}$ in order to make the equation hold.
+\end{itemize}
+
+\noindent
+To state Antimirov's result we need the definition of the
+\emph{alphabetic width} of a regular expression:
+
+\begin{center}
+\begin{tabular}{lcl}
+  $awidth(\ZERO)$ & $\dn$ & $0$\\
+  $awidth(\ONE)$ & $\dn$ & $0$\\
+  $awidth(c)$ & $\dn$ & $1$\\
+  $awidth(r_1 + r_2)$ & $\dn$ & $awidth(r_1) + awidth(r_2)$\\
+  $awidth(r_1 \cdot r_2)$ & $\dn$ & $awidth(r_1) + awidth(r_2)$\\
+  $awidth(r^*)$ & $\dn$ & $awidth(r)$\\
+\end{tabular}
+\end{center}
+
+\noindent
+This function counts how many characters occur in a regular expression.
+Antimirov's result states
+
+\begin{thm}\label{one}
+$\forall\,A\,r\,.\;\;|Pders\;A\;r| \leq awidth(r) + 1$
+\end{thm}
+
+\noindent
+Note that this theorem holds for any set of strings $A$, for example
+for the set of all strings, which I will write as $\textit{UNIV}$, and also
+for the set $\{s\}$ containing only a single string $s$.
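+
+\noindent
+As a small illustration of this bound (this example is not from
+Antimirov's paper), consider the regular expression
+$r = (a\cdot b) + b$ with $awidth(r) = 3$. Using the definitions
+above one obtains
+
+\begin{center}
+\begin{tabular}{lcl}
+  $pder\,a\,r$ & $=$ & $\{\ONE\cdot b\}$\\
+  $pder\,b\,r$ & $=$ & $\{\ONE\}$\\
+  $pder\,a\,(\ONE\cdot b)$ & $=$ & $\emptyset$\\
+  $pder\,b\,(\ONE\cdot b)$ & $=$ & $\{\ONE\}$\\
+  $pder\,c\,\ONE$ & $=$ & $\emptyset$ \quad for every character $c$\\
+\end{tabular}
+\end{center}
+
+\noindent
+and partial derivatives with respect to any other character are empty.
+Therefore $Pders\,UNIV\,r = \{(a\cdot b) + b,\;\ONE\cdot b,\;\ONE\}$, which
+has $3$ elements and thus stays within the bound $awidth(r) + 1 = 4$.
+
+\noindent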
Instantiating $A$ in Thm~\ref{one} with the singleton set $\{s\}$ gives the
+following simple corollary:
+
+\begin{cor}
+$\forall\,s\,r\,.\;\;|pders\;s\;r| \leq awidth(r) + 1$
+\end{cor}
+
+\noindent
+This property says that for every string $s$, the number of regular
+expressions in the partial derivative $pders\,s\,r$ can never be bigger than
+$awidth(r) + 1$. Interestingly, we do not show Thm~\ref{one} for all
+sets of strings $A$ directly, but rather first for one particular set of
+strings, which I call $UNIV_1$. It contains all strings except the empty string
+(remember that $UNIV$ contains all strings).\bigskip
+
+\noindent
+Let us try to give below a comprehensible account of Antimirov's proof
+of Thm~\ref{one}---I distinctly remember that Antimirov's paper is
+great, but pretty incomprehensible for the first 20+ times one reads
+that paper. The proof starts with the following much weaker property,
+namely that the sets of partial derivatives are always finite:
+
+\begin{lem}
+$\forall\,A\,r\,.\;\;(Pders\;A\;r)$ is a finite set.
+\end{lem}
+
+\noindent This lemma is needed because some reasoning steps in
+Thm~\ref{one} only work for finite sets, not infinite sets. But let us
+skip over the proof of this property for the moment and assume we
+already know that the sets of partial derivatives are always finite (this,
+for example, does not hold for unsimplified Brzozowski derivatives, which
+can be infinite for some sets of strings).
+
+There are some central lemmas about partial derivatives for $\cdot$ and $\_^*$.
+One is the following:
+
+\begin{lem}\label{central}\mbox{}\\
+  \[Pders\,UNIV_1\,(r_1\cdot r_2) \subseteq (\prod (Pders\,UNIV_1\, r_1)\,r_2) \;
+  \cup \; Pders\,UNIV_1\,r_2\]
+\end{lem}
+
+\begin{proof}
+  \noindent The proof is done via an induction over the following property
+  \[
+  pders\,s\,(r_1\cdot r_2) \subseteq (\prod (pders\,s\, r_1)\,r_2) \;
+  \cup \; Pders\,(\textit{PSuf}\,s)\,r_2
+  \]
+
+  \noindent
+  Note that this property uses $pders$ and $Pders$ together. The proof is done
+  by ``reverse'' induction on $s$, meaning the cases to analyse are the
+  empty string $[]$ and the case where a character is put at the end of the
+  string $s$, namely $s \,@\, [c]$. The case $[]$ is trivial. In the other
+  case we know by IH that
+
+  \[
+  pders\,s\,(r_1\cdot r_2) \subseteq (\prod (pders\,s\, r_1)\,r_2) \;
+  \cup \; Pders\,(\textit{PSuf}\,s)\,r_2
+  \]
+
+  \noindent
+  holds for $s$.
Then we have to show that it holds for $s\,@\,[c]$:
+
+  \begin{center}
+  \begin{tabular}{r@{\hspace{2mm}}ll}
+  & $pders\,(s\,@\,[c])\,(r_1\cdot r_2)$\\
+  $=$ & $pder^+\,c\,(pders\,s\,(r_1\cdot r_2))$\\
+  $\subseteq$ & $pder^+\,c\,(\prod (pders\,s\, r_1)\,r_2 \;
+  \cup \; Pders\,(\textit{PSuf}\,s)\,r_2)$\\
+  & \hfill{}by IH\\
+  $=$ & $(pder^+\,c\,(\prod (pders\,s\, r_1)\,r_2))\;\cup\;
+  (pder^+\,c\,(Pders\,(\textit{PSuf}\,s)\,r_2))$\\
+  $=$ & $(pder^+\,c\,(\prod (pders\,s\, r_1)\,r_2))\;\cup\;
+  (Pders\,(\textit{PSuf}\,(s\,@\,[c]))\,r_2)$\\
+  $\subseteq$ &
+  $(pder^+\,c\,(\prod (pders\,s\, r_1)\,r_2))\;\cup\;
+  (pder\,c\,r_2)\;\cup\;
+  (Pders\,(\textit{PSuf}\,s\,@@\,[c])\,r_2)$\\
+  $\subseteq$ & $\prod (pder^+\,c\,(pders\,s\, r_1))\,r_2\;\cup\;
+  (pder\,c\,r_2)\;\cup\;
+  (Pders\,(\textit{PSuf}\,s\,@@\,[c])\,r_2)$\\
+  $=$ & $(\prod (pders\,(s\,@\,[c])\, r_1)\,r_2)\;\cup\;
+  (pder\,c\,r_2)\;\cup\;
+  (Pders\,(\textit{PSuf}\,s\,@@\,[c])\,r_2)$\\
+  $\subseteq$ & $(\prod (pders\,(s\,@\,[c])\, r_1)\,r_2)\;\cup\;
+  (Pders\,(\textit{PSuf}\,(s\,@\,[c]))\,r_2)$\\
+  \end{tabular}
+  \end{center}
+
+  \noindent The lemma now follows because for an $s \in UNIV_1$ it holds that
+
+  \[
+  \prod\,(pders\,s\,r_1)\,r_2 \subseteq \prod (Pders\,UNIV_1\, r_1)\,r_2
+  \]
+
+  \noindent and
+
+  \[
+  Pders\,(\textit{PSuf}\,s)\,r_2 \subseteq Pders\,UNIV_1\,r_2
+  \]
+
+  \noindent By the property proved above, $pders\,s\,(r_1\cdot r_2)$ is
+  contained in the union of the left-hand sides of these two inclusions
+  for every string $s\in UNIV_1$.
+\end{proof}\medskip
+
+\noindent There is a similar lemma for the $^*$-regular expression, namely:
+
+\begin{lem}\label{centraltwo}
+$Pders\,UNIV_1\,(r^*) \subseteq \prod\, (Pders\,UNIV_1\,r)\,(r^*)$
+\end{lem}
+
+\noindent
+We omit the proof for the moment (it is similar to the proof of
+Lem~\ref{central}). We also need the following property about the
+cardinality of $\prod$:
+
+\begin{lem}\label{centralthree}
+  $|\prod\,(Pders\,A\,r_1)\,r_2| \le |Pders\,A\,r_1|$
+\end{lem}
+
+\noindent
+We only need the $\le$ version, which essentially says that there are
+at most as many sequences $r'\cdot r_2$ as there are elements $r'$ in
+the set $Pders\,A\,r_1$. Now for the proof of Thm~\ref{one}: the main
+induction in Antimirov's proof establishes the following:\footnote{Remember
+  that the hardest part in an induction proof is always to find the right
+  property that is strong enough and of the right shape for the
+  induction to go through.}
+
+\begin{lem}\label{two}
+$\forall r.\;\;|Pders\;UNIV_1\;r| \leq awidth(r)$
+\end{lem}
+
+\begin{proof} This is proved by induction on
+  $r$. The interesting cases are $r_1 + r_2$, $r_1\cdot r_2$ and
+  $r^*$. Let us start with the relatively simple case:\medskip
+
+\noindent
+\textbf{Case $r_1 + r_2$:} By induction hypothesis we know
+
+\begin{center}
+\begin{tabular}{l}
+  $|Pders\;UNIV_1\;r_1| \leq awidth(r_1)$\\
+  $|Pders\;UNIV_1\;r_2| \leq awidth(r_2)$
+\end{tabular}
+\end{center}
+
+\noindent
+In this case we can reason as follows:
+
+\begin{center}
+\begin{tabular}{r@{\hspace{2mm}}ll}
+  & $|Pders\;UNIV_1\;(r_1+r_2)|$\\
+  $=$ & $|(Pders\;UNIV_1\;r_1) \;\cup\; (Pders\;UNIV_1\;r_2)|$\\
+  $\leq$ & $|(Pders\;UNIV_1\;r_1)| \;+\; |(Pders\;UNIV_1\;r_2)|$ & (*)\\
+  $\leq$ & $awidth(r_1) + awidth(r_2)$\\
+  $\dn$ & $awidth(r_1 + r_2)$
+\end{tabular}
+\end{center}
+
+\noindent Note that (*) is a step that only works if one knows that
+$|(Pders\;UNIV_1\;r_1)|$ and so on are finite. The next case is
+a bit more interesting:\medskip
+
+\noindent
+\textbf{Case $r_1 \cdot r_2$:} We have the same induction
+hypothesis as in the case before.
+
+\begin{center}
+\begin{tabular}{r@{\hspace{2mm}}ll}
+  & $|Pders\;UNIV_1\;(r_1\cdot r_2)|$\\
+  $\leq$ & $|\prod\,(Pders\;UNIV_1\;r_1)\,r_2\;\cup\; (Pders\;UNIV_1\;r_2)|$
+  & by Lem~\ref{central}\\
+  $\leq$ & $|\prod\,(Pders\;UNIV_1\;r_1)\,r_2| \;+\; |(Pders\;UNIV_1\;r_2)|$\\
+  $\leq$ & $|Pders\;UNIV_1\;r_1| \;+\; |Pders\;UNIV_1\;r_2|$
+  & by Lem~\ref{centralthree} \\
+  $\leq$ & $awidth(r_1) + awidth(r_2)$\\
+  $\dn$ & $awidth(r_1 \cdot r_2)$
+\end{tabular}
+\end{center} \medskip
+
+\noindent
+\textbf{Case $r^*$:} Again we have the same induction
+hypothesis as in the cases before.
+
+\begin{center}
+\begin{tabular}{r@{\hspace{2mm}}ll}
+  & $|Pders\;UNIV_1\;(r^*)|$\\
+  $\leq$ & $|\prod\,(Pders\;UNIV_1\;r)\,(r^*)|$
+  & by Lem~\ref{centraltwo}\\
+  $\leq$ & $|Pders\;UNIV_1\;r|$
+  & by Lem~\ref{centralthree} \\
+  $\leq$ & $awidth(r)$\\
+\end{tabular}
+\end{center}
+\end{proof}\bigskip
+
+\noindent
+From this lemma we can derive the next corollary which extends
+the property to $UNIV$ ($= UNIV_1 \cup \{[]\}$):
+
+\begin{cor}
+$\forall r.\;\;|Pders\;UNIV\;r| \leq awidth(r) + 1$
+\end{cor}
+
+\begin{proof} This can be proved as follows:
+\begin{center}
+\begin{tabular}{r@{\hspace{2mm}}ll}
+  & $|Pders\;UNIV\;r|$\\
+  $=$ & $|Pders\;(UNIV_1 \cup \{[]\})\;r|$\\
+  $=$ & $|(Pders\;UNIV_1\,r) \;\cup\;\{r\}|$\\
+  $\leq$ & $|Pders\;UNIV_1\,r| + 1$\\
+  $\leq$ & $awidth(r) + 1$ & by Lem~\ref{two}\\
+\end{tabular}
+\end{center}
+\end{proof}
+
+\noindent
+From the last corollary, it is easy to infer Antimirov's
+Thm~\ref{one}, because
+
+\[ Pders\,A\,r \subseteq Pders\,UNIV\,r \]
+
+
+\noindent
+for all sets $A$.\bigskip
+
+\noindent
+While I was a bit dismissive above about the intelligibility
+of Antimirov's paper, you have to admit this proof is a work of
+beauty. It only gives a bound (\textit{awidth}) for the \emph{number} of
+regular expressions in the de\-rivatives---this is what is important for
+constructing NFAs. Maybe it has not been important before, but I have
+never seen a result about the \emph{size} of the partial
+derivatives.\footnote{Update: I have now seen a paper which proves
+  this result as well.} However, a very crude bound, namely
+$(size(r)^2 + 1) \times (awidth(r) + 1)$, can easily be derived
+using Antimirov's result. The definition we need for this is a
+function that collects the subexpressions from which partial derivatives
+are built:
+
+
+\begin{center}
+\begin{tabular}{lcl}
+  $subs(\ZERO)$ & $\dn$ & $\{\ZERO\}$\\
+  $subs(\ONE)$ & $\dn$ & $\{\ONE\}$\\
+  $subs(c)$ & $\dn$ & $\{c, \ONE\}$\\
+  $subs(r_1 + r_2)$ & $\dn$ & $\{r_1 + r_2\}\,\cup\,subs(r_1) \,\cup\, subs(r_2)$\\
+  $subs(r_1 \cdot r_2)$ & $\dn$ & $\{r_1 \cdot r_2\}\,\cup (\prod\,subs(r_1)\;r_2)\,\cup \,
+  subs(r_1) \,\cup\, subs(r_2)$\\
+  $subs(r^*)$ & $\dn$ & $\{r^*\}\,\cup\,(\prod\,subs(r)\;r^*) \,\cup\, subs(r)$\\
+\end{tabular}
+\end{center}
+
+\noindent
+We can show that
+
+\begin{lem}
+$pders\,s\,r \subseteq subs(r)$
+\end{lem}
+
+\noindent
+This is a relatively simple induction on $r$. The point is that for every element
+in $subs(r)$, the maximum size is given by
+
+\begin{lem}
+  If $r' \in subs(r)$ then $size(r') \le 1 + size(r)^2$.
+\end{lem}
+
+\noindent
+Again the proof is a relatively simple induction on $r$.
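+
+\noindent
+The two lemmas above are easy to test on concrete regular expressions.
+The following rough Scala sketch mirrors the definition of $subs$; it
+assumes the \texttt{Rexp} enum and the \texttt{pder}/\texttt{pders}
+functions from \texttt{progs/matcher/re1.sc}, and it takes $size$ to be
+the number of nodes of a regular expression (a function not spelled out
+in these notes).
+
+\begin{verbatim}
+// sketch only; assumes the Rexp enum and pder/pders from re1.sc
+def size(r: Rexp) : Int = r match {
+  case ZERO => 1
+  case ONE => 1
+  case CHAR(_) => 1
+  case ALT(r1, r2) => 1 + size(r1) + size(r2)
+  case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+  case STAR(r1) => 1 + size(r1)
+}
+
+// the subexpressions from which partial derivatives are built
+def subs(r: Rexp) : Set[Rexp] = r match {
+  case ZERO => Set(ZERO)
+  case ONE => Set(ONE)
+  case CHAR(c) => Set(CHAR(c), ONE)
+  case ALT(r1, r2) => Set(r) ++ subs(r1) ++ subs(r2)
+  case SEQ(r1, r2) => Set(r) ++ subs(r1).map(SEQ(_, r2)) ++
+                      subs(r1) ++ subs(r2)
+  case STAR(r1) => Set(r) ++ subs(r1).map(SEQ(_, STAR(r1))) ++ subs(r1)
+}
+
+// checking the two lemmas on an example regular expression
+val rex = SEQ(ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('b'))), STAR(CHAR('c')))
+val ps = pders("ab".toList, Set(rex))
+assert(ps.subsetOf(subs(rex)))
+assert(ps.forall(p => size(p) <= 1 + size(rex) * size(rex)))
+\end{verbatim}
+
+\noindent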
Stringing Antimirov's result
+and the lemma above together gives
+
+\begin{thm}
+$\sum_{r' \in pders\,s\,r}.\;size(r') \le (size(r)^2 + 1) \times (awidth(r) + 1)$
+\end{thm}
+
+\noindent
+Since $awidth$ is never bigger than the $size$ of a regular expression,
+one can also state the bound as follows:
+
+\[
+\sum_{r' \in pders\,s\,r}.\;size(r') \le (size(r) + 1)^3
+\]
+
+\noindent
+This, by the way, also holds for $Pders$, namely
+
+\[
+\sum_{r' \in Pders\,A\,r}.\;size(r') \le (size(r) + 1)^3
+\]
+
+\noindent
+for all $r$ and $A$. If one is interested in the height of the partial derivatives, one can derive:
+
+\[
+\forall\,r' \in pders\,s\,r.\;height(r') \le height(r) + 1
+\]
+
+
+\noindent
+meaning the height of the partial derivatives is never bigger than
+the height of the original regular expression (plus one).
+
+\section*{NFA Construction via Antimirov's Partial Derivatives}
+
+Let's finish these notes with the construction of an NFA for a regular
+expression using partial derivatives. As usual an automaton is a
+quintuple $(Q, A, \delta, q_0, F)$ where $Q$ is the set of states of
+the automaton, $A$ is the alphabet, $q_0$ is the starting state and
+$F$ is the set of accepting states. For DFAs, $\delta$ is a (partial)
+function from state $\times$ character to state. For NFAs it is a
+relation on state $\times$ character $\times$ state. The
+non-determinism can be seen as follows: consider three
+distinct states $q_1$, $q_2$ and $q_3$; then the relation can
+include both $(q_1, a, q_2)$ and $(q_1, a, q_3)$, which means that for
+the character $a$ there is a transition from $q_1$ to both $q_2$ and $q_3$.
+
+Antimirov's NFA for a regular expression $r$ is then
+given by the quintuple
+\[(PD(r), A, \delta_{PD}, r, F)\]
+
+\noindent
+where $PD(r)$ is the set of all partial
+derivatives according to all strings, that is
+\[
+PD(r) \;\dn\; \textit{Pders}\;\textit{UNIV}\;r
+\]
+
+\noindent
+Because of the previous proof, we know that this set is finite. We
+also see that the states in Antimirov's NFA are ``labelled'' by single
+regular expressions. The starting state is labelled with the original
+regular expression $r$. The set of accepting states $F$ consists of all states
+$r'\in PD(r)$ where $r'$ is nullable. The relation $\delta_{PD}$ contains a triple
+\[
+(r_1, c, r_2)
+\]
+
+\noindent
+for every $r_1 \in PD(r)$ and $r_2 \in \textit{pder}\,c\,r_1$. This is
+in general a ``non-deterministic'' relation because the set of partial
+derivatives often contains more than one element. A nice example of
+an NFA constructed via Antimirov's partial derivatives is given in
+\cite{IlieYu2003} on Page 378.
+
+The difficulty in this construction is of course to find the set of
+partial derivatives according to \emph{all} strings. However, it seems
+that a procedure that enumerates strings according to their size, until
+no new partial derivative is found, suffices. There are various
+improvements that apply clever tricks in order to discover this set more
+efficiently.
+
+
+\begin{thebibliography}{999}
+
+\bibitem{IlieYu2003}
+  L.~Ilie and S.~Yu,
+  \emph{Reducing NFAs by Invariant Equivalences}. 
+ In Theoretical Computer Science, Volume 306(1--3), Pages 373–-390, 2003.\\ + \url{https://core.ac.uk/download/pdf/82545723.pdf} + +\end{thebibliography} + + + + + + +\end{document} + + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r 0c491eff5b01 -r 14e5ae1fb541 handouts/build.sc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/handouts/build.sc Fri Sep 05 16:59:48 2025 +0100 @@ -0,0 +1,68 @@ +#!/usr/bin/env amm + + +/* +val files = Seq("hw01.tex", + "hw02.tex", + "hw03.tex", + "hw04.tex", + "hw05.tex", + "hw06.tex", + "hw07.tex", + "hw08.tex", + "hw09.tex") + +val pdf_files = files.map(s => s.stripSuffix("tex") ++ "pdf") + + +@main +def make() = { + for (f <- files) { + println(s"Processing $f ...") + os.proc("lualatex", f).call(stdout = os.Inherit, stdin = os.Inherit) + } +} + +@main +def make_sols() = { + for (f <- files) { + val old_pdf = f.stripSuffix(".tex") ++ ".pdf" + val new_pdf = f.stripSuffix(".tex") ++ "-sol.pdf" + println(s"Processing $f -> $new_pdf ...") + os.proc("lualatex", f, "sol").call(stdout = os.Inherit, stdin = os.Inherit) + os.move.over(os.pwd / old_pdf, os.pwd / new_pdf) + } +} + +@main +def all() = { + make_sols() + make() +} + +@main +def hg() = { + println(os.proc("hg", "commit", "-m texupdate", files ++ pdf_files).call()) + println(os.proc("hg", "push").call()) +} + +*/ + +// extensions of files to be deleted +val todelete = + List("fdb_latexmk", + "log", + "aux", + "xdv", + "out", + "fls") + + +@main +def clean() = { + for (f <- os.list.stream(os.pwd) if todelete.contains(f.ext)) { + println(s"Delete: ${f.last}") + os.remove(f) + } +} + diff -r 0c491eff5b01 -r 14e5ae1fb541 progs/Matcher.thy --- a/progs/Matcher.thy Mon Feb 03 13:25:59 2025 +0000 +++ b/progs/Matcher.thy Fri Sep 05 16:59:48 2025 +0100 @@ -107,6 +107,8 @@ apply(simp only: L.simps) apply(simp) (* SEQ case *) +apply(simp only: L.simps) +apply(simp) oops lemma nullable_correctness: diff -r 0c491eff5b01 -r 14e5ae1fb541 progs/crawler.scala --- a/progs/crawler.scala Mon Feb 03 13:25:59 2025 +0000 +++ b/progs/crawler.scala Fri Sep 05 16:59:48 2025 +0100 @@ -1,3 +1,15 @@ +// Try out requests library +// +// https://github.com/com-lihaoyi/requests-scala +// +// +import $ivy.`com.lihaoyi::requests:0.9.0` + + + +requests.get("https://nms.kcl.ac.uk/christian.urban/") + + import io.Source import scala.util.matching.Regex diff -r 0c491eff5b01 -r 14e5ae1fb541 progs/lexer/lex.sc --- a/progs/lexer/lex.sc Mon Feb 03 13:25:59 2025 +0000 +++ b/progs/lexer/lex.sc Fri Sep 05 16:59:48 2025 +0100 @@ -7,28 +7,33 @@ // -// regular expressions including records -abstract class Rexp -case object ZERO extends Rexp -case object ONE extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class RECD(x: String, r: Rexp) extends Rexp - // records for extracting strings or tokens - +// regular expressions including recods +// for extracting strings or tokens +enum Rexp { + case ZERO + case ONE + case CHAR(c: Char) + case ALT(r1: Rexp, r2: Rexp) + case SEQ(r1: Rexp, r2: Rexp) + case STAR(r: Rexp) + case RECD[A](label: A, r: Rexp) +} +import Rexp._ + // values -abstract class Val -case object Empty extends Val -case class Chr(c: Char) extends Val -case class Sequ(v1: Val, v2: Val) extends Val -case class Left(v: Val) extends Val -case class Right(v: Val) extends Val -case class Stars(vs: List[Val]) extends Val -case class Rec(x: String, v: Val) extends Val - +enum Val 
{ + case Empty + case Chr(c: Char) + case Sequ(v1: Val, v2: Val) + case Left(v: Val) + case Right(v: Val) + case Stars(vs: List[Val]) + case Rec[A](label: A, v: Val) +} +import Val._ + // some convenience for typing in regular expressions +import scala.language.implicitConversions def charlist2rexp(s : List[Char]): Rexp = s match { case Nil => ONE @@ -48,9 +53,6 @@ // to use & for records, instead of $ which had // its precedence be changed in Scala 3 -extension (s: String) { - def & (r: Rexp) = RECD(s, r) -} val TEST = ("ab" | "ba").% @@ -91,21 +93,27 @@ // extracts an environment from a value; // used for tokenising a string -def env(v: Val) : List[(String, String)] = v match { +//import scala.reflect.ClassTag + +def env[A](v: Val) : List[(A, String)] = v match { case Empty => Nil case Chr(c) => Nil case Left(v) => env(v) case Right(v) => env(v) case Sequ(v1, v2) => env(v1) ::: env(v2) case Stars(vs) => vs.flatMap(env) - case Rec(x, v) => (x, flatten(v))::env(v) + case Rec[A](x, v) => (x, flatten(v))::env(v) } // The injection and mkeps part of the lexer //=========================================== -def mkeps(r: Rexp) : Val = r match { +// the pattern-matches are defined to be @unchecked +// because they do not need to be defined for +// all cases + +def mkeps(r: Rexp) : Val = (r: @unchecked) match { case ONE => Empty case ALT(r1, r2) => if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2)) @@ -114,7 +122,7 @@ case RECD(x, r) => Rec(x, mkeps(r)) } -def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { +def inj(r: Rexp, c: Char, v: Val) : Val = ((r, v) : @unchecked) match { case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) @@ -132,13 +140,27 @@ case c::cs => inj(r, c, lex(der(c, r), cs)) } -def lexing(r: Rexp, s: String) = - env(lex(r, s.toList)) + println(lex(("ab" | "a") ~ (ONE | "b"), "ab".toList)) println(lex(STAR("aa" | "a"), "aaa".toList)) +println(lex(STAR(STAR("a")), "aaa".toList)) + +val re = ("a" | "ab") ~ ("c" | "bc") + +println(pders1("abc", re).toList.mkString("\n")) +pders('a', pder('a', re)))) +draw(simp(der('a', der('a', der('a', re))))) + +size(simp(ders(, re))) +size(simp(der('a', der('a', re)))) +size(simp(der('a', der('a', der('a', re))))) + + +lex(re, "aaaaa".toList) + // The Lexing Rules for the WHILE Language def PLUS(r: Rexp) = r ~ r.% @@ -163,25 +185,31 @@ val STRING: Rexp = "\"" ~ SYM.% ~ "\"" -val WHILE_REGS = (("k" & KEYWORD) | - ("i" & ID) | - ("o" & OP) | - ("n" & NUM) | - ("s" & SEMI) | - ("str" & STRING) | - ("p" & (LPAREN | RPAREN)) | - ("w" & WHITESPACE)).% +enum TAGS { + case Key, Id, Op, Num, Semi, Str, Paren, Wht +} +import TAGS._ + +extension (t: TAGS) { + def & (r: Rexp) = RECD[TAGS](t, r) +} -val KY : Rexp = "if" | "read" | "write" -val WH : Rexp = " " | "\n" +def lexing(r: Rexp, s: String) = + env[TAGS](lex(r, s.toList)) -val TRIV_REGS = (("k" & KY) | - ("w" & WHITESPACE)).% +val WHILE_REGS = ((Key & KEYWORD) | + (Id & ID) | + (Op & OP) | + (Num & NUM) | + (Semi & SEMI) | + (Str & STRING) | + (Paren & (LPAREN | RPAREN)) | + (Wht & WHITESPACE)).% + // Two Simple While Tests //======================== - -//@arg(doc = "small tests") + @main def small() = { diff -r 0c491eff5b01 -r 14e5ae1fb541 progs/matcher/re1.sc --- a/progs/matcher/re1.sc Mon Feb 03 13:25:59 2025 +0000 +++ b/progs/matcher/re1.sc Fri Sep 05 16:59:48 2025 +0100 @@ -10,15 +10,16 @@ // -// regular expressions -abstract class Rexp -case object 
ZERO extends Rexp // matches nothing -case object ONE extends Rexp // matches an empty string -case class CHAR(c: Char) extends Rexp // matches a character c -case class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternative -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence -case class STAR(r: Rexp) extends Rexp // star - +// regular expressions (as enum in Scala 3) +enum Rexp { + case ZERO // matches nothing + case ONE // matches an empty string + case CHAR(c: Char) // matches a character c + case ALT(r1: Rexp, r2: Rexp) // alternative + case SEQ(r1: Rexp, r2: Rexp) // sequence + case STAR(r: Rexp) // star +} +import Rexp._ // nullable function: tests whether a regular // expression can recognise the empty string @@ -172,9 +173,74 @@ } } + + +// Some code for pretty printing regexes as trees + +def implode(ss: Seq[String]) = ss.mkString("\n") +def explode(s: String) = s.split("\n").toList + +def lst(s: String) : String = explode(s) match { + case hd :: tl => implode(" └" ++ hd :: tl.map(" " ++ _)) + case Nil => "" +} + +def mid(s: String) : String = explode(s) match { + case hd :: tl => implode(" ├" ++ hd :: tl.map(" │" ++ _)) + case Nil => "" +} + +def indent(ss: Seq[String]) : String = ss match { + case init :+ last => implode(init.map(mid) :+ lst(last)) + case _ => "" +} + +def pp(e: Rexp) : String = e match { + case ZERO => "0\n" + case ONE => "1\n" + case CHAR(c) => s"$c\n" + case ALT(r1, r2) => "ALT\n" ++ pps(r1, r2) + case SEQ(r1, r2) => "SEQ\n" ++ pps(r1, r2) + case STAR(r) => "STAR\n" ++ pps(r) +} +def pps(es: Rexp*) = indent(es.map(pp)) + + @main -def all() = { test1(); test2() ; test3() } +def test4() = { + println(pp(r2)) + println(pp(ders("x".toList, r2))) + println(pp(ders("xy".toList, r2))) + println(pp(ders("xyz".toList, r2))) +} + +@main +def all() = { test1(); test2() ; test3() ; test4() } + +// partial derivatives produce a set of regular expressions +def pder(c: Char, r: Rexp) : Set[Rexp] = r match { + case ZERO => Set() + case ONE => Set() + case CHAR(d) => if (c == d) Set(ONE) else Set() + case ALT(r1, r2) => pder(c, r1) ++ pder(c, r2) + case SEQ(r1, r2) => { + (for (pr1 <- pder(c, r1)) yield SEQ(pr1, r2)) ++ + (if (nullable(r1)) pder(c, r2) else Set()) + } + case STAR(r1) => { + for (pr1 <- pder(c, r1)) yield SEQ(pr1, STAR(r1)) + } +} + +def pders(s: List[Char], rs: Set[Rexp]) : Set[Rexp] = s match { + case Nil => rs + case c::s => pders(s, rs.flatMap(pder(c, _))) +} + +def pders1(s: String, r: Rexp) = pders(s.toList, Set(r)) + + diff -r 0c491eff5b01 -r 14e5ae1fb541 progs/matcher/re3.sc --- a/progs/matcher/re3.sc Mon Feb 03 13:25:59 2025 +0000 +++ b/progs/matcher/re3.sc Fri Sep 05 16:59:48 2025 +0100 @@ -200,3 +200,42 @@ def fail() = { test3(); test4(); test5() } +// simplification +def simp2(r: Rexp) : Rexp = r match { + case ALT(r1, r2) => (simp2(r1), simp2(r2)) match { + case (ZERO, r2s) => r2s + case (r1s, ZERO) => r1s + //case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s) + case (r1s, r2s) => ALT(r1s, r2s) + } + case SEQ(r1, r2) => (simp2(r1), simp2(r2)) match { + case (ZERO, _) => ZERO + case (_, ZERO) => ZERO + //case (ONE, r2s) => r2s + //case (r1s, ONE) => r1s + case (r1s, r2s) => SEQ(r1s, r2s) + } + case r => r +} + +// some convenience for typing in regular expressions +import scala.language.implicitConversions + +def charlist2rexp(s : List[Char]): Rexp = s match { + case Nil => ONE + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} + +given Conversion[String, Rexp] = (s => charlist2rexp(s.toList)) + +val HELLO : 
Rexp = "hello" + +extension (r: Rexp) { + def | (s: Rexp) = ALT(r, s) + def % = STAR(r) + def ~ (s: Rexp) = SEQ(r, s) +} + +val re = (ONE | "a" | "ab") ~ ("c" | "bc") ~ ("d" | "e") +simp2(der('d', der('c', der('b', der('a', re))))) \ No newline at end of file diff -r 0c491eff5b01 -r 14e5ae1fb541 progs/parser-combinators/comb2.sc --- a/progs/parser-combinators/comb2.sc Mon Feb 03 13:25:59 2025 +0000 +++ b/progs/parser-combinators/comb2.sc Fri Sep 05 16:59:48 2025 +0100 @@ -59,6 +59,8 @@ } } + + // atomic parser for identifiers (variable names) case object IdParser extends Parser[String, String] { val reg = "[a-z][a-z,0-9]*".r @@ -83,11 +85,20 @@ // the following string interpolation allows us to write // StrParser(_some_string_) more conveniently as // -// p"<_some_string_>" +// p"_some_string_" extension (sc: StringContext) def p(args: Any*) = StrParser(sc.s(args*)) + + +// the following is an alternative way to achieve the same, except +// that the p needs parentheses as in +// +// p("_some_string_") +// +//import StrParser.{apply => p} + // the abstract syntax trees for the WHILE language abstract class Stmt abstract class AExp diff -r 0c491eff5b01 -r 14e5ae1fb541 progs/while-arrays/compile_arrays.sc --- a/progs/while-arrays/compile_arrays.sc Mon Feb 03 13:25:59 2025 +0000 +++ b/progs/while-arrays/compile_arrays.sc Fri Sep 05 16:59:48 2025 +0100 @@ -7,7 +7,7 @@ // // amm compile_arrays.sc -//> using toolkit 0.6.0 +//> using toolkit 0.7.0 // the abstract syntax trees for WHILE diff -r 0c491eff5b01 -r 14e5ae1fb541 slides/slides01.pdf Binary file slides/slides01.pdf has changed