# HG changeset patch # User Christian Urban # Date 1757087988 -3600 # Node ID 14e5ae1fb54135f6f4a890014a34984e3c40de34 # Parent 0c491eff5b01d992af41afcc8fcff993caa5c5dd updated diff -r 0c491eff5b01 -r 14e5ae1fb541 LINKS --- a/LINKS Mon Feb 03 13:25:59 2025 +0000 +++ b/LINKS Fri Sep 05 16:59:48 2025 +0100 @@ -3,6 +3,11 @@ http://aalhour.com/awesome-compilers https://github.com/aalhour/awesome-compilers +---------------- +https://cs3110.github.io/textbook/cover.html + +book and youtube videos about programming in ocaml + ---------------- Video about Language design @@ -244,4 +249,4 @@ ========================= How Clang compiles a function -https://blog.regehr.org/archives/1605 \ No newline at end of file +https://blog.regehr.org/archives/1605 diff -r 0c491eff5b01 -r 14e5ae1fb541 cw2_marking/cw2_test.sh --- a/cw2_marking/cw2_test.sh Mon Feb 03 13:25:59 2025 +0000 +++ b/cw2_marking/cw2_test.sh Fri Sep 05 16:59:48 2025 +0100 @@ -30,7 +30,7 @@ # functional tests function scala_assert { - rm -rf /Users/cu/.ammonite/cache/3.0.0-M2/* 2> /dev/null ; + rm -rf /Users/cu/.ammonite/cache/3.*/* 2> /dev/null ; (ulimit -t 30; JAVA_OPTS="-Xmx1g -XX:MaxJavaStackTraceDepth=10" amm -s "$1" 1>> $out 2>> $out) } diff -r 0c491eff5b01 -r 14e5ae1fb541 cw3_marking/cw3_test.sh --- a/cw3_marking/cw3_test.sh Mon Feb 03 13:25:59 2025 +0000 +++ b/cw3_marking/cw3_test.sh Fri Sep 05 16:59:48 2025 +0100 @@ -30,7 +30,7 @@ # functional tests function scala_assert { - rm -rf /Users/cu/.ammonite/cache/3.0.0-M2/* 2> /dev/null ; + rm -rf /Users/cu/.ammonite/cache/3.*/* 2> /dev/null ; (ulimit -t 30; JAVA_OPTS="-Xmx1g -XX:MaxJavaStackTraceDepth=10" amm -s "$1" 1>> $out 2>> $out) } diff -r 0c491eff5b01 -r 14e5ae1fb541 cw4_marking/cw4_test.sh --- a/cw4_marking/cw4_test.sh Mon Feb 03 13:25:59 2025 +0000 +++ b/cw4_marking/cw4_test.sh Fri Sep 05 16:59:48 2025 +0100 @@ -30,7 +30,7 @@ # functional tests function scala_assert { - rm -rf /Users/cu/.ammonite/cache/3.0.0-M2/* 2> /dev/null ; + rm -rf /Users/cu/.ammonite/cache/3.*/* 2> /dev/null ; (ulimit -t 30; JAVA_OPTS="-Xmx1g -XX:MaxJavaStackTraceDepth=10" amm -s "$1" 1>> $out 2>> $out) } diff -r 0c491eff5b01 -r 14e5ae1fb541 handouts/antimirov.pdf Binary file handouts/antimirov.pdf has changed diff -r 0c491eff5b01 -r 14e5ae1fb541 handouts/antimirov.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/handouts/antimirov.tex Fri Sep 05 16:59:48 2025 +0100 @@ -0,0 +1,477 @@ +\documentclass[12pt]{article} +\usepackage{../style} +\usepackage{../langs} +\usepackage{graphicx} + +\newtheorem{thm}{Theorem} +\newtheorem{lem}[thm]{Lemma} +\newtheorem{cor}[thm]{Corollary} +\newenvironment{proof}{\paragraph{Proof:}\it}{\hfill$\square$} + + +\begin{document} + + +\section*{Antimirov's Proof about Pders} + +These are some rough notes about the result by Antimirov establishing +a bound on the number of regular expressions in a partial +derivative. From this bound on the number of partial derivatives one +can easily construct an NFA for a regular expression, but one can also +derive a bound on the size of the partial derivatives. This is what we +do first. 
We start with the following definitions:
+
+\begin{itemize}
+\item $pder\,c\,r$ --- partial derivative according to a character; this can be defined
+  inductively as follows:
+
+  \begin{center}
+  \begin{tabular}{l@ {\hspace{2mm}}c@ {\hspace{2mm}}l}
+  $\textit{pder}\, c\, (\ZERO)$ & $\dn$ & $\emptyset$\\
+  $\textit{pder}\, c\, (\ONE)$  & $\dn$ & $\emptyset$ \\
+  $\textit{pder}\, c\, (d)$     & $\dn$ & if $c = d$ then $\{\ONE\}$ else $\emptyset$\\
+  $\textit{pder}\, c\, (r_1 + r_2)$ & $\dn$ & $\textit{pder}\, c\, r_1 \;\cup\; \textit{pder}\, c\, r_2$\\
+  $\textit{pder}\, c\, (r_1 \cdot r_2)$ & $\dn$ & if $\textit{nullable} (r_1)$\\
+  & & then $\prod\,(\textit{pder}\,c\,r_1)\,r_2 \;\cup\; \textit{pder}\, c\, r_2$\\
+  & & else $\prod\,(\textit{pder}\, c\, r_1)\,r_2$\\
+  $\textit{pder}\, c\, (r^*)$ & $\dn$ & $\prod\,(\textit{pder}\,c\,r)\, (r^*)$
+  \end{tabular}
+\end{center}
+
+\item $pder^+\,c\,rs$ --- partial derivatives for a set of regular expressions $rs$
+\item $pders\,s\,r$ --- partial derivatives of a regular expression
+  according to a string
+
+\item $Pders\,A\,r \dn \bigcup_{s\in A}. pders\,s\,r$ --- partial
+  derivatives according to a language (a set of strings)
+\item $|rs|$ is the size of a set of regular expressions $rs$, that is
+  the number of elements in this
+  set (also known as the cardinality of the set)
+\item $\prod\,rs\;r \dn \{r_1 \cdot r \;|\; r_1 \in rs\}$ --- this is
+  a convenient notation for writing sets of sequence regular
+  expressions. It essentially ``appends'' the regular expression $r$ to all
+  regular expressions in the set $rs$. As a result one can write
+  the sequence case for partial derivatives (see above) more conveniently
+  as
+  \[
+  pder\,c\,(r_1\cdot r_2) \dn
+  \begin{cases}
+  \prod\,(pder\,c\,r_1)\,r_2\;\cup\;pder\,c\,r_2 &
+  \!\!\textit{provided}\,r_1\, \textit{is nullable}\\
+  \prod\,(pder\,c\,r_1)\,r_2 & \!\!\textit{otherwise}\\
+  \end{cases}
+  \]
+\item $\textit{PSuf}\,s$ is the set of all non-empty suffixes of $s$, defined as
+  \[
+  \textit{PSuf}\, s \dn \{v.\;v \not= [] \wedge \exists u. u \,@\, v = s \}
+\]
+
+So for the string $abc$ the non-empty suffixes are $c$, $bc$ and
+$abc$. Also we have that
+$\textit{PSuf}\,(s\,@\,[c]) = ((\textit{PSuf}\,s)\,@@\,[c]) \cup
+\{[c]\}$. Here $@@$ means to concatenate $[c]$ to the end of
+all strings in $\textit{PSuf}\,s$; in this equation we also
+need to add $\{[c]\}$ in order to make the equation hold.
+\end{itemize}
+
+\noindent
+To state Antimirov's result we need the definition of the
+\emph{alphabetic width} of a regular expression:
+
+\begin{center}
+\begin{tabular}{lcl}
+  $awidth(\ZERO)$ & $\dn$ & $0$\\
+  $awidth(\ONE)$ & $\dn$ & $0$\\
+  $awidth(c)$ & $\dn$ & $1$\\
+  $awidth(r_1 + r_2)$ & $\dn$ & $awidth(r_1) + awidth(r_2)$\\
+  $awidth(r_1 \cdot r_2)$ & $\dn$ & $awidth(r_1) + awidth(r_2)$\\
+  $awidth(r^*)$ & $\dn$ & $awidth(r)$\\
+\end{tabular}
+\end{center}
+
+\noindent
+This function counts how many characters occur in a regular expression.
+Antimirov's result states
+
+\begin{thm}\label{one}
+$\forall\,A\,r\,.\;\;|Pders\;A\;r| \leq awidth(r) + 1$
+\end{thm}
+
+\noindent
+Note that this theorem holds for any set of strings $A$, for example
+for the set of all strings, which I will write as $\textit{UNIV}$, and also
+for the set $\{s\}$ containing only a single string $s$.
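+
+\noindent
+As a small illustration of this bound (this example is not from
+Antimirov's paper), consider the regular expression
+$r = (a\cdot b) + b$ with $awidth(r) = 3$. Using the definitions
+above one obtains
+
+\begin{center}
+\begin{tabular}{lcl}
+  $pder\,a\,r$ & $=$ & $\{\ONE\cdot b\}$\\
+  $pder\,b\,r$ & $=$ & $\{\ONE\}$\\
+  $pder\,a\,(\ONE\cdot b)$ & $=$ & $\emptyset$\\
+  $pder\,b\,(\ONE\cdot b)$ & $=$ & $\{\ONE\}$\\
+  $pder\,c\,\ONE$ & $=$ & $\emptyset$ \quad for every character $c$\\
+\end{tabular}
+\end{center}
+
+\noindent
+and partial derivatives with respect to any other character are empty.
+Therefore $Pders\,UNIV\,r = \{(a\cdot b) + b,\;\ONE\cdot b,\;\ONE\}$, which
+has $3$ elements and thus stays within the bound $awidth(r) + 1 = 4$.
+
+\noindent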
Instantiating $A$ in Thm~\ref{one} with the singleton set $\{s\}$ gives the
+following simple corollary:
+
+\begin{cor}
+$\forall\,s\,r\,.\;\;|pders\;s\;r| \leq awidth(r) + 1$
+\end{cor}
+
+\noindent
+This property says that for every string $s$, the number of regular
+expressions in the partial derivative $pders\,s\,r$ can never be bigger than
+$awidth(r) + 1$. Interestingly, we do not show Thm~\ref{one} for all
+sets of strings $A$ directly, but rather first for one particular set of
+strings, which I call $UNIV_1$. It contains all strings except the empty string
+(remember that $UNIV$ contains all strings).\bigskip
+
+\noindent
+Let us try to give below a comprehensible account of Antimirov's proof
+of Thm~\ref{one}---I distinctly remember that Antimirov's paper is
+great, but pretty incomprehensible for the first 20+ times one reads
+that paper. The proof starts with the following much weaker property,
+namely that the sets of partial derivatives are always finite:
+
+\begin{lem}
+$\forall\,A\,r\,.\;\;(Pders\;A\;r)$ is a finite set.
+\end{lem}
+
+\noindent This lemma is needed because some reasoning steps in
+Thm~\ref{one} only work for finite sets, not infinite sets. But let us
+skip over the proof of this property for the moment and assume we
+already know that the sets of partial derivatives are always finite (this,
+for example, does not hold for unsimplified Brzozowski derivatives, which
+can be infinite for some sets of strings).
+
+There are some central lemmas about partial derivatives for $\cdot$ and $\_^*$.
+One is the following:
+
+\begin{lem}\label{central}\mbox{}\\
+  \[Pders\,UNIV_1\,(r_1\cdot r_2) \subseteq (\prod (Pders\,UNIV_1\, r_1)\,r_2) \;
+  \cup \; Pders\,UNIV_1\,r_2\]
+\end{lem}
+
+\begin{proof}
+  \noindent The proof is done via an induction over the following property
+  \[
+  pders\,s\,(r_1\cdot r_2) \subseteq (\prod (pders\,s\, r_1)\,r_2) \;
+  \cup \; Pders\,(\textit{PSuf}\,s)\,r_2
+  \]
+
+  \noindent
+  Note that this property uses $pders$ and $Pders$ together. The proof is done
+  by ``reverse'' induction on $s$, meaning the cases to analyse are the
+  empty string $[]$ and the case where a character is put at the end of the
+  string $s$, namely $s \,@\, [c]$. The case $[]$ is trivial. In the other
+  case we know by IH that
+
+  \[
+  pders\,s\,(r_1\cdot r_2) \subseteq (\prod (pders\,s\, r_1)\,r_2) \;
+  \cup \; Pders\,(\textit{PSuf}\,s)\,r_2
+  \]
+
+  \noindent
+  holds for $s$.
Then we have to show that it holds for $s\,@\,[c]$:
+
+  \begin{center}
+  \begin{tabular}{r@{\hspace{2mm}}ll}
+  & $pders\,(s\,@\,[c])\,(r_1\cdot r_2)$\\
+  $=$ & $pder^+\,c\,(pders\,s\,(r_1\cdot r_2))$\\
+  $\subseteq$ & $pder^+\,c\,(\prod (pders\,s\, r_1)\,r_2 \;
+  \cup \; Pders\,(\textit{PSuf}\,s)\,r_2)$\\
+  & \hfill{}by IH\\
+  $=$ & $(pder^+\,c\,(\prod (pders\,s\, r_1)\,r_2))\;\cup\;
+  (pder^+\,c\,(Pders\,(\textit{PSuf}\,s)\,r_2))$\\
+  $=$ & $(pder^+\,c\,(\prod (pders\,s\, r_1)\,r_2))\;\cup\;
+  (Pders\,(\textit{PSuf}\,(s\,@\,[c]))\,r_2)$\\
+  $\subseteq$ &
+  $(pder^+\,c\,(\prod (pders\,s\, r_1)\,r_2))\;\cup\;
+  (pder\,c\,r_2)\;\cup\;
+  (Pders\,(\textit{PSuf}\,s\,@@\,[c])\,r_2)$\\
+  $\subseteq$ & $\prod (pder^+\,c\,(pders\,s\, r_1))\,r_2\;\cup\;
+  (pder\,c\,r_2)\;\cup\;
+  (Pders\,(\textit{PSuf}\,s\,@@\,[c])\,r_2)$\\
+  $=$ & $(\prod (pders\,(s\,@\,[c])\, r_1)\,r_2)\;\cup\;
+  (pder\,c\,r_2)\;\cup\;
+  (Pders\,(\textit{PSuf}\,s\,@@\,[c])\,r_2)$\\
+  $\subseteq$ & $(\prod (pders\,(s\,@\,[c])\, r_1)\,r_2)\;\cup\;
+  (Pders\,(\textit{PSuf}\,(s\,@\,[c]))\,r_2)$\\
+  \end{tabular}
+  \end{center}
+
+  \noindent The lemma now follows because for an $s \in UNIV_1$ it holds that
+
+  \[
+  \prod\,(pders\,s\,r_1)\,r_2 \subseteq \prod (Pders\,UNIV_1\, r_1)\,r_2
+  \]
+
+  \noindent and
+
+  \[
+  Pders\,(\textit{PSuf}\,s)\,r_2 \subseteq Pders\,UNIV_1\,r_2
+  \]
+
+  \noindent By the property proved above, $pders\,s\,(r_1\cdot r_2)$ is
+  contained in the union of the left-hand sides of these two inclusions
+  for every string $s\in UNIV_1$.
+\end{proof}\medskip
+
+\noindent There is a similar lemma for the $^*$-regular expression, namely:
+
+\begin{lem}\label{centraltwo}
+$Pders\,UNIV_1\,(r^*) \subseteq \prod\, (Pders\,UNIV_1\,r)\,(r^*)$
+\end{lem}
+
+\noindent
+We omit the proof for the moment (it is similar to the proof of
+Lem~\ref{central}). We also need the following property about the
+cardinality of $\prod$:
+
+\begin{lem}\label{centralthree}
+  $|\prod\,(Pders\,A\,r_1)\,r_2| \le |Pders\,A\,r_1|$
+\end{lem}
+
+\noindent
+We only need the $\le$ version, which essentially says that there are
+at most as many sequences $r'\cdot r_2$ as there are elements $r'$ in
+the set $Pders\,A\,r_1$. Now for the proof of Thm~\ref{one}: the main
+induction in Antimirov's proof establishes the following:\footnote{Remember
+  that the hardest part in an induction proof is always to find the right
+  property that is strong enough and of the right shape for the
+  induction to go through.}
+
+\begin{lem}\label{two}
+$\forall r.\;\;|Pders\;UNIV_1\;r| \leq awidth(r)$
+\end{lem}
+
+\begin{proof} This is proved by induction on
+  $r$. The interesting cases are $r_1 + r_2$, $r_1\cdot r_2$ and
+  $r^*$. Let us start with the relatively simple case:\medskip
+
+\noindent
+\textbf{Case $r_1 + r_2$:} By induction hypothesis we know
+
+\begin{center}
+\begin{tabular}{l}
+  $|Pders\;UNIV_1\;r_1| \leq awidth(r_1)$\\
+  $|Pders\;UNIV_1\;r_2| \leq awidth(r_2)$
+\end{tabular}
+\end{center}
+
+\noindent
+In this case we can reason as follows:
+
+\begin{center}
+\begin{tabular}{r@{\hspace{2mm}}ll}
+  & $|Pders\;UNIV_1\;(r_1+r_2)|$\\
+  $=$ & $|(Pders\;UNIV_1\;r_1) \;\cup\; (Pders\;UNIV_1\;r_2)|$\\
+  $\leq$ & $|(Pders\;UNIV_1\;r_1)| \;+\; |(Pders\;UNIV_1\;r_2)|$ & (*)\\
+  $\leq$ & $awidth(r_1) + awidth(r_2)$\\
+  $\dn$ & $awidth(r_1 + r_2)$
+\end{tabular}
+\end{center}
+
+\noindent Note that (*) is a step that only works if one knows that
+$|(Pders\;UNIV_1\;r_1)|$ and so on are finite. The next case is
+a bit more interesting:\medskip
+
+\noindent
+\textbf{Case $r_1 \cdot r_2$:} We have the same induction
+hypothesis as in the case before.
+
+\begin{center}
+\begin{tabular}{r@{\hspace{2mm}}ll}
+  & $|Pders\;UNIV_1\;(r_1\cdot r_2)|$\\
+  $\leq$ & $|\prod\,(Pders\;UNIV_1\;r_1)\,r_2\;\cup\; (Pders\;UNIV_1\;r_2)|$
+  & by Lem~\ref{central}\\
+  $\leq$ & $|\prod\,(Pders\;UNIV_1\;r_1)\,r_2| \;+\; |(Pders\;UNIV_1\;r_2)|$\\
+  $\leq$ & $|Pders\;UNIV_1\;r_1| \;+\; |Pders\;UNIV_1\;r_2|$
+  & by Lem~\ref{centralthree} \\
+  $\leq$ & $awidth(r_1) + awidth(r_2)$\\
+  $\dn$ & $awidth(r_1 \cdot r_2)$
+\end{tabular}
+\end{center} \medskip
+
+\noindent
+\textbf{Case $r^*$:} Again we have the same induction
+hypothesis as in the cases before.
+
+\begin{center}
+\begin{tabular}{r@{\hspace{2mm}}ll}
+  & $|Pders\;UNIV_1\;(r^*)|$\\
+  $\leq$ & $|\prod\,(Pders\;UNIV_1\;r)\,(r^*)|$
+  & by Lem~\ref{centraltwo}\\
+  $\leq$ & $|Pders\;UNIV_1\;r|$
+  & by Lem~\ref{centralthree} \\
+  $\leq$ & $awidth(r)$\\
+\end{tabular}
+\end{center}
+\end{proof}\bigskip
+
+\noindent
+From this lemma we can derive the next corollary which extends
+the property to $UNIV$ ($= UNIV_1 \cup \{[]\}$):
+
+\begin{cor}
+$\forall r.\;\;|Pders\;UNIV\;r| \leq awidth(r) + 1$
+\end{cor}
+
+\begin{proof} This can be proved as follows:
+\begin{center}
+\begin{tabular}{r@{\hspace{2mm}}ll}
+  & $|Pders\;UNIV\;r|$\\
+  $=$ & $|Pders\;(UNIV_1 \cup \{[]\})\;r|$\\
+  $=$ & $|(Pders\;UNIV_1\,r) \;\cup\;\{r\}|$\\
+  $\leq$ & $|Pders\;UNIV_1\,r| + 1$\\
+  $\leq$ & $awidth(r) + 1$ & by Lem~\ref{two}\\
+\end{tabular}
+\end{center}
+\end{proof}
+
+\noindent
+From the last corollary, it is easy to infer Antimirov's
+Thm~\ref{one}, because
+
+\[ Pders\,A\,r \subseteq Pders\,UNIV\,r \]
+
+
+\noindent
+for all sets $A$.\bigskip
+
+\noindent
+While I was a bit dismissive above about the intelligibility
+of Antimirov's paper, you have to admit this proof is a work of
+beauty. It only gives a bound (\textit{awidth}) for the \emph{number} of
+regular expressions in the de\-rivatives---this is what is important for
+constructing NFAs. Maybe it has not been important before, but I have
+never seen a result about the \emph{size} of the partial
+derivatives.\footnote{Update: I have now seen a paper which proves
+  this result as well.} However, a very crude bound, namely
+$(size(r)^2 + 1) \times (awidth(r) + 1)$, can easily be derived
+using Antimirov's result. The definition we need for this is a
+function that collects the subexpressions from which partial derivatives
+are built:
+
+
+\begin{center}
+\begin{tabular}{lcl}
+  $subs(\ZERO)$ & $\dn$ & $\{\ZERO\}$\\
+  $subs(\ONE)$ & $\dn$ & $\{\ONE\}$\\
+  $subs(c)$ & $\dn$ & $\{c, \ONE\}$\\
+  $subs(r_1 + r_2)$ & $\dn$ & $\{r_1 + r_2\}\,\cup\,subs(r_1) \,\cup\, subs(r_2)$\\
+  $subs(r_1 \cdot r_2)$ & $\dn$ & $\{r_1 \cdot r_2\}\,\cup (\prod\,subs(r_1)\;r_2)\,\cup \,
+  subs(r_1) \,\cup\, subs(r_2)$\\
+  $subs(r^*)$ & $\dn$ & $\{r^*\}\,\cup\,(\prod\,subs(r)\;r^*) \,\cup\, subs(r)$\\
+\end{tabular}
+\end{center}
+
+\noindent
+We can show that
+
+\begin{lem}
+$pders\,s\,r \subseteq subs(r)$
+\end{lem}
+
+\noindent
+This is a relatively simple induction on $r$. The point is that for every element
+in $subs(r)$, the maximum size is given by
+
+\begin{lem}
+  If $r' \in subs(r)$ then $size(r') \le 1 + size(r)^2$.
+\end{lem}
+
+\noindent
+Again the proof is a relatively simple induction on $r$.
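+
+\noindent
+The two lemmas above are easy to test on concrete regular expressions.
+The following rough Scala sketch mirrors the definition of $subs$; it
+assumes the \texttt{Rexp} enum and the \texttt{pder}/\texttt{pders}
+functions from \texttt{progs/matcher/re1.sc}, and it takes $size$ to be
+the number of nodes of a regular expression (a function not spelled out
+in these notes).
+
+\begin{verbatim}
+// sketch only; assumes the Rexp enum and pder/pders from re1.sc
+def size(r: Rexp) : Int = r match {
+  case ZERO => 1
+  case ONE => 1
+  case CHAR(_) => 1
+  case ALT(r1, r2) => 1 + size(r1) + size(r2)
+  case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+  case STAR(r1) => 1 + size(r1)
+}
+
+// the subexpressions from which partial derivatives are built
+def subs(r: Rexp) : Set[Rexp] = r match {
+  case ZERO => Set(ZERO)
+  case ONE => Set(ONE)
+  case CHAR(c) => Set(CHAR(c), ONE)
+  case ALT(r1, r2) => Set(r) ++ subs(r1) ++ subs(r2)
+  case SEQ(r1, r2) => Set(r) ++ subs(r1).map(SEQ(_, r2)) ++
+                      subs(r1) ++ subs(r2)
+  case STAR(r1) => Set(r) ++ subs(r1).map(SEQ(_, STAR(r1))) ++ subs(r1)
+}
+
+// checking the two lemmas on an example regular expression
+val rex = SEQ(ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('b'))), STAR(CHAR('c')))
+val ps = pders("ab".toList, Set(rex))
+assert(ps.subsetOf(subs(rex)))
+assert(ps.forall(p => size(p) <= 1 + size(rex) * size(rex)))
+\end{verbatim}
+
+\noindent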
Stringing Antimirov's result
+and the lemma above together gives
+
+\begin{thm}
+$\sum_{r' \in pders\,s\,r}.\;size(r') \le (size(r)^2 + 1) \times (awidth(r) + 1)$
+\end{thm}
+
+\noindent
+Since $awidth$ is never bigger than the $size$ of a regular expression,
+one can also state the bound as follows:
+
+\[
+\sum_{r' \in pders\,s\,r}.\;size(r') \le (size(r) + 1)^3
+\]
+
+\noindent
+This, by the way, also holds for $Pders$, namely
+
+\[
+\sum_{r' \in Pders\,A\,r}.\;size(r') \le (size(r) + 1)^3
+\]
+
+\noindent
+for all $r$ and $A$. If one is interested in the height of the partial derivatives, one can derive:
+
+\[
+\forall\,r' \in pders\,s\,r.\;height(r') \le height(r) + 1
+\]
+
+
+\noindent
+meaning the height of the partial derivatives is never bigger than
+the height of the original regular expression (plus one).
+
+\section*{NFA Construction via Antimirov's Partial Derivatives}
+
+Let's finish these notes with the construction of an NFA for a regular
+expression using partial derivatives. As usual an automaton is a
+quintuple $(Q, A, \delta, q_0, F)$ where $Q$ is the set of states of
+the automaton, $A$ is the alphabet, $q_0$ is the starting state and
+$F$ is the set of accepting states. For DFAs, $\delta$ is a (partial)
+function from state $\times$ character to state. For NFAs it is a
+relation on state $\times$ character $\times$ state. The
+non-determinism can be seen as follows: consider three
+distinct states $q_1$, $q_2$ and $q_3$; then the relation can
+include both $(q_1, a, q_2)$ and $(q_1, a, q_3)$, which means that for
+the character $a$ there is a transition from $q_1$ to both $q_2$ and $q_3$.
+
+Antimirov's NFA for a regular expression $r$ is then
+given by the quintuple
+\[(PD(r), A, \delta_{PD}, r, F)\]
+
+\noindent
+where $PD(r)$ is the set of all partial
+derivatives according to all strings, that is
+\[
+PD(r) \;\dn\; \textit{Pders}\;\textit{UNIV}\;r
+\]
+
+\noindent
+Because of the previous proof, we know that this set is finite. We
+also see that the states in Antimirov's NFA are ``labelled'' by single
+regular expressions. The starting state is labelled with the original
+regular expression $r$. The set of accepting states $F$ consists of all states
+$r'\in PD(r)$ where $r'$ is nullable. The relation $\delta_{PD}$ contains a triple
+\[
+(r_1, c, r_2)
+\]
+
+\noindent
+for every $r_1 \in PD(r)$ and $r_2 \in \textit{pder}\,c\,r_1$. This is
+in general a ``non-deterministic'' relation because the set of partial
+derivatives often contains more than one element. A nice example of
+an NFA constructed via Antimirov's partial derivatives is given in
+\cite{IlieYu2003} on Page 378.
+
+The difficulty in this construction is of course to find the set of
+partial derivatives according to \emph{all} strings. However, it seems
+that a procedure that enumerates strings according to their size, until
+no new partial derivative is found, suffices. There are various
+improvements that apply clever tricks in order to discover this set more
+efficiently.
+
+
+\begin{thebibliography}{999}
+
+\bibitem{IlieYu2003}
+  L.~Ilie and S.~Yu,
+  \emph{Reducing NFAs by Invariant Equivalences}. 
+ In Theoretical Computer Science, Volume 306(1--3), Pages 373–-390, 2003.\\ + \url{https://core.ac.uk/download/pdf/82545723.pdf} + +\end{thebibliography} + + + + + + +\end{document} + + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r 0c491eff5b01 -r 14e5ae1fb541 handouts/build.sc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/handouts/build.sc Fri Sep 05 16:59:48 2025 +0100 @@ -0,0 +1,68 @@ +#!/usr/bin/env amm + + +/* +val files = Seq("hw01.tex", + "hw02.tex", + "hw03.tex", + "hw04.tex", + "hw05.tex", + "hw06.tex", + "hw07.tex", + "hw08.tex", + "hw09.tex") + +val pdf_files = files.map(s => s.stripSuffix("tex") ++ "pdf") + + +@main +def make() = { + for (f <- files) { + println(s"Processing $f ...") + os.proc("lualatex", f).call(stdout = os.Inherit, stdin = os.Inherit) + } +} + +@main +def make_sols() = { + for (f <- files) { + val old_pdf = f.stripSuffix(".tex") ++ ".pdf" + val new_pdf = f.stripSuffix(".tex") ++ "-sol.pdf" + println(s"Processing $f -> $new_pdf ...") + os.proc("lualatex", f, "sol").call(stdout = os.Inherit, stdin = os.Inherit) + os.move.over(os.pwd / old_pdf, os.pwd / new_pdf) + } +} + +@main +def all() = { + make_sols() + make() +} + +@main +def hg() = { + println(os.proc("hg", "commit", "-m texupdate", files ++ pdf_files).call()) + println(os.proc("hg", "push").call()) +} + +*/ + +// extensions of files to be deleted +val todelete = + List("fdb_latexmk", + "log", + "aux", + "xdv", + "out", + "fls") + + +@main +def clean() = { + for (f <- os.list.stream(os.pwd) if todelete.contains(f.ext)) { + println(s"Delete: ${f.last}") + os.remove(f) + } +} + diff -r 0c491eff5b01 -r 14e5ae1fb541 progs/Matcher.thy --- a/progs/Matcher.thy Mon Feb 03 13:25:59 2025 +0000 +++ b/progs/Matcher.thy Fri Sep 05 16:59:48 2025 +0100 @@ -107,6 +107,8 @@ apply(simp only: L.simps) apply(simp) (* SEQ case *) +apply(simp only: L.simps) +apply(simp) oops lemma nullable_correctness: diff -r 0c491eff5b01 -r 14e5ae1fb541 progs/crawler.scala --- a/progs/crawler.scala Mon Feb 03 13:25:59 2025 +0000 +++ b/progs/crawler.scala Fri Sep 05 16:59:48 2025 +0100 @@ -1,3 +1,15 @@ +// Try out requests library +// +// https://github.com/com-lihaoyi/requests-scala +// +// +import $ivy.`com.lihaoyi::requests:0.9.0` + + + +requests.get("https://nms.kcl.ac.uk/christian.urban/") + + import io.Source import scala.util.matching.Regex diff -r 0c491eff5b01 -r 14e5ae1fb541 progs/lexer/lex.sc --- a/progs/lexer/lex.sc Mon Feb 03 13:25:59 2025 +0000 +++ b/progs/lexer/lex.sc Fri Sep 05 16:59:48 2025 +0100 @@ -7,28 +7,33 @@ // -// regular expressions including records -abstract class Rexp -case object ZERO extends Rexp -case object ONE extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class RECD(x: String, r: Rexp) extends Rexp - // records for extracting strings or tokens - +// regular expressions including recods +// for extracting strings or tokens +enum Rexp { + case ZERO + case ONE + case CHAR(c: Char) + case ALT(r1: Rexp, r2: Rexp) + case SEQ(r1: Rexp, r2: Rexp) + case STAR(r: Rexp) + case RECD[A](label: A, r: Rexp) +} +import Rexp._ + // values -abstract class Val -case object Empty extends Val -case class Chr(c: Char) extends Val -case class Sequ(v1: Val, v2: Val) extends Val -case class Left(v: Val) extends Val -case class Right(v: Val) extends Val -case class Stars(vs: List[Val]) extends Val -case class Rec(x: String, v: Val) extends Val - +enum Val 
{ + case Empty + case Chr(c: Char) + case Sequ(v1: Val, v2: Val) + case Left(v: Val) + case Right(v: Val) + case Stars(vs: List[Val]) + case Rec[A](label: A, v: Val) +} +import Val._ + // some convenience for typing in regular expressions +import scala.language.implicitConversions def charlist2rexp(s : List[Char]): Rexp = s match { case Nil => ONE @@ -48,9 +53,6 @@ // to use & for records, instead of $ which had // its precedence be changed in Scala 3 -extension (s: String) { - def & (r: Rexp) = RECD(s, r) -} val TEST = ("ab" | "ba").% @@ -91,21 +93,27 @@ // extracts an environment from a value; // used for tokenising a string -def env(v: Val) : List[(String, String)] = v match { +//import scala.reflect.ClassTag + +def env[A](v: Val) : List[(A, String)] = v match { case Empty => Nil case Chr(c) => Nil case Left(v) => env(v) case Right(v) => env(v) case Sequ(v1, v2) => env(v1) ::: env(v2) case Stars(vs) => vs.flatMap(env) - case Rec(x, v) => (x, flatten(v))::env(v) + case Rec[A](x, v) => (x, flatten(v))::env(v) } // The injection and mkeps part of the lexer //=========================================== -def mkeps(r: Rexp) : Val = r match { +// the pattern-matches are defined to be @unchecked +// because they do not need to be defined for +// all cases + +def mkeps(r: Rexp) : Val = (r: @unchecked) match { case ONE => Empty case ALT(r1, r2) => if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2)) @@ -114,7 +122,7 @@ case RECD(x, r) => Rec(x, mkeps(r)) } -def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { +def inj(r: Rexp, c: Char, v: Val) : Val = ((r, v) : @unchecked) match { case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) @@ -132,13 +140,27 @@ case c::cs => inj(r, c, lex(der(c, r), cs)) } -def lexing(r: Rexp, s: String) = - env(lex(r, s.toList)) + println(lex(("ab" | "a") ~ (ONE | "b"), "ab".toList)) println(lex(STAR("aa" | "a"), "aaa".toList)) +println(lex(STAR(STAR("a")), "aaa".toList)) + +val re = ("a" | "ab") ~ ("c" | "bc") + +println(pders1("abc", re).toList.mkString("\n")) +pders('a', pder('a', re)))) +draw(simp(der('a', der('a', der('a', re))))) + +size(simp(ders(, re))) +size(simp(der('a', der('a', re)))) +size(simp(der('a', der('a', der('a', re))))) + + +lex(re, "aaaaa".toList) + // The Lexing Rules for the WHILE Language def PLUS(r: Rexp) = r ~ r.% @@ -163,25 +185,31 @@ val STRING: Rexp = "\"" ~ SYM.% ~ "\"" -val WHILE_REGS = (("k" & KEYWORD) | - ("i" & ID) | - ("o" & OP) | - ("n" & NUM) | - ("s" & SEMI) | - ("str" & STRING) | - ("p" & (LPAREN | RPAREN)) | - ("w" & WHITESPACE)).% +enum TAGS { + case Key, Id, Op, Num, Semi, Str, Paren, Wht +} +import TAGS._ + +extension (t: TAGS) { + def & (r: Rexp) = RECD[TAGS](t, r) +} -val KY : Rexp = "if" | "read" | "write" -val WH : Rexp = " " | "\n" +def lexing(r: Rexp, s: String) = + env[TAGS](lex(r, s.toList)) -val TRIV_REGS = (("k" & KY) | - ("w" & WHITESPACE)).% +val WHILE_REGS = ((Key & KEYWORD) | + (Id & ID) | + (Op & OP) | + (Num & NUM) | + (Semi & SEMI) | + (Str & STRING) | + (Paren & (LPAREN | RPAREN)) | + (Wht & WHITESPACE)).% + // Two Simple While Tests //======================== - -//@arg(doc = "small tests") + @main def small() = { diff -r 0c491eff5b01 -r 14e5ae1fb541 progs/matcher/re1.sc --- a/progs/matcher/re1.sc Mon Feb 03 13:25:59 2025 +0000 +++ b/progs/matcher/re1.sc Fri Sep 05 16:59:48 2025 +0100 @@ -10,15 +10,16 @@ // -// regular expressions -abstract class Rexp -case object 
ZERO extends Rexp // matches nothing -case object ONE extends Rexp // matches an empty string -case class CHAR(c: Char) extends Rexp // matches a character c -case class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternative -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence -case class STAR(r: Rexp) extends Rexp // star - +// regular expressions (as enum in Scala 3) +enum Rexp { + case ZERO // matches nothing + case ONE // matches an empty string + case CHAR(c: Char) // matches a character c + case ALT(r1: Rexp, r2: Rexp) // alternative + case SEQ(r1: Rexp, r2: Rexp) // sequence + case STAR(r: Rexp) // star +} +import Rexp._ // nullable function: tests whether a regular // expression can recognise the empty string @@ -172,9 +173,74 @@ } } + + +// Some code for pretty printing regexes as trees + +def implode(ss: Seq[String]) = ss.mkString("\n") +def explode(s: String) = s.split("\n").toList + +def lst(s: String) : String = explode(s) match { + case hd :: tl => implode(" └" ++ hd :: tl.map(" " ++ _)) + case Nil => "" +} + +def mid(s: String) : String = explode(s) match { + case hd :: tl => implode(" ├" ++ hd :: tl.map(" │" ++ _)) + case Nil => "" +} + +def indent(ss: Seq[String]) : String = ss match { + case init :+ last => implode(init.map(mid) :+ lst(last)) + case _ => "" +} + +def pp(e: Rexp) : String = e match { + case ZERO => "0\n" + case ONE => "1\n" + case CHAR(c) => s"$c\n" + case ALT(r1, r2) => "ALT\n" ++ pps(r1, r2) + case SEQ(r1, r2) => "SEQ\n" ++ pps(r1, r2) + case STAR(r) => "STAR\n" ++ pps(r) +} +def pps(es: Rexp*) = indent(es.map(pp)) + + @main -def all() = { test1(); test2() ; test3() } +def test4() = { + println(pp(r2)) + println(pp(ders("x".toList, r2))) + println(pp(ders("xy".toList, r2))) + println(pp(ders("xyz".toList, r2))) +} + +@main +def all() = { test1(); test2() ; test3() ; test4() } + +// partial derivatives produce a set of regular expressions +def pder(c: Char, r: Rexp) : Set[Rexp] = r match { + case ZERO => Set() + case ONE => Set() + case CHAR(d) => if (c == d) Set(ONE) else Set() + case ALT(r1, r2) => pder(c, r1) ++ pder(c, r2) + case SEQ(r1, r2) => { + (for (pr1 <- pder(c, r1)) yield SEQ(pr1, r2)) ++ + (if (nullable(r1)) pder(c, r2) else Set()) + } + case STAR(r1) => { + for (pr1 <- pder(c, r1)) yield SEQ(pr1, STAR(r1)) + } +} + +def pders(s: List[Char], rs: Set[Rexp]) : Set[Rexp] = s match { + case Nil => rs + case c::s => pders(s, rs.flatMap(pder(c, _))) +} + +def pders1(s: String, r: Rexp) = pders(s.toList, Set(r)) + + diff -r 0c491eff5b01 -r 14e5ae1fb541 progs/matcher/re3.sc --- a/progs/matcher/re3.sc Mon Feb 03 13:25:59 2025 +0000 +++ b/progs/matcher/re3.sc Fri Sep 05 16:59:48 2025 +0100 @@ -200,3 +200,42 @@ def fail() = { test3(); test4(); test5() } +// simplification +def simp2(r: Rexp) : Rexp = r match { + case ALT(r1, r2) => (simp2(r1), simp2(r2)) match { + case (ZERO, r2s) => r2s + case (r1s, ZERO) => r1s + //case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s) + case (r1s, r2s) => ALT(r1s, r2s) + } + case SEQ(r1, r2) => (simp2(r1), simp2(r2)) match { + case (ZERO, _) => ZERO + case (_, ZERO) => ZERO + //case (ONE, r2s) => r2s + //case (r1s, ONE) => r1s + case (r1s, r2s) => SEQ(r1s, r2s) + } + case r => r +} + +// some convenience for typing in regular expressions +import scala.language.implicitConversions + +def charlist2rexp(s : List[Char]): Rexp = s match { + case Nil => ONE + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} + +given Conversion[String, Rexp] = (s => charlist2rexp(s.toList)) + +val HELLO : 
Rexp = "hello" + +extension (r: Rexp) { + def | (s: Rexp) = ALT(r, s) + def % = STAR(r) + def ~ (s: Rexp) = SEQ(r, s) +} + +val re = (ONE | "a" | "ab") ~ ("c" | "bc") ~ ("d" | "e") +simp2(der('d', der('c', der('b', der('a', re))))) \ No newline at end of file diff -r 0c491eff5b01 -r 14e5ae1fb541 progs/parser-combinators/comb2.sc --- a/progs/parser-combinators/comb2.sc Mon Feb 03 13:25:59 2025 +0000 +++ b/progs/parser-combinators/comb2.sc Fri Sep 05 16:59:48 2025 +0100 @@ -59,6 +59,8 @@ } } + + // atomic parser for identifiers (variable names) case object IdParser extends Parser[String, String] { val reg = "[a-z][a-z,0-9]*".r @@ -83,11 +85,20 @@ // the following string interpolation allows us to write // StrParser(_some_string_) more conveniently as // -// p"<_some_string_>" +// p"_some_string_" extension (sc: StringContext) def p(args: Any*) = StrParser(sc.s(args*)) + + +// the following is an alternative way to achieve the same, except +// that the p needs parentheses as in +// +// p("_some_string_") +// +//import StrParser.{apply => p} + // the abstract syntax trees for the WHILE language abstract class Stmt abstract class AExp diff -r 0c491eff5b01 -r 14e5ae1fb541 progs/while-arrays/compile_arrays.sc --- a/progs/while-arrays/compile_arrays.sc Mon Feb 03 13:25:59 2025 +0000 +++ b/progs/while-arrays/compile_arrays.sc Fri Sep 05 16:59:48 2025 +0100 @@ -7,7 +7,7 @@ // // amm compile_arrays.sc -//> using toolkit 0.6.0 +//> using toolkit 0.7.0 // the abstract syntax trees for WHILE diff -r 0c491eff5b01 -r 14e5ae1fb541 slides/slides01.pdf Binary file slides/slides01.pdf has changed