--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/handouts/antimirov.tex Fri Sep 05 16:59:48 2025 +0100
@@ -0,0 +1,477 @@
+\documentclass[12pt]{article}
+\usepackage{../style}
+\usepackage{../langs}
+\usepackage{graphicx}
+
+\newtheorem{thm}{Theorem}
+\newtheorem{lem}[thm]{Lemma}
+\newtheorem{cor}[thm]{Corollary}
+\newenvironment{proof}{\paragraph{Proof:}\it}{\hfill$\square$}
+
+
+\begin{document}
+
+
+\section*{Antimirov's Proof about Pders}
+
+These are some rough notes about the result by Antimirov establishing
+a bound on the number of regular expressions in a partial
+derivative. From this bound on the number of partial derivatives one
+can easily construct an NFA for a regular expression, but one can also
+derive a bound on the size of the partial derivatives. This is what we
+do first. We start with the following definitions:
+
+\begin{itemize}
+\item $pder\,c\,r$ --- partial derivative according to a character; this can be defined
+ inductively as follows:
+
+ \begin{center}
+ \begin{tabular}{l@ {\hspace{2mm}}c@ {\hspace{2mm}}l}
+ $\textit{pder}\, c\, (\ZERO)$ & $\dn$ & $\emptyset$\\
+ $\textit{pder}\, c\, (\ONE)$ & $\dn$ & $\emptyset$ \\
+ $\textit{pder}\, c\, (d)$ & $\dn$ & if $c = d$ then $\{\ONE\}$ else $\emptyset$\\
+ $\textit{pder}\, c\, (r_1 + r_2)$ & $\dn$ & $\textit{pder}\, c\, r_1 \;\cup\; \textit{pder}\, c\, r_2$\\
+ $\textit{pder}\, c\, (r_1 \cdot r_2)$ & $\dn$ & if $\textit{nullable} (r_1)$\\
+ & & then $\Pi\,(\textit{pder}\,c\,r_1)\,r_2 \;\cup\; \textit{pder}\, c\, r_2$\\
+ & & else $\Pi\,(\textit{pder}\, c\, r_1)\,r_2$\\
+ $\textit{pder}\, c\, (r^*)$ & $\dn$ & $\Pi\,(\textit{pder}\,c\,r)\, (r^*)$
+ \end{tabular}
+\end{center}
+
+\item $pder^+\,c\,\,rs$ --- partial derivatives for a set regular exprssions $rs$
+\item $pders\,s\,r$ --- partial derivative of a regular expression
+ according to a string
+
+\item $Pders\,A\,r \dn \bigcup_{s\in A}. pders\,s\,r$ --- partial
+ derivatives according to a language (a set of strings)
+\item $|rs|$ is the size of a set of regular expressions $rs$, or
+ the number of elements in the
+ set (also known as the cardinality of this set)
+\item $\prod\,rs\;r \dn \{r_1 \cdot r \;|\; r_1 \in rs\}$ --- this is
+ some convenience when writing a set of sequence regular
+ expressions. It essentially ``appends'' the regular expression $r$ to all
+ regular expressions in the set $rs$. As a result one can write
+ the sequence case for partial derivatives (see above) more conveniently
+ as
+ \[
+ pder\,c\,(r_1\cdot r_2) \dn
+ \begin{cases}
+ \prod\,(pder\,c\,r_1)\,r_2\;\cup\;pder\,c\,r_2 &
+ \!\!\textit{provided}\,r_1\, \textit{is nullable}\\
+ \prod\,(pder\,c\,r_1)\,r_2 & \!\!\textit{otherwise}\\
+ \end{cases}
+ \]
+\item $\textit{Psuf}\,s$ is the set of all non-empty suffixes of $s$ defined as
+ \[
+ \textit{PSuf}\, s \dn \{v.\;v \not= [] \wedge \exists u. u \,@\, v = s \}
+\]
+
+So for the string $abc$ the non-empty suffixes are $c$, $bc$ and
+$abc$. Also we have that
+$\textit{Psuf}\,(s\,@\,[c]) = ((\textit{Psuf}\,s)\,@@\,[c]) \cup
+\{[c]\}$. Here $@@$ means to concatenate $[c]$ to the end of
+all strings in $\textit{Psuf}\,s$; in this equation we also
+need to add $\{[c]\}$ in order to make the equation to hold.
+\end{itemize}
+
+\noindent
+To state Antimirov's result we need the following definition of an
+\emph{alphabetic width} of a regular expression defined as follows:
+
+\begin{center}
+\begin{tabular}{lcl}
+ $awidth(\ZERO)$ & $\dn$ & $0$\\
+ $awidth(\ONE)$ & $\dn$ & $0$\\
+ $awidth(c)$ & $\dn$ & $1$\\
+ $awidth(r_1 + r_2)$ & $\dn$ & $awidth(r_1) + awidth(r_2)$\\
+ $awidth(r_1 \cdot r_2)$ & $\dn$ & $awidth(r_1) + awidth(r_2)$\\
+ $awidth(r^*)$ & $\dn$ & $awidth(r)$\\
+\end{tabular}
+\end{center}
+
+\noindent
+This function counts how many characters are in a regular expression.
+Antimirov's result states
+
+\begin{thm}\label{one}
+$\forall\,A\,r\,.\;\;|Pders\;A\;r| \leq awidth(r) + 1$
+\end{thm}
+
+\noindent
+Note this theorem holds for any set of strings $A$, for example
+for the set of all strings, which I will write as $\textit{UNIV}$, and also
+for the set $\{s\}$ containing only a single string $s$. Therefore a
+simple corollary is
+
+\begin{cor}
+$\forall\,s\,r\,.\;\;|pders\;s\;r| \leq awidth(r) + 1$
+\end{cor}
+
+\noindent
+This property says that for every string $s$, the number of regular
+expressions in the derivative can never be bigger than
+$awidth(r) + 1$. Interestingly we do not show Thm~\ref{one} for all
+sets of strings $A$ directly, but rather only for one particular set of
+strings which I call $UNIV_1$. It includes all strings except the empty string
+(remember $UNIV$ contains all strings).\bigskip
+
+\noindent
+Let's try to give below a comprehensible account of Antimirov's proof
+of Thm.~\ref{one}---I distictly remember that Antimirov's paper is
+great, but pretty incomprehensible for the first 20+ times one reads
+that paper. The proof starts with the following much weaker property
+about the size being finite:
+
+\begin{lem}
+$\forall\,A\,r\,.\;\;(Pders\;A\;r)$ is a finite set.
+\end{lem}
+
+\noindent This lemma is needed because some reasoning steps in
+Thm~\ref{one} only work for finite sets, not infinite sets. But let us
+skip over the proof of this property at first and let us assume we
+know already that the partial derivatives are always finite sets (this for
+example does not hold for unsimplified Brzozowski derivatives which
+can be infinite for some sets of strings).
+
+There are some central lemmas about partial derivatives for $\cdot$ and $\_^*$.
+One is the following
+
+\begin{lem}\label{central}\mbox{}\\
+ \[Pders\,UNIV_1\,(r_1\cdot r_2) \subseteq (\prod (Pders\,UNIV_1\, r_1)\,r_2) \;
+ \cup \; Pders\,UNIV_1\,r_2\]
+\end{lem}
+
+\begin{proof}
+ \noindent The proof is done via an induction for the following property
+ \[
+ pders\,s\,(r_1\cdot r_2) \subseteq (\prod (pders\,s\, r_1)\,r_2) \;
+ \cup \; Pders\,(\textit{PSuf}\,s)\,r_2
+ \]
+
+ \noindent
+ Note that this property uses $pders$ and $Pders$ together. The proof is done
+ by ``reverse'' induction on $s$, meaning the cases to analyse are the
+ empty string $[]$ and the case where a character is put at the end of the
+ string $s$, namely $s \,@\, [c]$. The case $[]$ is trivial. In the other
+ case we know by IH that
+
+ \[
+ pders\,s\,(r_1\cdot r_2) \subseteq (\prod (pders\,s\, r_1)\,r_2) \;
+ \cup \; Pders\,(\textit{PSuf}\,s)\,r_2
+ \]
+
+ \noindent
+ holds for $s$. Then we have to show it holds for $s\,@\,[c]$
+
+ \begin{center}
+ \begin{tabular}{r@{\hspace{2mm}}ll}
+ & $pders\,(s\,@\,[c])\,(r_1\cdot r_2)$\\
+ $=$ & $pder^+\,c\,(pders\,s\,(r_1\cdot r_2))$\\
+ $\subseteq$ & $pder^+\,c\,(\prod (pders\,s\, r_1)\,r_2 \;
+ \cup \; Pders\,(\textit{PSuf}\,s)\,r_2)$\\
+ & \hfill{}by IH\\
+ $=$ & $(pder^+\,c\,(\prod (pders\,s\, r_1)\,r_2))\;\cup\;
+ (pder^+\,c\,(Pders\,(\textit{PSuf}\,s)\,r_2))$\\
+ $=$ & $(pder^+\,c\,(\prod (pders\,s\, r_1)\,r_2))\;\cup\;
+ (Pders\,(\textit{PSuf}\,(s\,@\,[c]))\,r_2)$\\
+ $\subseteq$ &
+ $(pder^+\,c\,(\prod (pders\,s\, r_1)\,r_2))\;\cup\;
+ (pder\,c\,r_2)\;\cup\;
+ (Pders\,(\textit{PSuf}\,s\,@@\,[c])\,r_2)$\\
+ $\subseteq$ & $\prod (pder^+\,c\,(pders\,s\, r_1))\,r_2\;\cup\;
+ (pder\,c\,r_2)\;\cup\;
+ (Pders\,(\textit{PSuf}\,s\,@@\,[c])\,r_2)$\\
+ $=$ & $(\prod (pders\,(s\,@\,[c])\, r_1)\,r_2)\;\cup\;
+ (pder\,c\,r_2)\;\cup\;
+ (Pders\,(\textit{PSuf}\,s\,@@\,[c])\,r_2)$\\
+ $\subseteq$ & $(\prod (pders\,(s\,@\,[c])\, r_1)\,r_2)\;\cup\;
+ (Pders\,(\textit{PSuf}\,(s\,@\,[c]))\,r_2)$\\
+ \end{tabular}
+ \end{center}
+
+ \noindent The lemma now follows because for an $s \in UNIV_1$ it holds that
+
+ \[
+ \prod\,(pders\,s\,r_1)\,r_2 \subseteq \prod (Pders\,UNIV_1\, r_1)\,r_2
+ \]
+
+ \noindent and
+
+ \[
+ Pders\,(\textit{PSuf}\,s)\,r_2 \subseteq Pders\,UNIV_1\,r_2
+ \]
+
+ \noindent The left-hand sides of the inclusions above are
+ euqal to $pders\,s\,(r_1\cdot r_2)$ for a string $s\in UNIV_1$.
+\end{proof}\medskip
+
+\noindent There is a similar lemma for the $^*$-regular expression, namely:
+
+\begin{lem}\label{centraltwo}
+$Pders\,UNIV_1\,(r^*) \subseteq \prod\, (Pders\,UNIV_1\,r)\,(r^*)$
+\end{lem}
+
+\noindent
+We omit the proof for the moment (similar to Lem~\ref{central}). We
+also need the following property about the cardinality of $\prod$:
+
+\begin{lem}\label{centralthree}
+ $|\prod\,(Pders\,A\,r_1)\,r_2| \le |Pders\,A\,r_1|$
+\end{lem}
+
+\noindent
+We only need the $\le$ version, which essentially says there
+are as many sequences $r\cdot r_2$ as are elements in $r$. Now
+for the proof of Thm~\ref{one}: The main induction in
+Antimirov's proof establishes that:\footnote{Remember that it is
+ always the hardest part in an induction proof to find the right
+ property that is strong enough and of the right shape for the
+ induction to go through.}
+
+\begin{lem}\label{two}
+$\forall r.\;\;|Pders\;UNIV_1\;r| \leq awidth(r)$
+\end{lem}
+
+\begin{proof} This is proved by induction on
+ $r$. The interesting cases are $r_1 + r_2$, $r_1\cdot r_2$ and
+ $r^*$. Let us start with the relatively simple case:\medskip
+
+\noindent
+\textbf{Case $r_1 + r_2$:} By induction hypothesis we know
+
+\begin{center}
+\begin{tabular}{l}
+ $|Pders\;UNIV_1\;r_1| \leq awidth(r_1)$\\
+ $|Pders\;UNIV_1\;r_2| \leq awidth(r_2)$
+\end{tabular}
+\end{center}
+
+\noindent
+In this case we can reason as follows
+
+\begin{center}
+\begin{tabular}{r@{\hspace{2mm}}ll}
+ & $|Pders\;UNIV_1\;(r_1+r_2)|$\\
+ $=$ & $|(Pders\;UNIV_1\;r_1) \;\cup\; (Pders\;UNIV_1\;r_2)|$\\
+ $\leq$ & $|(Pders\;UNIV_1\;r_1)| \;+\; |(Pders\;UNIV_1\;r_2)|$ & (*)\\
+ $\leq$ & $awidth(r_1) + awidth(r_2)$\\
+ $\dn$ & $awidth(r_1 + r_2)$
+\end{tabular}
+\end{center}
+
+\noindent Note that (*) is a step that only works if one knows that
+$|(Pders\;UNIV_1\;r_1)|$ and so on are finite. The next case is
+a bit more interesting:\medskip
+
+\noindent
+\textbf{Case $r_1 \cdot r_2$:} We have the same induction
+hypothesis as in the case before.
+
+\begin{center}
+\begin{tabular}{r@{\hspace{2mm}}ll}
+ & $|Pders\;UNIV_1\;(r_1\cdot r_2)|$\\
+ $\leq$ & $|\prod\,(Pders\;UNIV_1\;r_1)\,r_2\;\cup\; (Pders\;UNIV_1\;r_2)|$
+ & by Lem~\ref{central}\\
+ $\leq$ & $|\prod\,(Pders\;UNIV_1\;r_1)\,r_2| \;+\; |(Pders\;UNIV_1\;r_2)|$\\
+ $\leq$ & $|Pders\;UNIV_1\;r_1| \;+\; |Pders\;UNIV_1\;r_2|$
+ & by Lem~\ref{centralthree} \\
+ $\leq$ & $awidth(r_1) + awidth(r_2)$\\
+ $\dn$ & $awidth(r_1 \cdot r_2)$
+\end{tabular}
+\end{center} \medskip
+
+\noindent
+\textbf{Case $r^*$:} Again we have the same induction
+hypothesis as in the cases before.
+
+\begin{center}
+\begin{tabular}{r@{\hspace{2mm}}ll}
+ & $|Pders\;UNIV_1\;(r^*)|$\\
+ $\leq$ & $|\prod\,(Pders\;UNIV_1\;r)\,(r^*)|$
+ & by Lem~\ref{centraltwo}\\
+ $\leq$ & $|Pders\;UNIV_1\;r|$
+ & by Lem~\ref{centralthree} \\
+ $\leq$ & $awidth(r)$\\
+\end{tabular}
+\end{center}
+\end{proof}\bigskip
+
+\noindent
+From this lemma we can derive the next corrollary which extends
+the property to $UNIV$ ($= UNIV_1 \cup \{[]\}$):
+
+\begin{cor}
+$\forall r.\;\;|Pders\;UNIV\;r| \leq awidth(r) + 1$
+\end{cor}
+
+\begin{proof} This can be proved as follows
+\begin{center}
+\begin{tabular}{r@{\hspace{2mm}}ll}
+ & $|Pders\;UNIV\;r|$\\
+ $=$ & $|Pders\;(UNIV_1 \cup \{[]\})\;r|$\\
+ $=$ & $|(Pders\;UNIV_1\,r) \;\cup\;\{r\}|$\\
+ $\leq$ & $|Pders\;UNIV_1\,r| + 1$ & by Lem~\ref{two}\\
+ $\leq$ & $awidth(r) + 1$\\
+\end{tabular}
+\end{center}
+\end{proof}
+
+\noindent
+From the last corollary, it is easy to infer Antimirov's
+Thm~\ref{one}, because
+
+\[ Pders\,A\,r \subseteq Pders\,UNIV\,r \]
+
+
+\noindent
+for all sets $A$.\bigskip
+
+\noindent
+While I was earlier a bit dismissive above about the intelligibility
+of Antimirov's paper, you have to admit this proof is a work of
+beauty. It only gives a bound (\textit{awidth}) for the number of
+regular expressions in the de\-rivatives---this is important for
+constructing NFAs. Maybe it has not been important before, but I have
+never seen a result about the \emph{size} of the partial
+derivatives.\footnote{Update: I have now seen a paper which proves
+ this result as well.} However, a very crude bound, namely
+$(size(r)^2 + 1) \times (awidth(r) + 1)$, can be easily derived by
+using Antimirov's result. The definition we need for this is a
+function that collects subexpressions from which partial derivatives
+are built:
+
+
+\begin{center}
+\begin{tabular}{lcl}
+ $subs(\ZERO)$ & $\dn$ & $\{\ZERO\}$\\
+ $subs(\ONE)$ & $\dn$ & $\{\ONE\}$\\
+ $subs(c)$ & $\dn$ & $\{c, \ONE\}$\\
+ $subs(r_1 + r_2)$ & $\dn$ & $\{r_1 + r_2\}\,\cup\,subs(r_1) \,\cup\, subs(r_2)$\\
+ $subs(r_1 \cdot r_2)$ & $\dn$ & $\{r_1 \cdot r_2\}\,\cup (\prod\,subs(r_1)\;r_2)\,\cup \,
+ subs(r_1) \,\cup\, subs(r_2)$\\
+ $subs(r^*)$ & $\dn$ & $\{r^*\}\,\cup\,(\prod\,subs(r)\;r^*) \,\cup\, subs(r)$\\
+\end{tabular}
+\end{center}
+
+\noindent
+We can show that
+
+\begin{lem}
+$pders\,s\,r \subseteq subs(r)$
+\end{lem}
+
+\noindent
+This is a relatively simple induction on $r$. The point is that for every element
+in $subs$, the maximum size is given by
+
+\begin{lem}
+ If $r' \in subs(r)$ then $size(r') \le 1 + size(r)^2$.
+\end{lem}
+
+\noindent
+Again the proof is a relatively simple induction on $r$. Stringing Antimirov's result
+and the lemma above together gives
+
+\begin{thm}
+$\sum_{r' \in pders\,s\,r}.\;size(r') \le (size(r)^2 + 1) \times (awidth(r) + 1)$
+\end{thm}
+
+\noindent
+Since $awidth$ is always smaller than the $size$ of a regular expression,
+one can also state the bound as follows:
+
+\[
+\sum_{r' \in pders\,s\,r}.\;size(r') \le (size(r) + 1)^3
+\]
+
+\noindent
+This, by the way, also holds for $Pders$, namely
+
+\[
+\sum_{r' \in Pders\,A\,r}.\;size(r') \le (size(r) + 1)^3
+\]
+
+\noindent
+for all $r$ and $A$. If one is interested in the height of the partial derivatives, one can derive:
+
+\[
+\forall\,r' \in pders\,s\,r.\;height(r') \le height(r) + 1
+\]
+
+
+\noindent
+meaning the height of the partial derivatives is never bigger than
+the height of the original regular expression (+1).
+
+\section*{NFA Construction via Antimirov's Partial Derivatives}
+
+Let's finish these notes with the construction of an NFA for a regular
+expression using partial derivatives. As usual an automaton is a
+quintuple $(Q, A, \delta, q_0, F)$ where $Q$ is the set of states of
+the automaton, $A$ is the alphabet, $q_0$ is the starting state and
+$F$ are the accepting states. For DFAs the $\delta$ is a (partial)
+function from state $\times$ character to state. For NFAs it is a
+relation between state $\times$ character $\times$ state. The
+non-determinism can be seen by the following: consider three
+(distinct) states $q_1$, $q_2$ and $q_3$, then the relation can
+include $(q_1, a, q_2)$ and $(q_1, a, q_3)$, which means there is a
+transition between $q_1$ and both $q_2$ and $q_3$ for the character
+$a$.
+
+The Antimirov's NFA for a regular expression $r$ is then
+given by the quintuple
+\[(PD(r), A, \delta_{PD}, r, F)\]
+
+\noindent
+where $PD(r)$ are all the partial
+derivatives according to all strings, that is
+\[
+PD(r) \;\dn\; \textit{Pders}\;\textit{UNIV}\;r
+\]
+
+\noindent
+Because of the previous proof, we know that this set is finite. We
+also see that the states in Antimirov's NFA are ``labelled'' by single
+regular expressions. The starting state is labelled with the original
+regular expression $r$. The set of accepting states $F$ is all states
+$r'\in F$ where $r'$ is nullable. The relation $\delta_{PD}$ is given by
+\[
+(r_1, c, r_2)
+\]
+
+\noindent
+for every $r_1 \in PD(r)$ and $r_2 \in \textit{pder}\,c\,r$. This is
+in general a ``non-deterministic'' relation because the set of partial
+derivatives often contains more than one element. A nice example of
+an NFA constructed via Antimirov's partial derivatives is given in
+\cite{IlieYu2003} on Page 378.
+
+The difficulty of course in this construction is to find the set of
+partial derivatives according to \emph{all} strings. However, it seem
+a procedure that enumerates strings according to size suffices until
+no new derivative is found. There are various improvements that apply
+clever tricks on how to more efficiently discover this set.
+
+
+\begin{thebibliography}{999}
+
+\bibitem{IlieYu2003}
+ L.~Ilie and S.~Yu,
+ \emph{Reducing NFAs by Invariant Equivalences}.
+ In Theoretical Computer Science, Volume 306(1--3), Pages 373–-390, 2003.\\
+ \url{https://core.ac.uk/download/pdf/82545723.pdf}
+
+\end{thebibliography}
+
+
+
+
+
+
+\end{document}
+
+
+%%% Local Variables:
+%%% mode: latex
+%%% TeX-master: t
+%%% End:
--- a/progs/lexer/lex.sc Mon Feb 03 13:25:59 2025 +0000
+++ b/progs/lexer/lex.sc Fri Sep 05 16:59:48 2025 +0100
@@ -7,28 +7,33 @@
//
-// regular expressions including records
-abstract class Rexp
-case object ZERO extends Rexp
-case object ONE extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
-case class RECD(x: String, r: Rexp) extends Rexp
- // records for extracting strings or tokens
-
+// regular expressions including recods
+// for extracting strings or tokens
+enum Rexp {
+ case ZERO
+ case ONE
+ case CHAR(c: Char)
+ case ALT(r1: Rexp, r2: Rexp)
+ case SEQ(r1: Rexp, r2: Rexp)
+ case STAR(r: Rexp)
+ case RECD[A](label: A, r: Rexp)
+}
+import Rexp._
+
// values
-abstract class Val
-case object Empty extends Val
-case class Chr(c: Char) extends Val
-case class Sequ(v1: Val, v2: Val) extends Val
-case class Left(v: Val) extends Val
-case class Right(v: Val) extends Val
-case class Stars(vs: List[Val]) extends Val
-case class Rec(x: String, v: Val) extends Val
-
+enum Val {
+ case Empty
+ case Chr(c: Char)
+ case Sequ(v1: Val, v2: Val)
+ case Left(v: Val)
+ case Right(v: Val)
+ case Stars(vs: List[Val])
+ case Rec[A](label: A, v: Val)
+}
+import Val._
+
// some convenience for typing in regular expressions
+import scala.language.implicitConversions
def charlist2rexp(s : List[Char]): Rexp = s match {
case Nil => ONE
@@ -48,9 +53,6 @@
// to use & for records, instead of $ which had
// its precedence be changed in Scala 3
-extension (s: String) {
- def & (r: Rexp) = RECD(s, r)
-}
val TEST = ("ab" | "ba").%
@@ -91,21 +93,27 @@
// extracts an environment from a value;
// used for tokenising a string
-def env(v: Val) : List[(String, String)] = v match {
+//import scala.reflect.ClassTag
+
+def env[A](v: Val) : List[(A, String)] = v match {
case Empty => Nil
case Chr(c) => Nil
case Left(v) => env(v)
case Right(v) => env(v)
case Sequ(v1, v2) => env(v1) ::: env(v2)
case Stars(vs) => vs.flatMap(env)
- case Rec(x, v) => (x, flatten(v))::env(v)
+ case Rec[A](x, v) => (x, flatten(v))::env(v)
}
// The injection and mkeps part of the lexer
//===========================================
-def mkeps(r: Rexp) : Val = r match {
+// the pattern-matches are defined to be @unchecked
+// because they do not need to be defined for
+// all cases
+
+def mkeps(r: Rexp) : Val = (r: @unchecked) match {
case ONE => Empty
case ALT(r1, r2) =>
if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
@@ -114,7 +122,7 @@
case RECD(x, r) => Rec(x, mkeps(r))
}
-def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
+def inj(r: Rexp, c: Char, v: Val) : Val = ((r, v) : @unchecked) match {
case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
@@ -132,13 +140,27 @@
case c::cs => inj(r, c, lex(der(c, r), cs))
}
-def lexing(r: Rexp, s: String) =
- env(lex(r, s.toList))
+
println(lex(("ab" | "a") ~ (ONE | "b"), "ab".toList))
println(lex(STAR("aa" | "a"), "aaa".toList))
+println(lex(STAR(STAR("a")), "aaa".toList))
+
+val re = ("a" | "ab") ~ ("c" | "bc")
+
+println(pders1("abc", re).toList.mkString("\n"))
+pders('a', pder('a', re))))
+draw(simp(der('a', der('a', der('a', re)))))
+
+size(simp(ders(, re)))
+size(simp(der('a', der('a', re))))
+size(simp(der('a', der('a', der('a', re)))))
+
+
+lex(re, "aaaaa".toList)
+
// The Lexing Rules for the WHILE Language
def PLUS(r: Rexp) = r ~ r.%
@@ -163,25 +185,31 @@
val STRING: Rexp = "\"" ~ SYM.% ~ "\""
-val WHILE_REGS = (("k" & KEYWORD) |
- ("i" & ID) |
- ("o" & OP) |
- ("n" & NUM) |
- ("s" & SEMI) |
- ("str" & STRING) |
- ("p" & (LPAREN | RPAREN)) |
- ("w" & WHITESPACE)).%
+enum TAGS {
+ case Key, Id, Op, Num, Semi, Str, Paren, Wht
+}
+import TAGS._
+
+extension (t: TAGS) {
+ def & (r: Rexp) = RECD[TAGS](t, r)
+}
-val KY : Rexp = "if" | "read" | "write"
-val WH : Rexp = " " | "\n"
+def lexing(r: Rexp, s: String) =
+ env[TAGS](lex(r, s.toList))
-val TRIV_REGS = (("k" & KY) |
- ("w" & WHITESPACE)).%
+val WHILE_REGS = ((Key & KEYWORD) |
+ (Id & ID) |
+ (Op & OP) |
+ (Num & NUM) |
+ (Semi & SEMI) |
+ (Str & STRING) |
+ (Paren & (LPAREN | RPAREN)) |
+ (Wht & WHITESPACE)).%
+
// Two Simple While Tests
//========================
-
-//@arg(doc = "small tests")
+
@main
def small() = {