# HG changeset patch # User Christian Urban # Date 1761938771 0 # Node ID 36799f7b9702a21800549d93f1570236580363cd # Parent f71399fe3fdcd9f7424cf4b785dd04264248e30e updated diff -r f71399fe3fdc -r 36799f7b9702 handouts/ho04.pdf Binary file handouts/ho04.pdf has changed diff -r f71399fe3fdc -r 36799f7b9702 handouts/ho04.tex --- a/handouts/ho04.tex Fri Oct 31 11:25:14 2025 +0000 +++ b/handouts/ho04.tex Fri Oct 31 19:26:11 2025 +0000 @@ -91,14 +91,37 @@ letters, whereas values start with a single upper-case character and the rest are lower-case letters. -{\small\lstinputlisting[language=Scala,numbers=none,linebackgroundcolor= - {\ifodd\value{lstnumber}\color{capri!3}\fi}] -{../progs/app01.scala}} +{\small% +\begin{lstlisting}[language=Scala,numbers=none] + enum Rexp { + case ZERO + case ONE + case CHAR(c: Char) + case ALT(r1: Rexp, r2: Rexp) + case SEQ(r1: Rexp, r2: Rexp) + case STAR(r: Rexp) +} +\end{lstlisting} - -{\small\lstinputlisting[language=Scala,numbers=none,linebackgroundcolor= - {\ifodd\value{lstnumber}\color{capri!3}\fi}] -{../progs/app02.scala}} +\begin{lstlisting}[language=Scala,numbers=none] +enum Val { + case Empty + case Chr(c: Char) + case Sequ(v1: Val, v2: Val) + case Left(v: Val) + case Right(v: Val) + case Stars(vs: List[Val]) +} +\end{lstlisting}} + +%{\small\lstinputlisting[language=Scala,numbers=none,linebackgroundcolor= +% {\ifodd\value{lstnumber}\color{capri!3}\fi}] +%{../progs/app01.scala}} +% +% +%{\small\lstinputlisting[language=Scala,numbers=none,linebackgroundcolor= +% {\ifodd\value{lstnumber}\color{capri!3}\fi}] +%{../progs/app02.scala}} Graphically the algorithm by Sulzmann \& Lu can be illustrated @@ -110,7 +133,7 @@ first build the three derivatives (according to $a$, $b$ and $c$). We then use $\nullable$ to find out whether the resulting regular expression can match the empty string. If yes, we call -the function $\textit{mkeps}$. The $\textit{mkeps}$ function calculates a value for how a regular +the function $\textit{mkeps}$. This function calculates a value for how a regular expression has matched the empty string. Its definition is as follows: @@ -235,7 +258,8 @@ $\Left(\ldots)$. The point is that from this value we can directly read off which part of $r_4$ matched the empty string: take the right-alternative first, and then the -right-alternative again, then you got to the $\ONE$. +right-alternative again, then you got to the $\ONE$ (indicated by +\textit{Empty}). Next we have to ``inject'' the last character, that is $c$ in the running example, into this value $v_4$ in order to @@ -269,7 +293,8 @@ \end{center} \noindent This value corresponds to how the regular expression $r_1$, -namely $a \cdot (b \cdot c)$, matched the string $abc$. +namely $a \cdot (b \cdot c)$, matched the string $abc$. I let you +think whether it is the expected value. There are a few auxiliary functions that are of interest @@ -307,7 +332,7 @@ The definition of $\inj$ might still be very puzzling and each clause might look arbitrary, but there is in fact a relatively simple idea behind it. Ultimately the $\inj$-functions is determined by the -derivative functions. For this consider one of the ``squares'' from +derivative function. For this consider one of the ``squares'' from Figure~\ref{Sulz}: \begin{center} @@ -775,9 +800,19 @@ be regarded as a regular expression. The extended definition in Scala therefore looks as follows: -{\small\lstinputlisting[language=Scala, numbers=none,linebackgroundcolor= - {\ifodd\value{lstnumber}\color{capri!3}\fi}] -{../progs/app03.scala}} +{\small% +\begin{lstlisting}[language=Scala,numbers=none] + enum Rexp { + case ZERO + case ONE + case CHAR(c: Char) + case ALT(r1: Rexp, r2: Rexp) + case SEQ(r1: Rexp, r2: Rexp) + case STAR(r: Rexp) + case RECD(label: String, r: Rexp) +} +\end{lstlisting}} + \noindent Since we regard records as regular expressions we need to extend the functions $\nullable$ and $\der$. Similarly @@ -785,9 +820,19 @@ to extend the definition of values, which in Scala looks as follows: -{\small\lstinputlisting[language=Scala, numbers=none,linebackgroundcolor= - {\ifodd\value{lstnumber}\color{capri!3}\fi}] -{../progs/app04.scala}} +{\small% +\begin{lstlisting}[language=Scala,numbers=none] +enum Val { + case Empty + case Chr(c: Char) + case Sequ(v1: Val, v2: Val) + case Left(v: Val) + case Right(v: Val) + case Stars(vs: List[Val]) + case Rec(label: String, v: Val) +} +\end{lstlisting}} + \noindent Let us now look at the purpose of records more closely and let us return to our question whether the string @@ -803,7 +848,7 @@ the strings $ab$ and $ac$, but we can now use a function that takes a value and returns all records. I call this function \emph{env} for environment\ldots it builds a list -of identifiers associated with a string. This function +of labels associated with a string. This function can be defined as follows: \begin{center} @@ -826,15 +871,16 @@ contain many ``records''. If we now postprocess the value calculated by $lex$ extracting all records using $env$, we can answer the question whether the last element in the string was -an $b$ or a $c$. Lets see this in action: if we use $a\cdot b -+ a\cdot c$ and $ac$ the calculated value will be +an $b$ or a $c$. Lets see this in action: if we use the regular expression +$a\cdot b + a\cdot c$ and the string$ac$, then the calculated value of \textit{lex} +will be \begin{center} $Right(Seq(Char(a), Char(c)))$ \end{center} \noindent If we use instead $a\cdot (x:b) + a\cdot (x:c)$ and -use the $env$ function to extract the recording for +use the $env$ function to extract what is recorded for $x$ we obtain \begin{center} @@ -842,7 +888,9 @@ \end{center} \noindent If we had given the string $ab$ instead, then the -record would have been $[(x:b)]$. The fun starts if we +record would have been $[(x:b)]$. Note that $x$ is just an arbitrary +label we use to identify a place in the regular expression +where we are interested in what substring has been matched. The fun starts if we iterate this. Consider the regular expression \begin{center} @@ -926,8 +974,10 @@ \end{array}\right)^{\mbox{\LARGE{}*}}$ \end{center} -\noindent and ask the algorithm by Sulzmann \& Lu to lex, say -the following string +\noindent The string $k$ stands for keywords, $i$ for identifiers, $o$ for +operations and so on. These labels essentially allow us to classify what +strings we match with each alternative. Then we can ask the algorithm +by Sulzmann \& Lu to lex the following string \begin{center}\large \code{"if true then then 42 else +"} diff -r f71399fe3fdc -r 36799f7b9702 progs/lexer/lex.sc --- a/progs/lexer/lex.sc Fri Oct 31 11:25:14 2025 +0000 +++ b/progs/lexer/lex.sc Fri Oct 31 19:26:11 2025 +0000 @@ -139,7 +139,7 @@ -println(lex(("ab" | "a") ~ (ONE | "b"), "ab".toList)) +println(lex(("ab" | "ab") ~ (ONE | "b"), "ab".toList)) println(lex(STAR("aa" | "a"), "aaa".toList)) @@ -207,7 +207,7 @@ println(s"test: $prog1") println(lexing(WHILE_REGS, prog1)) - val prog2 = """read n; write n""" + val prog2 = """read n; write n; write n""" println(s"test: $prog2") println(lexing(WHILE_REGS, prog2)) } diff -r f71399fe3fdc -r 36799f7b9702 progs/lexer/lexer.sc --- a/progs/lexer/lexer.sc Fri Oct 31 11:25:14 2025 +0000 +++ b/progs/lexer/lexer.sc Fri Oct 31 19:26:11 2025 +0000 @@ -43,9 +43,7 @@ given Conversion[String, Rexp] = (s => charlist2rexp(s.toList)) -//extension (s: String) { -// def $ (r: Rexp) = RECD(s, r) -//} + extension (s: String) { def $ (r: Rexp) = RECD(s, r) def | (r: Rexp) = ALT(s, r) @@ -81,7 +79,7 @@ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) else SEQ(der(c, r1), r2) case STAR(r) => SEQ(der(c, r), STAR(r)) - case RECD(_, r1) => der(c, r1) + case RECD(label, r1) => der(c, r1) } // extracts a string from a value diff -r f71399fe3fdc -r 36799f7b9702 slides/slides05.pdf Binary file slides/slides05.pdf has changed diff -r f71399fe3fdc -r 36799f7b9702 slides/slides05.tex --- a/slides/slides05.tex Fri Oct 31 11:25:14 2025 +0000 +++ b/slides/slides05.tex Fri Oct 31 19:26:11 2025 +0000 @@ -188,7 +188,7 @@ \normalsize parser input: a sequence of tokens\smallskip\\ -{\small\hspace{5mm}\code{key(read) lpar id(n) rpar semi}}\smallskip\\ +{\small\hspace{5mm}\code{key(read) lpar id(n) rpar}}\smallskip\\ parser output: an abstract syntax tree\smallskip\\ \footnotesize @@ -677,7 +677,7 @@ \end{mybox3}\bigskip - \hfill from the \href{http://www.computing.surrey.ac.uk/research/dsrg/fog/FogThesis.pdf}{PhD thesis} by Willink (2001) + \hfill from the \href{https://urbanchr.github.io/cfl/willing-cpp-thesis.pdf}{PhD thesis} by Willink (2001) \small \begin{center}