--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/lexer/lex.sc Wed Oct 21 14:55:20 2020 +0100
@@ -0,0 +1,191 @@
+// A simple lexer inspired by work of Sulzmann & Lu
+//==================================================
+//
+
+// regular expressions including records
+abstract class Rexp
+case object ZERO extends Rexp
+case object ONE extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+case class RECD(x: String, r: Rexp) extends Rexp
+ // records for extracting strings or tokens
+
+// values
+abstract class Val
+case object Empty extends Val
+case class Chr(c: Char) extends Val
+case class Sequ(v1: Val, v2: Val) extends Val
+case class Left(v: Val) extends Val
+case class Right(v: Val) extends Val
+case class Stars(vs: List[Val]) extends Val
+case class Rec(x: String, v: Val) extends Val
+
+// some convenience for typing in regular expressions
+
+def charlist2rexp(s : List[Char]): Rexp = s match {
+ case Nil => ONE
+ case c::Nil => CHAR(c)
+ case c::s => SEQ(CHAR(c), charlist2rexp(s))
+}
+implicit def string2rexp(s : String) : Rexp =
+ charlist2rexp(s.toList)
+
+implicit def RexpOps(r: Rexp) = new {
+ def | (s: Rexp) = ALT(r, s)
+ def % = STAR(r)
+ def ~ (s: Rexp) = SEQ(r, s)
+}
+
+implicit def stringOps(s: String) = new {
+ def | (r: Rexp) = ALT(s, r)
+ def | (r: String) = ALT(s, r)
+ def % = STAR(s)
+ def ~ (r: Rexp) = SEQ(s, r)
+ def ~ (r: String) = SEQ(s, r)
+ def $ (r: Rexp) = RECD(s, r)
+}
+
+def nullable(r: Rexp) : Boolean = r match {
+ case ZERO => false
+ case ONE => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+ case RECD(_, r1) => nullable(r1)
+}
+
+def der(c: Char, r: Rexp) : Rexp = r match {
+ case ZERO => ZERO
+ case ONE => ZERO
+ case CHAR(d) => if (c == d) ONE else ZERO
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r) => SEQ(der(c, r), STAR(r))
+ case RECD(_, r1) => der(c, r1)
+}
+
+
+// extracts a string from a value
+def flatten(v: Val) : String = v match {
+ case Empty => ""
+ case Chr(c) => c.toString
+ case Left(v) => flatten(v)
+ case Right(v) => flatten(v)
+ case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
+ case Stars(vs) => vs.map(flatten).mkString
+ case Rec(_, v) => flatten(v)
+}
+
+
+// extracts an environment from a value;
+// used for tokenising a string
+def env(v: Val) : List[(String, String)] = v match {
+ case Empty => Nil
+ case Chr(c) => Nil
+ case Left(v) => env(v)
+ case Right(v) => env(v)
+ case Sequ(v1, v2) => env(v1) ::: env(v2)
+ case Stars(vs) => vs.flatMap(env)
+ case Rec(x, v) => (x, flatten(v))::env(v)
+}
+
+
+// The injection and mkeps part of the lexer
+//===========================================
+
+def mkeps(r: Rexp) : Val = r match {
+ case ONE => Empty
+ case ALT(r1, r2) =>
+ if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
+ case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
+ case STAR(r) => Stars(Nil)
+ case RECD(x, r) => Rec(x, mkeps(r))
+}
+
+def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
+ case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
+ case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
+ case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
+ case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
+ case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
+ case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
+ case (CHAR(d), Empty) => Chr(c)
+ case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
+}
+
+// lexing functions including simplification
+def lex(r: Rexp, s: List[Char]) : Val = s match {
+ case Nil => if (nullable(r)) mkeps(r) else
+ { throw new Exception("lexing error") }
+ case c::cs => inj(r, c, lex(der(c, r), cs))
+}
+
+def lexing(r: Rexp, s: String) =
+ env(lex(r, s.toList))
+
+
+// The Lexing Rules for the WHILE Language
+
+def PLUS(r: Rexp) = r ~ r.%
+
+def Range(s : List[Char]) : Rexp = s match {
+ case Nil => ZERO
+ case c::Nil => CHAR(c)
+ case c::s => ALT(CHAR(c), Range(s))
+}
+def RANGE(s: String) = Range(s.toList)
+
+val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_")
+val DIGIT = RANGE("0123456789")
+val ID = SYM ~ (SYM | DIGIT).%
+val NUM = PLUS(DIGIT)
+val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write"
+val SEMI: Rexp = ";"
+val OP: Rexp = ":=" | "=" | "-" | "+" | "*" | "!=" | "<" | ">"
+val WHITESPACE = PLUS(" " | "\n" | "\t")
+val RPAREN: Rexp = "{"
+val LPAREN: Rexp = "}"
+val STRING: Rexp = "\"" ~ SYM.% ~ "\""
+
+
+val WHILE_REGS = (("k" $ KEYWORD) |
+ ("i" $ ID) |
+ ("o" $ OP) |
+ ("n" $ NUM) |
+ ("s" $ SEMI) |
+ ("str" $ STRING) |
+ ("p" $ (LPAREN | RPAREN)) |
+ ("w" $ WHITESPACE)).%
+
+val KY : Rexp = "if" | "read" | "write"
+val WH : Rexp = " " | "\n"
+
+val TRIV_REGS = (("k" $ KY) |
+ ("w" $ WHITESPACE)).%
+
+// Two Simple While Tests
+//========================
+
+@doc("small tests")
+@main
+def small() = {
+
+ val prog0 = """if"""
+ println(s"test: $prog0")
+ println(lexing(WHILE_REGS, prog0))
+
+ val prog1 = """iffoo"""
+ println(s"test: $prog1")
+ println(lexing(WHILE_REGS, prog1))
+
+ val prog2 = """read n; write n"""
+ println(s"test: $prog2")
+ println(lexing(WHILE_REGS, prog2))
+}
+
Binary file slides/slides04.pdf has changed
--- a/slides/slides04.tex Wed Oct 21 09:24:32 2020 +0100
+++ b/slides/slides04.tex Wed Oct 21 14:55:20 2020 +0100
@@ -487,7 +487,7 @@
\bl{$|Left(v)|$} & \bl{$\dn$} & \bl{$|v|$}\\
\bl{$|Right(v)|$} & \bl{$\dn$} & \bl{$|v|$}\\
\bl{$|Seq(v_1,v_2)|$}& \bl{$\dn$} & \bl{$|v_1| \,@\, |v_2|$}\\
- \bl{$|[v_1,\ldots ,v_n]|$} & \bl{$\dn$} & \bl{$|v_1| \,@\ldots @\, |v_n|$}\\
+ \bl{$|Stars\,[v_1,\ldots ,v_n]|$} & \bl{$\dn$} & \bl{$|v_1| \,@\ldots @\, |v_n|$}\\
\end{tabular}
\end{center}
@@ -604,7 +604,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
\frametitle{Inject}
@@ -629,7 +629,71 @@
& result $\mapsto$ a value
\end{tabular}
\end{frame}
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ \begin{frame}[t]
+
+ \begin{center}
+ \begin{tabular}{@{\hspace{-3mm}}l@{\hspace{1mm}}c@{\hspace{1mm}}l@{}}
+ \bl{$inj\,(c)\,c\,(Empty)$} & \bl{$\dn$} & \bl{$Char\,c$}\\
+ \end{tabular}
+ \end{center}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ \begin{frame}[t]
+
+ \begin{center}
+ \begin{tabular}{@{\hspace{-3mm}}l@{\hspace{1mm}}c@{\hspace{1mm}}l@{}}
+ \bl{$inj\,(r_1 + r_2)\,c\,(Left(v))$} & \bl{$\dn$} & \bl{$Left(inj\,r_1\,c\,v)$}\\
+ \bl{$inj\,(r_1 + r_2)\,c\,(Right(v))$} & \bl{$\dn$} & \bl{$Right(inj\,r_2\,c\,v)$}\\
+
+ \end{tabular}
+ \end{center}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ \begin{frame}[t]
+
+ \begin{center}
+ \begin{tabular}{@{\hspace{-8mm}}l@{\hspace{1mm}}c@{\hspace{1mm}}l@{}}
+ \\[-15mm]
+ \bl{$inj\,(r_1 \cdot r_2)\,c\,(Seq(v_1,v_2))$} & \bl{$\dn$} & \bl{$Seq(inj\,r_1\,c\,v_1,v_2)$}\\
+ \bl{$inj\,(r_1 \cdot r_2)\,c\,(Left(Seq(v_1,v_2)))$} & \bl{$\dn$} & \bl{$Seq(inj\,r_1\,c\,v_1,v_2)$}\\
+ \bl{$inj\,(r_1 \cdot r_2)\,c\,(Right(v))$} & \bl{$\dn$} & \bl{$Seq(mkeps(r_1),inj\,r_2\,c\,v)$}\\
+ \end{tabular}
+\end{center}
+
+
+\begin{textblock}{13}(1,13.4)
+\begin{bubble}[13cm]
+\small
+\bl{$\der\, c\, (r_1 \cdot r_2)$} \bl{$\dn$} \bl{\textbf{if} $nullable (r_1)$}
+ \bl{\textbf{then} $(\der\,c\,r_1) \cdot r_2 + \der\, c\, r_2$}
+ \bl{\textbf{else} $(\der\, c\, r_1) \cdot r_2$}
+\end{bubble}
+\end{textblock}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ \begin{frame}[t]
+
+ \begin{center}
+ \begin{tabular}{@{\hspace{-3mm}}l@{\hspace{1mm}}c@{\hspace{1mm}}l@{}}
+ \bl{$inj\,(r^*)\,c\,(Seq(v,Stars\,vs))$} & \bl{$\dn$} & \bl{$Stars\,(inj\,r\,c\,v\,::\,vs)$}
+ \end{tabular}
+ \end{center}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
@@ -638,7 +702,7 @@
\begin{center}
\begin{tabular}{l@{\hspace{1mm}}c@{\hspace{1mm}}l}
\bl{$lex\,r\,[]$} & \bl{$\dn$} & \bl{if $nullable(r)$ then $mkeps(r)$ else $error$}\\
- \bl{$lex\,r\,c::s$} & \bl{$\dn$} & \bl{$inj\,r\,c\,lex(der(c,r), s)$}\\
+ \bl{$lex\,r\,a::s$} & \bl{$\dn$} & \bl{$inj\,r\,a\,lex(der(a,r), s)$}\\
\end{tabular}
\end{center}
@@ -689,6 +753,17 @@
\small
for extracting subpatterns \bl{$(z: ((x:ab) + (y:ba))$}
+\begin{textblock}{6}(10,10)
+\begin{bubble}[2cm]
+\small
+\begin{tabular}{l}
+ \bl{$(id : r_{id})$}\\
+ \bl{$(key : r_{key})$}\\
+\end{tabular}
+\end{bubble}
+\end{textblock}
+
+
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%