updated
authorChristian Urban <christian.urban@kcl.ac.uk>
Wed, 21 Oct 2020 14:55:20 +0100
changeset 786 da2488db453e
parent 785 faa4489267d5
child 787 4b2496bc79b2
updated
progs/lexer/lex.sc
slides/slides04.pdf
slides/slides04.tex
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/lexer/lex.sc	Wed Oct 21 14:55:20 2020 +0100
@@ -0,0 +1,191 @@
+// A simple lexer inspired by work of Sulzmann & Lu
+//==================================================
+//
+
+// regular expressions including records
+abstract class Rexp 
+case object ZERO extends Rexp
+case object ONE extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
+case class STAR(r: Rexp) extends Rexp 
+case class RECD(x: String, r: Rexp) extends Rexp  
+                // records for extracting strings or tokens
+  
+// values  
+abstract class Val
+case object Empty extends Val
+case class Chr(c: Char) extends Val
+case class Sequ(v1: Val, v2: Val) extends Val
+case class Left(v: Val) extends Val
+case class Right(v: Val) extends Val
+case class Stars(vs: List[Val]) extends Val
+case class Rec(x: String, v: Val) extends Val
+   
+// some convenience for typing in regular expressions
+
+def charlist2rexp(s : List[Char]): Rexp = s match {
+  case Nil => ONE
+  case c::Nil => CHAR(c)
+  case c::s => SEQ(CHAR(c), charlist2rexp(s))
+}
+implicit def string2rexp(s : String) : Rexp = 
+  charlist2rexp(s.toList)
+
+implicit def RexpOps(r: Rexp) = new {
+  def | (s: Rexp) = ALT(r, s)
+  def % = STAR(r)
+  def ~ (s: Rexp) = SEQ(r, s)
+}
+
+implicit def stringOps(s: String) = new {
+  def | (r: Rexp) = ALT(s, r)
+  def | (r: String) = ALT(s, r)
+  def % = STAR(s)
+  def ~ (r: Rexp) = SEQ(s, r)
+  def ~ (r: String) = SEQ(s, r)
+  def $ (r: Rexp) = RECD(s, r)
+}
+
+def nullable(r: Rexp) : Boolean = r match {
+  case ZERO => false
+  case ONE => true
+  case CHAR(_) => false
+  case ALT(r1, r2) => nullable(r1) || nullable(r2)
+  case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+  case STAR(_) => true
+  case RECD(_, r1) => nullable(r1)
+}
+
+def der(c: Char, r: Rexp) : Rexp = r match {
+  case ZERO => ZERO
+  case ONE => ZERO
+  case CHAR(d) => if (c == d) ONE else ZERO
+  case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+  case SEQ(r1, r2) => 
+    if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+    else SEQ(der(c, r1), r2)
+  case STAR(r) => SEQ(der(c, r), STAR(r))
+  case RECD(_, r1) => der(c, r1)
+}
+
+
+// extracts a string from a value
+def flatten(v: Val) : String = v match {
+  case Empty => ""
+  case Chr(c) => c.toString
+  case Left(v) => flatten(v)
+  case Right(v) => flatten(v)
+  case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
+  case Stars(vs) => vs.map(flatten).mkString
+  case Rec(_, v) => flatten(v)
+}
+
+
+// extracts an environment from a value;
+// used for tokenising a string
+def env(v: Val) : List[(String, String)] = v match {
+  case Empty => Nil
+  case Chr(c) => Nil
+  case Left(v) => env(v)
+  case Right(v) => env(v)
+  case Sequ(v1, v2) => env(v1) ::: env(v2)
+  case Stars(vs) => vs.flatMap(env)
+  case Rec(x, v) => (x, flatten(v))::env(v)
+}
+
+
+// The injection and mkeps part of the lexer
+//===========================================
+
+def mkeps(r: Rexp) : Val = r match {
+  case ONE => Empty
+  case ALT(r1, r2) => 
+    if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
+  case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
+  case STAR(r) => Stars(Nil)
+  case RECD(x, r) => Rec(x, mkeps(r))
+}
+
+def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
+  case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
+  case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
+  case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
+  case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
+  case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
+  case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
+  case (CHAR(d), Empty) => Chr(c) 
+  case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
+}
+
+// lexing functions including simplification
+def lex(r: Rexp, s: List[Char]) : Val = s match {
+  case Nil => if (nullable(r)) mkeps(r) else 
+    { throw new Exception("lexing error") } 
+  case c::cs => inj(r, c, lex(der(c, r), cs))
+}
+
+def lexing(r: Rexp, s: String) = 
+  env(lex(r, s.toList))
+
+
+// The Lexing Rules for the WHILE Language
+
+def PLUS(r: Rexp) = r ~ r.%
+
+def Range(s : List[Char]) : Rexp = s match {
+  case Nil => ZERO
+  case c::Nil => CHAR(c)
+  case c::s => ALT(CHAR(c), Range(s))
+}
+def RANGE(s: String) = Range(s.toList)
+
+val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_")
+val DIGIT = RANGE("0123456789")
+val ID = SYM ~ (SYM | DIGIT).% 
+val NUM = PLUS(DIGIT)
+val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" 
+val SEMI: Rexp = ";"
+val OP: Rexp = ":=" | "=" | "-" | "+" | "*" | "!=" | "<" | ">"
+val WHITESPACE = PLUS(" " | "\n" | "\t")
+val RPAREN: Rexp = "{"
+val LPAREN: Rexp = "}"
+val STRING: Rexp = "\"" ~ SYM.% ~ "\""
+
+
+val WHILE_REGS = (("k" $ KEYWORD) | 
+                  ("i" $ ID) | 
+                  ("o" $ OP) | 
+                  ("n" $ NUM) | 
+                  ("s" $ SEMI) | 
+                  ("str" $ STRING) |
+                  ("p" $ (LPAREN | RPAREN)) | 
+                  ("w" $ WHITESPACE)).%
+
+val KY : Rexp = "if" | "read" | "write"
+val WH : Rexp = " " | "\n"
+
+val TRIV_REGS = (("k" $ KY) | 
+                  ("w" $ WHITESPACE)).%
+
+// Two Simple While Tests
+//========================
+
+@doc("small tests")
+@main
+def small() = {
+
+  val prog0 = """if"""
+  println(s"test: $prog0")
+  println(lexing(WHILE_REGS, prog0))
+
+  val prog1 = """iffoo"""
+  println(s"test: $prog1")
+  println(lexing(WHILE_REGS, prog1))
+
+  val prog2 = """read  n; write n"""  
+  println(s"test: $prog2")
+  println(lexing(WHILE_REGS, prog2))
+}
+
Binary file slides/slides04.pdf has changed
--- a/slides/slides04.tex	Wed Oct 21 09:24:32 2020 +0100
+++ b/slides/slides04.tex	Wed Oct 21 14:55:20 2020 +0100
@@ -487,7 +487,7 @@
   \bl{$|Left(v)|$}   & \bl{$\dn$} & \bl{$|v|$}\\
   \bl{$|Right(v)|$}  & \bl{$\dn$} & \bl{$|v|$}\\
   \bl{$|Seq(v_1,v_2)|$}& \bl{$\dn$} & \bl{$|v_1| \,@\, |v_2|$}\\
-  \bl{$|[v_1,\ldots ,v_n]|$} & \bl{$\dn$} & \bl{$|v_1| \,@\ldots @\, |v_n|$}\\
+  \bl{$|Stars\,[v_1,\ldots ,v_n]|$} & \bl{$\dn$} & \bl{$|v_1| \,@\ldots @\, |v_n|$}\\
 \end{tabular}
 \end{center}
 
@@ -604,7 +604,7 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
 
   
-  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   \begin{frame}[c]
   \frametitle{Inject}
   
@@ -629,7 +629,71 @@
               & result $\mapsto$ a value 
   \end{tabular}
   \end{frame}
-  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+  \begin{frame}[t]
+  
+  \begin{center}
+  \begin{tabular}{@{\hspace{-3mm}}l@{\hspace{1mm}}c@{\hspace{1mm}}l@{}}
+    \bl{$inj\,(c)\,c\,(Empty)$} & \bl{$\dn$}  & \bl{$Char\,c$}\\
+  \end{tabular}
+  \end{center}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+  \begin{frame}[t]
+  
+  \begin{center}
+    \begin{tabular}{@{\hspace{-3mm}}l@{\hspace{1mm}}c@{\hspace{1mm}}l@{}}
+    \bl{$inj\,(r_1 + r_2)\,c\,(Left(v))$} & \bl{$\dn$}  & \bl{$Left(inj\,r_1\,c\,v)$}\\
+    \bl{$inj\,(r_1 + r_2)\,c\,(Right(v))$} & \bl{$\dn$}  & \bl{$Right(inj\,r_2\,c\,v)$}\\
+
+  \end{tabular}
+  \end{center}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+  \begin{frame}[t]
+  
+  \begin{center}
+    \begin{tabular}{@{\hspace{-8mm}}l@{\hspace{1mm}}c@{\hspace{1mm}}l@{}}
+    \\[-15mm]  
+    \bl{$inj\,(r_1 \cdot r_2)\,c\,(Seq(v_1,v_2))$} & \bl{$\dn$}  & \bl{$Seq(inj\,r_1\,c\,v_1,v_2)$}\\
+    \bl{$inj\,(r_1 \cdot r_2)\,c\,(Left(Seq(v_1,v_2)))$} & \bl{$\dn$}  & \bl{$Seq(inj\,r_1\,c\,v_1,v_2)$}\\
+    \bl{$inj\,(r_1 \cdot r_2)\,c\,(Right(v))$} & \bl{$\dn$}  & \bl{$Seq(mkeps(r_1),inj\,r_2\,c\,v)$}\\  
+  \end{tabular}
+\end{center}
+
+
+\begin{textblock}{13}(1,13.4)
+\begin{bubble}[13cm]
+\small
+\bl{$\der\, c\, (r_1 \cdot r_2)$} \bl{$\dn$} \bl{\textbf{if} $nullable (r_1)$}
+ \bl{\textbf{then} $(\der\,c\,r_1) \cdot r_2 + \der\, c\, r_2$}
+ \bl{\textbf{else} $(\der\, c\, r_1) \cdot r_2$}
+\end{bubble}
+\end{textblock}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+  \begin{frame}[t]
+  
+  \begin{center}
+  \begin{tabular}{@{\hspace{-3mm}}l@{\hspace{1mm}}c@{\hspace{1mm}}l@{}}
+    \bl{$inj\,(r^*)\,c\,(Seq(v,Stars\,vs))$} & \bl{$\dn$}  & \bl{$Stars\,(inj\,r\,c\,v\,::\,vs)$}
+  \end{tabular}
+  \end{center}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
   
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}[c]
@@ -638,7 +702,7 @@
 \begin{center}
 \begin{tabular}{l@{\hspace{1mm}}c@{\hspace{1mm}}l}
   \bl{$lex\,r\,[]$} & \bl{$\dn$}  & \bl{if $nullable(r)$ then $mkeps(r)$ else $error$}\\
-  \bl{$lex\,r\,c::s$} & \bl{$\dn$}  & \bl{$inj\,r\,c\,lex(der(c,r), s)$}\\
+  \bl{$lex\,r\,a::s$} & \bl{$\dn$}  & \bl{$inj\,r\,a\,lex(der(a,r), s)$}\\
 \end{tabular}
 \end{center}
 
@@ -689,6 +753,17 @@
 \small
 for extracting subpatterns \bl{$(z: ((x:ab) + (y:ba))$}
 
+\begin{textblock}{6}(10,10)
+\begin{bubble}[2cm]
+\small
+\begin{tabular}{l}
+  \bl{$(id : r_{id})$}\\
+  \bl{$(key : r_{key})$}\\
+\end{tabular}
+\end{bubble}
+\end{textblock}
+
+
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%