# HG changeset patch # User Christian Urban # Date 1603288520 -3600 # Node ID da2488db453edb394231c52610fd70c4adf26feb # Parent faa4489267d5dbb4764d23cf9db765e3520ffc86 updated diff -r faa4489267d5 -r da2488db453e progs/lexer/lex.sc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/lexer/lex.sc Wed Oct 21 14:55:20 2020 +0100 @@ -0,0 +1,191 @@ +// A simple lexer inspired by work of Sulzmann & Lu +//================================================== +// + +// regular expressions including records +abstract class Rexp +case object ZERO extends Rexp +case object ONE extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class RECD(x: String, r: Rexp) extends Rexp + // records for extracting strings or tokens + +// values +abstract class Val +case object Empty extends Val +case class Chr(c: Char) extends Val +case class Sequ(v1: Val, v2: Val) extends Val +case class Left(v: Val) extends Val +case class Right(v: Val) extends Val +case class Stars(vs: List[Val]) extends Val +case class Rec(x: String, v: Val) extends Val + +// some convenience for typing in regular expressions + +def charlist2rexp(s : List[Char]): Rexp = s match { + case Nil => ONE + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = + charlist2rexp(s.toList) + +implicit def RexpOps(r: Rexp) = new { + def | (s: Rexp) = ALT(r, s) + def % = STAR(r) + def ~ (s: Rexp) = SEQ(r, s) +} + +implicit def stringOps(s: String) = new { + def | (r: Rexp) = ALT(s, r) + def | (r: String) = ALT(s, r) + def % = STAR(s) + def ~ (r: Rexp) = SEQ(s, r) + def ~ (r: String) = SEQ(s, r) + def $ (r: Rexp) = RECD(s, r) +} + +def nullable(r: Rexp) : Boolean = r match { + case ZERO => false + case ONE => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case RECD(_, r1) => nullable(r1) +} + +def der(c: Char, r: Rexp) : Rexp = r match { + case ZERO => ZERO + case ONE => ZERO + case CHAR(d) => if (c == d) ONE else ZERO + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case RECD(_, r1) => der(c, r1) +} + + +// extracts a string from a value +def flatten(v: Val) : String = v match { + case Empty => "" + case Chr(c) => c.toString + case Left(v) => flatten(v) + case Right(v) => flatten(v) + case Sequ(v1, v2) => flatten(v1) ++ flatten(v2) + case Stars(vs) => vs.map(flatten).mkString + case Rec(_, v) => flatten(v) +} + + +// extracts an environment from a value; +// used for tokenising a string +def env(v: Val) : List[(String, String)] = v match { + case Empty => Nil + case Chr(c) => Nil + case Left(v) => env(v) + case Right(v) => env(v) + case Sequ(v1, v2) => env(v1) ::: env(v2) + case Stars(vs) => vs.flatMap(env) + case Rec(x, v) => (x, flatten(v))::env(v) +} + + +// The injection and mkeps part of the lexer +//=========================================== + +def mkeps(r: Rexp) : Val = r match { + case ONE => Empty + case ALT(r1, r2) => + if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2)) + case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) + case STAR(r) => Stars(Nil) + case RECD(x, r) => Rec(x, mkeps(r)) +} + +def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { + case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) + case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) + case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) + case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) + case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1)) + case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2)) + case (CHAR(d), Empty) => Chr(c) + case (RECD(x, r1), _) => Rec(x, inj(r1, c, v)) +} + +// lexing functions including simplification +def lex(r: Rexp, s: List[Char]) : Val = s match { + case Nil => if (nullable(r)) mkeps(r) else + { throw new Exception("lexing error") } + case c::cs => inj(r, c, lex(der(c, r), cs)) +} + +def lexing(r: Rexp, s: String) = + env(lex(r, s.toList)) + + +// The Lexing Rules for the WHILE Language + +def PLUS(r: Rexp) = r ~ r.% + +def Range(s : List[Char]) : Rexp = s match { + case Nil => ZERO + case c::Nil => CHAR(c) + case c::s => ALT(CHAR(c), Range(s)) +} +def RANGE(s: String) = Range(s.toList) + +val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") +val DIGIT = RANGE("0123456789") +val ID = SYM ~ (SYM | DIGIT).% +val NUM = PLUS(DIGIT) +val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" +val SEMI: Rexp = ";" +val OP: Rexp = ":=" | "=" | "-" | "+" | "*" | "!=" | "<" | ">" +val WHITESPACE = PLUS(" " | "\n" | "\t") +val RPAREN: Rexp = "{" +val LPAREN: Rexp = "}" +val STRING: Rexp = "\"" ~ SYM.% ~ "\"" + + +val WHILE_REGS = (("k" $ KEYWORD) | + ("i" $ ID) | + ("o" $ OP) | + ("n" $ NUM) | + ("s" $ SEMI) | + ("str" $ STRING) | + ("p" $ (LPAREN | RPAREN)) | + ("w" $ WHITESPACE)).% + +val KY : Rexp = "if" | "read" | "write" +val WH : Rexp = " " | "\n" + +val TRIV_REGS = (("k" $ KY) | + ("w" $ WHITESPACE)).% + +// Two Simple While Tests +//======================== + +@doc("small tests") +@main +def small() = { + + val prog0 = """if""" + println(s"test: $prog0") + println(lexing(WHILE_REGS, prog0)) + + val prog1 = """iffoo""" + println(s"test: $prog1") + println(lexing(WHILE_REGS, prog1)) + + val prog2 = """read n; write n""" + println(s"test: $prog2") + println(lexing(WHILE_REGS, prog2)) +} + diff -r faa4489267d5 -r da2488db453e slides/slides04.pdf Binary file slides/slides04.pdf has changed diff -r faa4489267d5 -r da2488db453e slides/slides04.tex --- a/slides/slides04.tex Wed Oct 21 09:24:32 2020 +0100 +++ b/slides/slides04.tex Wed Oct 21 14:55:20 2020 +0100 @@ -487,7 +487,7 @@ \bl{$|Left(v)|$} & \bl{$\dn$} & \bl{$|v|$}\\ \bl{$|Right(v)|$} & \bl{$\dn$} & \bl{$|v|$}\\ \bl{$|Seq(v_1,v_2)|$}& \bl{$\dn$} & \bl{$|v_1| \,@\, |v_2|$}\\ - \bl{$|[v_1,\ldots ,v_n]|$} & \bl{$\dn$} & \bl{$|v_1| \,@\ldots @\, |v_n|$}\\ + \bl{$|Stars\,[v_1,\ldots ,v_n]|$} & \bl{$\dn$} & \bl{$|v_1| \,@\ldots @\, |v_n|$}\\ \end{tabular} \end{center} @@ -604,7 +604,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] \frametitle{Inject} @@ -629,7 +629,71 @@ & result $\mapsto$ a value \end{tabular} \end{frame} - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + \begin{frame}[t] + + \begin{center} + \begin{tabular}{@{\hspace{-3mm}}l@{\hspace{1mm}}c@{\hspace{1mm}}l@{}} + \bl{$inj\,(c)\,c\,(Empty)$} & \bl{$\dn$} & \bl{$Char\,c$}\\ + \end{tabular} + \end{center} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + \begin{frame}[t] + + \begin{center} + \begin{tabular}{@{\hspace{-3mm}}l@{\hspace{1mm}}c@{\hspace{1mm}}l@{}} + \bl{$inj\,(r_1 + r_2)\,c\,(Left(v))$} & \bl{$\dn$} & \bl{$Left(inj\,r_1\,c\,v)$}\\ + \bl{$inj\,(r_1 + r_2)\,c\,(Right(v))$} & \bl{$\dn$} & \bl{$Right(inj\,r_2\,c\,v)$}\\ + + \end{tabular} + \end{center} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + \begin{frame}[t] + + \begin{center} + \begin{tabular}{@{\hspace{-8mm}}l@{\hspace{1mm}}c@{\hspace{1mm}}l@{}} + \\[-15mm] + \bl{$inj\,(r_1 \cdot r_2)\,c\,(Seq(v_1,v_2))$} & \bl{$\dn$} & \bl{$Seq(inj\,r_1\,c\,v_1,v_2)$}\\ + \bl{$inj\,(r_1 \cdot r_2)\,c\,(Left(Seq(v_1,v_2)))$} & \bl{$\dn$} & \bl{$Seq(inj\,r_1\,c\,v_1,v_2)$}\\ + \bl{$inj\,(r_1 \cdot r_2)\,c\,(Right(v))$} & \bl{$\dn$} & \bl{$Seq(mkeps(r_1),inj\,r_2\,c\,v)$}\\ + \end{tabular} +\end{center} + + +\begin{textblock}{13}(1,13.4) +\begin{bubble}[13cm] +\small +\bl{$\der\, c\, (r_1 \cdot r_2)$} \bl{$\dn$} \bl{\textbf{if} $nullable (r_1)$} + \bl{\textbf{then} $(\der\,c\,r_1) \cdot r_2 + \der\, c\, r_2$} + \bl{\textbf{else} $(\der\, c\, r_1) \cdot r_2$} +\end{bubble} +\end{textblock} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + \begin{frame}[t] + + \begin{center} + \begin{tabular}{@{\hspace{-3mm}}l@{\hspace{1mm}}c@{\hspace{1mm}}l@{}} + \bl{$inj\,(r^*)\,c\,(Seq(v,Stars\,vs))$} & \bl{$\dn$} & \bl{$Stars\,(inj\,r\,c\,v\,::\,vs)$} + \end{tabular} + \end{center} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] @@ -638,7 +702,7 @@ \begin{center} \begin{tabular}{l@{\hspace{1mm}}c@{\hspace{1mm}}l} \bl{$lex\,r\,[]$} & \bl{$\dn$} & \bl{if $nullable(r)$ then $mkeps(r)$ else $error$}\\ - \bl{$lex\,r\,c::s$} & \bl{$\dn$} & \bl{$inj\,r\,c\,lex(der(c,r), s)$}\\ + \bl{$lex\,r\,a::s$} & \bl{$\dn$} & \bl{$inj\,r\,a\,lex(der(a,r), s)$}\\ \end{tabular} \end{center} @@ -689,6 +753,17 @@ \small for extracting subpatterns \bl{$(z: ((x:ab) + (y:ba))$} +\begin{textblock}{6}(10,10) +\begin{bubble}[2cm] +\small +\begin{tabular}{l} + \bl{$(id : r_{id})$}\\ + \bl{$(key : r_{key})$}\\ +\end{tabular} +\end{bubble} +\end{textblock} + + \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%