diff -r 0c491eff5b01 -r 14e5ae1fb541 progs/lexer/lex.sc --- a/progs/lexer/lex.sc Mon Feb 03 13:25:59 2025 +0000 +++ b/progs/lexer/lex.sc Fri Sep 05 16:59:48 2025 +0100 @@ -7,28 +7,33 @@ // -// regular expressions including records -abstract class Rexp -case object ZERO extends Rexp -case object ONE extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class RECD(x: String, r: Rexp) extends Rexp - // records for extracting strings or tokens - +// regular expressions including recods +// for extracting strings or tokens +enum Rexp { + case ZERO + case ONE + case CHAR(c: Char) + case ALT(r1: Rexp, r2: Rexp) + case SEQ(r1: Rexp, r2: Rexp) + case STAR(r: Rexp) + case RECD[A](label: A, r: Rexp) +} +import Rexp._ + // values -abstract class Val -case object Empty extends Val -case class Chr(c: Char) extends Val -case class Sequ(v1: Val, v2: Val) extends Val -case class Left(v: Val) extends Val -case class Right(v: Val) extends Val -case class Stars(vs: List[Val]) extends Val -case class Rec(x: String, v: Val) extends Val - +enum Val { + case Empty + case Chr(c: Char) + case Sequ(v1: Val, v2: Val) + case Left(v: Val) + case Right(v: Val) + case Stars(vs: List[Val]) + case Rec[A](label: A, v: Val) +} +import Val._ + // some convenience for typing in regular expressions +import scala.language.implicitConversions def charlist2rexp(s : List[Char]): Rexp = s match { case Nil => ONE @@ -48,9 +53,6 @@ // to use & for records, instead of $ which had // its precedence be changed in Scala 3 -extension (s: String) { - def & (r: Rexp) = RECD(s, r) -} val TEST = ("ab" | "ba").% @@ -91,21 +93,27 @@ // extracts an environment from a value; // used for tokenising a string -def env(v: Val) : List[(String, String)] = v match { +//import scala.reflect.ClassTag + +def env[A](v: Val) : List[(A, String)] = v match { case Empty => Nil case Chr(c) => Nil case Left(v) => env(v) case Right(v) => env(v) case Sequ(v1, v2) => env(v1) ::: env(v2) case Stars(vs) => vs.flatMap(env) - case Rec(x, v) => (x, flatten(v))::env(v) + case Rec[A](x, v) => (x, flatten(v))::env(v) } // The injection and mkeps part of the lexer //=========================================== -def mkeps(r: Rexp) : Val = r match { +// the pattern-matches are defined to be @unchecked +// because they do not need to be defined for +// all cases + +def mkeps(r: Rexp) : Val = (r: @unchecked) match { case ONE => Empty case ALT(r1, r2) => if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2)) @@ -114,7 +122,7 @@ case RECD(x, r) => Rec(x, mkeps(r)) } -def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { +def inj(r: Rexp, c: Char, v: Val) : Val = ((r, v) : @unchecked) match { case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) @@ -132,13 +140,27 @@ case c::cs => inj(r, c, lex(der(c, r), cs)) } -def lexing(r: Rexp, s: String) = - env(lex(r, s.toList)) + println(lex(("ab" | "a") ~ (ONE | "b"), "ab".toList)) println(lex(STAR("aa" | "a"), "aaa".toList)) +println(lex(STAR(STAR("a")), "aaa".toList)) + +val re = ("a" | "ab") ~ ("c" | "bc") + +println(pders1("abc", re).toList.mkString("\n")) +pders('a', pder('a', re)))) +draw(simp(der('a', der('a', der('a', re))))) + +size(simp(ders(, re))) +size(simp(der('a', der('a', re)))) +size(simp(der('a', der('a', der('a', re))))) + + +lex(re, "aaaaa".toList) + // The Lexing Rules for the WHILE Language def PLUS(r: Rexp) = r ~ r.% @@ -163,25 +185,31 @@ val STRING: Rexp = "\"" ~ SYM.% ~ "\"" -val WHILE_REGS = (("k" & KEYWORD) | - ("i" & ID) | - ("o" & OP) | - ("n" & NUM) | - ("s" & SEMI) | - ("str" & STRING) | - ("p" & (LPAREN | RPAREN)) | - ("w" & WHITESPACE)).% +enum TAGS { + case Key, Id, Op, Num, Semi, Str, Paren, Wht +} +import TAGS._ + +extension (t: TAGS) { + def & (r: Rexp) = RECD[TAGS](t, r) +} -val KY : Rexp = "if" | "read" | "write" -val WH : Rexp = " " | "\n" +def lexing(r: Rexp, s: String) = + env[TAGS](lex(r, s.toList)) -val TRIV_REGS = (("k" & KY) | - ("w" & WHITESPACE)).% +val WHILE_REGS = ((Key & KEYWORD) | + (Id & ID) | + (Op & OP) | + (Num & NUM) | + (Semi & SEMI) | + (Str & STRING) | + (Paren & (LPAREN | RPAREN)) | + (Wht & WHITESPACE)).% + // Two Simple While Tests //======================== - -//@arg(doc = "small tests") + @main def small() = {