progs/lexer/lex.sc
changeset 981 14e5ae1fb541
parent 959 64ec1884d860
--- a/progs/lexer/lex.sc	Mon Feb 03 13:25:59 2025 +0000
+++ b/progs/lexer/lex.sc	Fri Sep 05 16:59:48 2025 +0100
@@ -7,28 +7,33 @@
 //
 
 
-// regular expressions including records
-abstract class Rexp 
-case object ZERO extends Rexp
-case object ONE extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
-case class STAR(r: Rexp) extends Rexp 
-case class RECD(x: String, r: Rexp) extends Rexp  
-          // records for extracting strings or tokens
-  
+// regular expressions including records
+// for extracting strings or tokens
+enum Rexp {  // Scala 3 enum: one case per regular-expression constructor
+  case ZERO 
+  case ONE  // charlist2rexp maps the empty character list to ONE; mkeps(ONE) is Empty
+  case CHAR(c: Char)  // a single character c
+  case ALT(r1: Rexp, r2: Rexp)  // mkeps yields Left for r1 when nullable(r1), otherwise Right for r2
+  case SEQ(r1: Rexp, r2: Rexp)  // inj pairs SEQ with Sequ values
+  case STAR(r: Rexp)  // inj pairs STAR with Sequ(_, Stars(_)) and rebuilds a Stars value
+  case RECD[A](label: A, r: Rexp)  // record: r tagged with a label of any type A; mkeps turns it into Rec with the same label
+}
+import Rexp._  
+
 // values  
-abstract class Val
-case object Empty extends Val
-case class Chr(c: Char) extends Val
-case class Sequ(v1: Val, v2: Val) extends Val
-case class Left(v: Val) extends Val
-case class Right(v: Val) extends Val
-case class Stars(vs: List[Val]) extends Val
-case class Rec(x: String, v: Val) extends Val
-   
+enum Val {  // values built by mkeps and inj for a matched regular expression
+  case Empty  // mkeps result for ONE; contributes nothing to env
+  case Chr(c: Char)  // a single character; contributes nothing to env
+  case Sequ(v1: Val, v2: Val)  // env concatenates env(v1) and env(v2)
+  case Left(v: Val)  // mkeps result for ALT(r1, r2) when r1 is nullable
+  case Right(v: Val)  // mkeps result for ALT(r1, r2) when r1 is not nullable
+  case Stars(vs: List[Val])  // env flat-maps over the values in vs
+  case Rec[A](label: A, v: Val)  // labelled value; env emits (label, flatten(v)) for it
+}   
+import Val._
+
 // some convenience for typing in regular expressions
+import scala.language.implicitConversions
 
 def charlist2rexp(s : List[Char]): Rexp = s match {
   case Nil => ONE
@@ -48,9 +53,6 @@
 
 // to use & for records, instead of $ which had
 // its precedence be changed in Scala 3
-extension (s: String) {
-  def & (r: Rexp) = RECD(s, r)
-}
 
 val TEST = ("ab" | "ba").%
 
@@ -91,21 +93,27 @@
 
 // extracts an environment from a value;
 // used for tokenising a string
-def env(v: Val) : List[(String, String)] = v match {
+//import scala.reflect.ClassTag
+
+def env[A](v: Val) : List[(A, String)] = v match {  // A is the label type of the records to collect; the caller picks it (lexing uses env[TAGS])
   case Empty => Nil
   case Chr(c) => Nil
   case Left(v) => env(v)
   case Right(v) => env(v)
   case Sequ(v1, v2) => env(v1) ::: env(v2)
   case Stars(vs) => vs.flatMap(env)
-  case Rec(x, v) => (x, flatten(v))::env(v)
+  case Rec[A](x, v) => (x, flatten(v))::env(v)    // NOTE(review): A is presumably erased at runtime, so any Rec would match here whatever its label type — confirm
 }
 
 
 // The injection and mkeps part of the lexer
 //===========================================
 
-def mkeps(r: Rexp) : Val = r match {
+// the pattern-matches are defined to be @unchecked
+// because they do not need to be defined for
+// all cases
+
+def mkeps(r: Rexp) : Val = (r: @unchecked) match {
   case ONE => Empty
   case ALT(r1, r2) => 
     if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
@@ -114,7 +122,7 @@
   case RECD(x, r) => Rec(x, mkeps(r))
 }
 
-def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
+def inj(r: Rexp, c: Char, v: Val) : Val = ((r, v) : @unchecked) match {
   case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
   case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
   case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
@@ -132,13 +140,27 @@
   case c::cs => inj(r, c, lex(der(c, r), cs))
 }
 
-def lexing(r: Rexp, s: String) = 
-  env(lex(r, s.toList))
+
 
 println(lex(("ab" | "a") ~ (ONE | "b"), "ab".toList))
 
 println(lex(STAR("aa" | "a"), "aaa".toList))
 
+println(lex(STAR(STAR("a")), "aaa".toList))
+
+val re = ("a" | "ab") ~ ("c" | "bc")  // scratch regex used by the derivative experiments below
+
+println(pders1("abc", re).toList.mkString("\n"))
+pders('a', pder('a', re))  // NOTE(review): removed two unmatched ')' so the line parses; confirm the pders/pder signatures
+draw(simp(der('a', der('a', der('a', re)))))
+
+size(simp(der('a', re)))  // NOTE(review): was `ders(, re)` with no first argument; a single derivative is assumed from the two lines below — confirm
+size(simp(der('a', der('a', re))))
+size(simp(der('a', der('a', der('a', re)))))
+
+
+lex(re, "aaaaa".toList)
+
 // The Lexing Rules for the WHILE Language
 
 def PLUS(r: Rexp) = r ~ r.%
@@ -163,25 +185,31 @@
 val STRING: Rexp = "\"" ~ SYM.% ~ "\""
 
 
-val WHILE_REGS = (("k" & KEYWORD) | 
-                  ("i" & ID) | 
-                  ("o" & OP) | 
-                  ("n" & NUM) | 
-                  ("s" & SEMI) | 
-                  ("str" & STRING) |
-                  ("p" & (LPAREN | RPAREN)) | 
-                  ("w" & WHITESPACE)).%
+enum TAGS {  // labels attached to the sub-expressions of WHILE_REGS via the & extension
+  case Key, Id, Op, Num, Semi, Str, Paren, Wht
+}
+import TAGS._
+
+extension (t: TAGS) {  // infix syntax for building records labelled with a TAGS value
+  def & (r: Rexp) = RECD[TAGS](t, r)  // t & r wraps r in a record whose label is t
+}
 
-val KY : Rexp = "if" | "read" | "write"
-val WH : Rexp = " " | "\n"
+def lexing(r: Rexp, s: String) =  // runs lex on the characters of s and collects the labelled records of the result
+  env[TAGS](lex(r, s.toList))  // fixes the label type to TAGS, so the result is a List[(TAGS, String)]
 
-val TRIV_REGS = (("k" & KY) | 
-                  ("w" & WHITESPACE)).%
+val WHILE_REGS = ((Key & KEYWORD) |  // eight TAGS-labelled alternatives; the trailing .% is presumably the star operator — confirm
+                  (Id & ID) | 
+                  (Op & OP) | 
+                  (Num & NUM) | 
+                  (Semi & SEMI) | 
+                  (Str & STRING) |
+                  (Paren & (LPAREN | RPAREN)) | 
+                  (Wht & WHITESPACE)).%
+
 
 // Two Simple While Tests
 //========================
-
-//@arg(doc = "small tests")
+ 
 @main
 def small() = {