progs/lexer/lex.sc
changeset 981 14e5ae1fb541
parent 959 64ec1884d860
equal deleted inserted replaced
980:0c491eff5b01 981:14e5ae1fb541
     5 //
     5 //
     6 //   amm lex.sc 
     6 //   amm lex.sc 
     7 //
     7 //
     8 
     8 
     9 
     9 
    10 // regular expressions including records
    10 // regular expressions including recods 
    11 abstract class Rexp 
    11 // for extracting strings or tokens
    12 case object ZERO extends Rexp
    12 enum Rexp { 
    13 case object ONE extends Rexp
    13   case ZERO 
    14 case class CHAR(c: Char) extends Rexp
    14   case ONE 
    15 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
    15   case CHAR(c: Char)
    16 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
    16   case ALT(r1: Rexp, r2: Rexp)
    17 case class STAR(r: Rexp) extends Rexp 
    17   case SEQ(r1: Rexp, r2: Rexp)
    18 case class RECD(x: String, r: Rexp) extends Rexp  
    18   case STAR(r: Rexp) 
    19           // records for extracting strings or tokens
    19   case RECD[A](label: A, r: Rexp)
    20   
    20 }
       
    21 import Rexp._  
       
    22 
    21 // values  
    23 // values  
    22 abstract class Val
    24 enum Val {
    23 case object Empty extends Val
    25   case Empty 
    24 case class Chr(c: Char) extends Val
    26   case Chr(c: Char) 
    25 case class Sequ(v1: Val, v2: Val) extends Val
    27   case Sequ(v1: Val, v2: Val) 
    26 case class Left(v: Val) extends Val
    28   case Left(v: Val) 
    27 case class Right(v: Val) extends Val
    29   case Right(v: Val) 
    28 case class Stars(vs: List[Val]) extends Val
    30   case Stars(vs: List[Val]) 
    29 case class Rec(x: String, v: Val) extends Val
    31   case Rec[A](label: A, v: Val)
    30    
    32 }   
       
    33 import Val._
       
    34 
    31 // some convenience for typing in regular expressions
    35 // some convenience for typing in regular expressions
       
    36 import scala.language.implicitConversions
    32 
    37 
    33 def charlist2rexp(s : List[Char]): Rexp = s match {
    38 def charlist2rexp(s : List[Char]): Rexp = s match {
    34   case Nil => ONE
    39   case Nil => ONE
    35   case c::Nil => CHAR(c)
    40   case c::Nil => CHAR(c)
    36   case c::s => SEQ(CHAR(c), charlist2rexp(s))
    41   case c::s => SEQ(CHAR(c), charlist2rexp(s))
    46   def ~ (s: Rexp) = SEQ(r, s)
    51   def ~ (s: Rexp) = SEQ(r, s)
    47 }
    52 }
    48 
    53 
    49 // to use & for records, instead of $ which had
    54 // to use & for records, instead of $ which had
    50 // its precedence be changed in Scala 3
    55 // its precedence be changed in Scala 3
    51 extension (s: String) {
       
    52   def & (r: Rexp) = RECD(s, r)
       
    53 }
       
    54 
    56 
    55 val TEST = ("ab" | "ba").%
    57 val TEST = ("ab" | "ba").%
    56 
    58 
    57 def nullable(r: Rexp) : Boolean = r match {
    59 def nullable(r: Rexp) : Boolean = r match {
    58   case ZERO => false
    60   case ZERO => false
    89 }
    91 }
    90 
    92 
    91 
    93 
    92 // extracts an environment from a value;
    94 // extracts an environment from a value;
    93 // used for tokenising a string
    95 // used for tokenising a string
    94 def env(v: Val) : List[(String, String)] = v match {
    96 //import scala.reflect.ClassTag
       
    97 
       
    98 def env[A](v: Val) : List[(A, String)] = v match {
    95   case Empty => Nil
    99   case Empty => Nil
    96   case Chr(c) => Nil
   100   case Chr(c) => Nil
    97   case Left(v) => env(v)
   101   case Left(v) => env(v)
    98   case Right(v) => env(v)
   102   case Right(v) => env(v)
    99   case Sequ(v1, v2) => env(v1) ::: env(v2)
   103   case Sequ(v1, v2) => env(v1) ::: env(v2)
   100   case Stars(vs) => vs.flatMap(env)
   104   case Stars(vs) => vs.flatMap(env)
   101   case Rec(x, v) => (x, flatten(v))::env(v)
   105   case Rec[A](x, v) => (x, flatten(v))::env(v)  
   102 }
   106 }
   103 
   107 
   104 
   108 
   105 // The injection and mkeps part of the lexer
   109 // The injection and mkeps part of the lexer
   106 //===========================================
   110 //===========================================
   107 
   111 
   108 def mkeps(r: Rexp) : Val = r match {
   112 // the pattern-matches are defined to be @unchecked
       
   113 // because they do not need to be defined for
       
   114 // all cases
       
   115 
       
   116 def mkeps(r: Rexp) : Val = (r: @unchecked) match {
   109   case ONE => Empty
   117   case ONE => Empty
   110   case ALT(r1, r2) => 
   118   case ALT(r1, r2) => 
   111     if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
   119     if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
   112   case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
   120   case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
   113   case STAR(r) => Stars(Nil)
   121   case STAR(r) => Stars(Nil)
   114   case RECD(x, r) => Rec(x, mkeps(r))
   122   case RECD(x, r) => Rec(x, mkeps(r))
   115 }
   123 }
   116 
   124 
   117 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
   125 def inj(r: Rexp, c: Char, v: Val) : Val = ((r, v) : @unchecked) match {
   118   case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
   126   case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
   119   case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
   127   case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
   120   case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
   128   case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
   121   case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
   129   case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
   122   case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
   130   case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
   130   case Nil => if (nullable(r)) mkeps(r) else 
   138   case Nil => if (nullable(r)) mkeps(r) else 
   131     { throw new Exception("lexing error") } 
   139     { throw new Exception("lexing error") } 
   132   case c::cs => inj(r, c, lex(der(c, r), cs))
   140   case c::cs => inj(r, c, lex(der(c, r), cs))
   133 }
   141 }
   134 
   142 
   135 def lexing(r: Rexp, s: String) = 
   143 
   136   env(lex(r, s.toList))
       
   137 
   144 
   138 println(lex(("ab" | "a") ~ (ONE | "b"), "ab".toList))
   145 println(lex(("ab" | "a") ~ (ONE | "b"), "ab".toList))
   139 
   146 
   140 println(lex(STAR("aa" | "a"), "aaa".toList))
   147 println(lex(STAR("aa" | "a"), "aaa".toList))
       
   148 
       
   149 println(lex(STAR(STAR("a")), "aaa".toList))
       
   150 
       
   151 val re = ("a" | "ab") ~ ("c" | "bc")
       
   152 
       
   153 println(pders1("abc", re).toList.mkString("\n"))
       
   154 pders('a', pder('a', re))))
       
   155 draw(simp(der('a', der('a', der('a', re)))))
       
   156 
       
   157 size(simp(ders(, re)))
       
   158 size(simp(der('a', der('a', re))))
       
   159 size(simp(der('a', der('a', der('a', re)))))
       
   160 
       
   161 
       
   162 lex(re, "aaaaa".toList)
   141 
   163 
   142 // The Lexing Rules for the WHILE Language
   164 // The Lexing Rules for the WHILE Language
   143 
   165 
   144 def PLUS(r: Rexp) = r ~ r.%
   166 def PLUS(r: Rexp) = r ~ r.%
   145 
   167 
   161 val RPAREN: Rexp = "}"
   183 val RPAREN: Rexp = "}"
   162 val LPAREN: Rexp = "{"
   184 val LPAREN: Rexp = "{"
   163 val STRING: Rexp = "\"" ~ SYM.% ~ "\""
   185 val STRING: Rexp = "\"" ~ SYM.% ~ "\""
   164 
   186 
   165 
   187 
   166 val WHILE_REGS = (("k" & KEYWORD) | 
   188 enum TAGS {
   167                   ("i" & ID) | 
   189   case Key, Id, Op, Num, Semi, Str, Paren, Wht
   168                   ("o" & OP) | 
   190 }
   169                   ("n" & NUM) | 
   191 import TAGS._
   170                   ("s" & SEMI) | 
   192 
   171                   ("str" & STRING) |
   193 extension (t: TAGS) {
   172                   ("p" & (LPAREN | RPAREN)) | 
   194   def & (r: Rexp) = RECD[TAGS](t, r)
   173                   ("w" & WHITESPACE)).%
   195 }
   174 
   196 
   175 val KY : Rexp = "if" | "read" | "write"
   197 def lexing(r: Rexp, s: String) = 
   176 val WH : Rexp = " " | "\n"
   198   env[TAGS](lex(r, s.toList))
   177 
   199 
   178 val TRIV_REGS = (("k" & KY) | 
   200 val WHILE_REGS = ((Key & KEYWORD) | 
   179                   ("w" & WHITESPACE)).%
   201                   (Id & ID) | 
       
   202                   (Op & OP) | 
       
   203                   (Num & NUM) | 
       
   204                   (Semi & SEMI) | 
       
   205                   (Str & STRING) |
       
   206                   (Paren & (LPAREN | RPAREN)) | 
       
   207                   (Wht & WHITESPACE)).%
       
   208 
   180 
   209 
   181 // Two Simple While Tests
   210 // Two Simple While Tests
   182 //========================
   211 //========================
   183 
   212  
   184 //@arg(doc = "small tests")
       
   185 @main
   213 @main
   186 def small() = {
   214 def small() = {
   187 
   215 
   188   val prog0 = """if"""
   216   val prog0 = """if"""
   189   println(s"test: $prog0")
   217   println(s"test: $prog0")