solutions/cw5/fun_tokens.sc
changeset 903 2f86ebda3629
parent 894 02ef5c3abc51
child 920 7af2eea19646
equal deleted inserted replaced
902:b40aaffe0793 903:2f86ebda3629
     1 // A tokeniser for the Fun language
     1 // Author: Zhuo Ying Jiang Li
     2 //==================================
     2 // Starting code by Dr Christian Urban
       
     3 
       
     4 // lexer
       
     5 
     3 //
     6 //
     4 // call with 
     7 // Use this command to print the list of tokens:
       
     8 // amm fun_tokens.sc <name>.fun
     5 //
     9 //
     6 //     amm fun_tokens.sc fact.fun
    10 
     7 //
    11 type Token = (String, String)
     8 //     amm fun_tokens.sc defs.fun
    12 type Tokens = List[Token]
     9 //
    13 
    10 
    14 // regular expressions including records
    11 
    15 abstract class Rexp
    12 
       
    13 import scala.language.implicitConversions    
       
    14 import scala.language.reflectiveCalls 
       
    15 
       
    16 abstract class Rexp 
       
    17 case object ZERO extends Rexp
    16 case object ZERO extends Rexp
    18 case object ONE extends Rexp
    17 case object ONE extends Rexp
    19 case class CHAR(c: Char) extends Rexp
    18 case class CHAR(c: Char) extends Rexp
    20 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
    19 case class RANGE(chars: List[Char]) extends Rexp
    21 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
    20 case class ALT(r1: Rexp, r2: Rexp) extends Rexp
    22 case class STAR(r: Rexp) extends Rexp 
    21 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
    23 case class RECD(x: String, r: Rexp) extends Rexp
    22 case class STAR(r: Rexp) extends Rexp
    24   
    23 case class OPTIONAL(r: Rexp) extends Rexp
       
    24 case class PLUS(r: Rexp) extends Rexp
       
    25 case class NTIMES(r: Rexp, n: Int) extends Rexp
       
    26 case class RECD(x: String, r: Rexp) extends Rexp  // records for extracting strings or tokens
       
    27 
       
    28 // values
    25 abstract class Val
    29 abstract class Val
    26 case object Empty extends Val
    30 case object Empty extends Val
    27 case class Chr(c: Char) extends Val
    31 case class Chr(c: Char) extends Val
    28 case class Sequ(v1: Val, v2: Val) extends Val
    32 case class Sequ(v1: Val, v2: Val) extends Val
    29 case class Left(v: Val) extends Val
    33 case class Left(v: Val) extends Val
    30 case class Right(v: Val) extends Val
    34 case class Right(v: Val) extends Val
    31 case class Stars(vs: List[Val]) extends Val
    35 case class Stars(vs: List[Val]) extends Val
       
    36 case class Opt(v: Val) extends Val
       
    37 case class Pls(vs: List[Val]) extends Val
       
    38 case class Nt(vs: List[Val]) extends Val
    32 case class Rec(x: String, v: Val) extends Val
    39 case class Rec(x: String, v: Val) extends Val
    33    
    40 
    34 // some convenience for typing in regular expressions
    41 // some convenience for typing in regular expressions
    35 def charlist2rexp(s : List[Char]): Rexp = s match {
    42 def charlist2rexp(s : List[Char]): Rexp = s match {
    36   case Nil => ONE
    43   case Nil => ONE
    37   case c::Nil => CHAR(c)
    44   case c::Nil => CHAR(c)
    38   case c::s => SEQ(CHAR(c), charlist2rexp(s))
    45   case c::vs => SEQ(CHAR(c), charlist2rexp(vs))
    39 }
    46 }
    40 implicit def string2rexp(s : String) : Rexp = 
    47 
       
    48 implicit def string2rexp(s : String) : Rexp =
    41   charlist2rexp(s.toList)
    49   charlist2rexp(s.toList)
    42 
    50 
    43 implicit def RexpOps(r: Rexp) = new {
    51 implicit def RexpOps(r: Rexp) = new {
    44   def | (s: Rexp) = ALT(r, s)
    52   def | (s: Rexp) = ALT(r, s)
    45   def % = STAR(r)
    53   def % = STAR(r)
       
    54   def ? = OPTIONAL(r)
       
    55   def + = PLUS(r)
       
    56   def ^ (n: Int) = NTIMES(r, n)
    46   def ~ (s: Rexp) = SEQ(r, s)
    57   def ~ (s: Rexp) = SEQ(r, s)
    47 }
    58 }
    48 
    59 
    49 implicit def stringOps(s: String) = new {
    60 implicit def stringOps(s: String) = new {
    50   def | (r: Rexp) = ALT(s, r)
    61   def | (r: Rexp) = ALT(s, r)
    51   def | (r: String) = ALT(s, r)
    62   def | (r: String) = ALT(s, r)
    52   def % = STAR(s)
    63   def % = STAR(s)
       
    64   def ? = OPTIONAL(s)
       
    65   def + = PLUS(s)
       
    66   def ^ (n: Int) = NTIMES(s, n)
    53   def ~ (r: Rexp) = SEQ(s, r)
    67   def ~ (r: Rexp) = SEQ(s, r)
    54   def ~ (r: String) = SEQ(s, r)
    68   def ~ (r: String) = SEQ(s, r)
    55   def $ (r: Rexp) = RECD(s, r)
    69   def $ (r: Rexp) = RECD(s, r)
    56 }
    70 }
    57 
    71 
    58 def nullable (r: Rexp) : Boolean = r match {
    72 def nullable(r: Rexp) : Boolean = r match {
    59   case ZERO => false
    73   case ZERO => false
    60   case ONE => true
    74   case ONE => true
    61   case CHAR(_) => false
    75   case CHAR(_) => false
       
    76   case RANGE(_) => false
    62   case ALT(r1, r2) => nullable(r1) || nullable(r2)
    77   case ALT(r1, r2) => nullable(r1) || nullable(r2)
    63   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    78   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    64   case STAR(_) => true
    79   case STAR(_) => true
       
    80   case OPTIONAL(r1) => true
       
    81   case PLUS(r1) => nullable(r1)
       
    82   case NTIMES(r1, n) => if (n == 0) true else nullable(r1)
    65   case RECD(_, r1) => nullable(r1)
    83   case RECD(_, r1) => nullable(r1)
    66 }
    84 }
    67 
    85 
    68 def der (c: Char, r: Rexp) : Rexp = r match {
    86 def der(c: Char, r: Rexp) : Rexp = r match {
    69   case ZERO => ZERO
    87   case ZERO => ZERO
    70   case ONE => ZERO
    88   case ONE => ZERO
    71   case CHAR(d) => if (c == d) ONE else ZERO
    89   case CHAR(d) => if (c == d) ONE else ZERO
       
    90   case RANGE(chars) => if (chars.contains(c)) ONE else ZERO
    72   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
    91   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
    73   case SEQ(r1, r2) => 
    92   case SEQ(r1, r2) =>
    74     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
    93     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
    75     else SEQ(der(c, r1), r2)
    94     else SEQ(der(c, r1), r2)
    76   case STAR(r) => SEQ(der(c, r), STAR(r))
    95   case STAR(r) => SEQ(der(c, r), STAR(r))
       
    96   case OPTIONAL(r) => der(c, r)
       
    97   case PLUS(r) => SEQ(der(c, r), STAR(r))
       
    98   case NTIMES(r1, n) => if (n == 0) ZERO else SEQ(der(c, r1), NTIMES(r1, n - 1))
    77   case RECD(_, r1) => der(c, r1)
    99   case RECD(_, r1) => der(c, r1)
    78 }
   100 }
    79 
   101 
    80 
   102 // extracts a string from a value
    81 // extracts a string from value
       
    82 def flatten(v: Val) : String = v match {
   103 def flatten(v: Val) : String = v match {
    83   case Empty => ""
   104   case Empty => ""
    84   case Chr(c) => c.toString
   105   case Chr(c) => c.toString
    85   case Left(v) => flatten(v)
   106   case Left(v) => flatten(v)
    86   case Right(v) => flatten(v)
   107   case Right(v) => flatten(v)
    87   case Sequ(v1, v2) => flatten(v1) + flatten(v2)
   108   case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
    88   case Stars(vs) => vs.map(flatten).mkString
   109   case Stars(vs) => vs.map(flatten).mkString
       
   110   case Opt(v) => flatten(v)
       
   111   case Pls(vs) => vs.map(flatten).mkString
       
   112   case Nt(vs) => vs.map(flatten).mkString
    89   case Rec(_, v) => flatten(v)
   113   case Rec(_, v) => flatten(v)
    90 }
   114 }
    91 
   115 
    92 // extracts an environment from a value;
   116 // extracts an environment from a value;
    93 // used for tokenise a string
   117 // used for tokenising a string
    94 def env(v: Val) : List[(String, String)] = v match {
   118 def env(v: Val) : Tokens = v match {
    95   case Empty => Nil
   119   case Empty => Nil
    96   case Chr(c) => Nil
   120   case Chr(c) => Nil
    97   case Left(v) => env(v)
   121   case Left(v) => env(v)
    98   case Right(v) => env(v)
   122   case Right(v) => env(v)
    99   case Sequ(v1, v2) => env(v1) ::: env(v2)
   123   case Sequ(v1, v2) => env(v1) ::: env(v2)
   100   case Stars(vs) => vs.flatMap(env)
   124   case Stars(vs) => vs.flatMap(env)
       
   125   case Opt(v) => env(v)
       
   126   case Pls(vs) => vs.flatMap(env)
       
   127   case Nt(vs) => vs.flatMap(env)
   101   case Rec(x, v) => (x, flatten(v))::env(v)
   128   case Rec(x, v) => (x, flatten(v))::env(v)
   102 }
   129 }
   103 
   130 
   104 // The Injection Part of the lexer
   131 
       
   132 // The injection and mkeps part of the lexer
       
   133 //===========================================
   105 
   134 
   106 def mkeps(r: Rexp) : Val = r match {
   135 def mkeps(r: Rexp) : Val = r match {
   107   case ONE => Empty
   136   case ONE => Empty
   108   case ALT(r1, r2) => 
   137   case RANGE(chars) => throw new Exception("lexing error")  // this will never be called but the coursework asks for it so...
       
   138   case ALT(r1, r2) =>
   109     if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
   139     if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
   110   case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
   140   case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
   111   case STAR(r) => Stars(Nil)
   141   case STAR(r) => Stars(Nil)
       
   142   case OPTIONAL(r) => Opt(Empty)
       
   143   case PLUS(r) => Pls(List(mkeps(r))) // a Scala list containing exactly one element
       
   144   case NTIMES(r, n) => if (n == 0) Nt(Nil) else Nt(List.fill(n)(mkeps(r))) // wrong
   112   case RECD(x, r) => Rec(x, mkeps(r))
   145   case RECD(x, r) => Rec(x, mkeps(r))
       
   146   case _ => throw new Exception("lexing error")
   113 }
   147 }
   114 
   148 
   115 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
   149 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
   116   case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
   150   case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
   117   case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
   151   case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
   118   case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
   152   case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
   119   case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
   153   case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
   120   case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
   154   case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
   121   case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
   155   case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
   122   case (CHAR(d), Empty) => Chr(c) 
   156   case (CHAR(d), Empty) => Chr(c)
       
   157   case (RANGE(chars), Empty) => Chr(c)
       
   158   case (OPTIONAL(r1), v) => Opt(inj(r1, c, v))
       
   159   case (PLUS(r1), Sequ(v1, Stars(vs))) => Pls(inj(r1, c, v1)::vs)
       
   160   case (NTIMES(r1, n), Sequ(v1, Nt(vs))) => Nt(inj(r1, c, v1)::vs)
   123   case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
   161   case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
   124   case _ => { println ("Injection error") ; sys.exit(-1) } 
       
   125 }
   162 }
   126 
   163 
   127 // some "rectification" functions for simplification
   164 // some "rectification" functions for simplification
   128 def F_ID(v: Val): Val = v
   165 def F_ID(v: Val): Val = v
   129 def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
   166 def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
   133   case Left(v) => Left(f1(v))
   170   case Left(v) => Left(f1(v))
   134 }
   171 }
   135 def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
   172 def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
   136   case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
   173   case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
   137 }
   174 }
   138 def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = 
   175 def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) =
   139   (v:Val) => Sequ(f1(Empty), f2(v))
   176   (v:Val) => Sequ(f1(Empty), f2(v))
   140 def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = 
   177 def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) =
   141   (v:Val) => Sequ(f1(v), f2(Empty))
   178   (v:Val) => Sequ(f1(v), f2(Empty))
   142 def F_RECD(f: Val => Val) = (v:Val) => v match {
   179 
   143   case Rec(x, v) => Rec(x, f(v))
       
   144 }
       
   145 def F_ERROR(v: Val): Val = throw new Exception("error")
   180 def F_ERROR(v: Val): Val = throw new Exception("error")
   146 
   181 
       
   182 // simplification
   147 def simp(r: Rexp): (Rexp, Val => Val) = r match {
   183 def simp(r: Rexp): (Rexp, Val => Val) = r match {
   148   case ALT(r1, r2) => {
   184   case ALT(r1, r2) => {
   149     val (r1s, f1s) = simp(r1)
   185     val (r1s, f1s) = simp(r1)
   150     val (r2s, f2s) = simp(r2)
   186     val (r2s, f2s) = simp(r2)
   151     (r1s, r2s) match {
   187     (r1s, r2s) match {
   152       case (ZERO, _) => (r2s, F_RIGHT(f2s))
   188       case (ZERO, _) => (r2s, F_RIGHT(f2s))
   153       case (_, ZERO) => (r1s, F_LEFT(f1s))
   189       case (_, ZERO) => (r1s, F_LEFT(f1s))
   154       case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
   190       case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
   155                 else (ALT (r1s, r2s), F_ALT(f1s, f2s)) 
   191                 else (ALT (r1s, r2s), F_ALT(f1s, f2s))
   156     }
   192     }
   157   }
   193   }
   158   case SEQ(r1, r2) => {
   194   case SEQ(r1, r2) => {
   159     val (r1s, f1s) = simp(r1)
   195     val (r1s, f1s) = simp(r1)
   160     val (r2s, f2s) = simp(r2)
   196     val (r2s, f2s) = simp(r2)
   164       case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
   200       case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
   165       case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s))
   201       case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s))
   166       case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
   202       case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
   167     }
   203     }
   168   }
   204   }
   169   case RECD(x, r1) => {
       
   170     val (r1s, f1s) = simp(r1)
       
   171     (RECD(x, r1s), F_RECD(f1s))
       
   172   }
       
   173   case r => (r, F_ID)
   205   case r => (r, F_ID)
   174 }
   206 }
   175 
   207 
   176 // lexing functions including simplification
   208 // lexing functions including simplification
   177 def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
   209 def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
   178   case Nil => if (nullable(r)) mkeps(r) else { println ("Lexing Error") ; sys.exit(-1) } 
   210   case Nil => if (nullable(r)) mkeps(r) else
       
   211     { throw new Exception("lexing error") }
   179   case c::cs => {
   212   case c::cs => {
   180     val (r_simp, f_simp) = simp(der(c, r))
   213     val (r_simp, f_simp) = simp(der(c, r))
   181     inj(r, c, f_simp(lex_simp(r_simp, cs)))
   214     inj(r, c, f_simp(lex_simp(r_simp, cs)))
   182   }
   215   }
   183 }
   216 }
   184 
   217 
   185 def lexing_simp(r: Rexp, s: String) = env(lex_simp(r, s.toList))
   218 def lexing_simp(r: Rexp, s: String) =
   186 
   219   env(lex_simp(r, s.toList))
   187 
   220 
   188 // The Lexing Rules for the Fun Language
   221 
   189 
   222 // FUN language lexer
   190 def PLUS(r: Rexp) = r ~ r.%
   223 
   191 def OPT(r: Rexp) = r | ONE
   224 val DIGIT = RANGE("0123456789".toList)
   192 
   225 val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList)
   193 val SYM = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | 
   226 val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList)
   194           "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | 
   227 val SYM = RANGE("!\"#$%&'()*+,-./:;<>=?`@[]\\^_{}|~".toList)  // I referenced the CPP ASCII table https://en.cppreference.com/w/cpp/language/ascii
   195           "w" | "x" | "y" | "z" | "A" | "B" | "C" | "D" |"E" | "F" | "G" |
   228 
   196           "H" | "I" | "J" | "K" |"L" | "M" | "N" |
   229 
   197           "O" | "P" | "Q" | "R" |"S" | "T" | "U" |
   230 val KEYWORD : Rexp = "val" | "if" | "then" | "else" | "def" | "skip" // "skip" is hardcoded because hanoi.fun calls skip without parentheses
   198           "V" | "W" | "X" | "Y" | "Z" | "_" | ":"
   231 val TYPE : Rexp = "Int" | "Double" | "Void"
   199 val DIGIT = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
   232 val GLOBAL_ID : Rexp = UPPERCASE ~ ("_" | LOWERCASE | DIGIT | UPPERCASE).% // start with capital letter and followed by any case
   200 val ID = SYM ~ (SYM | DIGIT).% 
   233 val ID : Rexp = LOWERCASE ~ ("_" | UPPERCASE | LOWERCASE | DIGIT).% // start with lowercase 
   201 val NUM = PLUS(DIGIT)
   234 val SEMI : Rexp = ";"
   202 val FNUM = OPT("-") ~ NUM ~ "." ~ NUM 
   235 val COLON : Rexp = ":"
   203 val KEYWORD : Rexp = "if" | "then" | "else" | "def" | "val"
   236 val OP : Rexp = "=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/" // no && and || operators
   204 val TYPE : Rexp = "Void" | "Int" | "Double" 
   237 val INT : Rexp = DIGIT.+
   205 val SEMI: Rexp = ";"
   238 val DOUBLE : Rexp = DIGIT.+ ~ "." ~ DIGIT.+  // the sign of a negative number is lexed as an operator, but the parser will identify negative numbers
   206 val COLON: Rexp = ":"
   239 val COMMA : Rexp = "," 
   207 val COMMA: Rexp = ","
   240 val WHITESPACES: Rexp = (" " | "\n" | "\t" | "\r").+ // whitespaces are either " " or \n or \t or \r
   208 val OP: Rexp = "=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/"
   241 val LPAREN : Rexp = RANGE("({".toList)
   209 val WHITESPACE = PLUS(" " | "\n" | "\t" | "\r")
   242 val RPAREN : Rexp = RANGE(")}".toList)
   210 val RPAREN: Rexp = ")" | "}"
   243 val CH : Rexp = "'" ~ (LOWERCASE | UPPERCASE | DIGIT | SYM | " " | "\\n" | "\\t" | "\\r") ~ "'"  // \n, \t and \r should also be tokenized, any character should be, whitespaces too
   211 val LPAREN: Rexp = "(" | "{"
   244 val COMMENT : Rexp = ("//" ~ (LOWERCASE | UPPERCASE | SYM | DIGIT | RANGE(" \t\r".toList)).% ~ "\n") | ("/*" ~ (LOWERCASE | UPPERCASE | SYM | DIGIT | RANGE(" \n\t\r".toList)).% ~ "*/")
   212 val ALL = SYM | DIGIT | OP | " " | ":" | ";" | "-" | "." | "\"" | "=" | "," | "(" | ")" | "{" | "}"
   245 
   213 val ALL2 = ALL | "\n"
   246 val FUN_REGS = (("keyword" $ KEYWORD) |
   214 val COMMENT = ("/*" ~ ALL2.% ~ "*/") | ("//" ~ ALL.% ~ "\n")
   247                 ("type" $ TYPE) |
   215 
   248                 ("global" $ GLOBAL_ID) |
   216 val CHR :Rexp = "'" ~ (ALL | "\\n") ~ "'" 
   249                 ("id" $ ID) |
   217 
   250                 ("op" $ OP) |
   218 
   251                 ("double" $ DOUBLE) |
   219 val FUN_REGS = (("k" $ KEYWORD) | 
   252                 ("int" $ INT) |
   220                 ("t" $ TYPE) |
   253                 ("semi" $ SEMI) |
   221                 ("i" $ ID) | 
   254                 ("colon" $ COLON) |
   222                 ("ch" $ CHR) | 
   255                 ("comma" $ COMMA) |
   223                 ("o" $ OP) | 
   256                 ("ch" $ CH) |
   224                 ("n" $ NUM) | 
   257                 ("par" $ (LPAREN | RPAREN)) |
   225                 ("f" $ FNUM) | 
   258                 COMMENT | WHITESPACES).%
   226                 ("s" $ SEMI) | 
   259 
   227                 ("co" $ COLON) |
   260 def fun_lex(program: String) : Tokens = {
   228                 ("c" $ COMMA) |
   261   lexing_simp(FUN_REGS, program)
   229                 ("pl" $ LPAREN) |
   262 }
   230                 ("pr" $ RPAREN) |
   263 
   231                 ("w" $ (WHITESPACE | COMMENT))).%
   264 def tokenise(program: String) : Tokens = {
   232 
   265   lexing_simp(FUN_REGS, program)
   233 
   266 }
   234 
   267 
   235 // The tokens for the Fun language
   268 import scala.io.Source._
   236 
   269 
   237 abstract class Token extends Serializable 
       
   238 case object T_SEMI extends Token
       
   239 case object T_COMMA extends Token
       
   240 case object T_COLON extends Token
       
   241 case object T_LPAREN extends Token
       
   242 case object T_RPAREN extends Token
       
   243 case class T_ID(s: String) extends Token
       
   244 case class T_FID(s: String) extends Token
       
   245 case class T_OP(s: String) extends Token
       
   246 case class T_NUM(n: Int) extends Token
       
   247 case class T_FNUM(x: Double) extends Token
       
   248 case class T_KWD(s: String) extends Token
       
   249 case class T_TY(s: String) extends Token
       
   250 case class T_CHR(i: Int) extends Token
       
   251 
       
   252 val token : PartialFunction[(String, String), Token] = {
       
   253   case ("k", s) => T_KWD(s)
       
   254   case ("t", s) => T_TY(s)
       
   255   case ("i", s) => T_ID(s)
       
   256   case ("o", s) => T_OP(s)
       
   257   case ("n", s) => T_NUM(s.toInt)
       
   258   case ("ch", s) => if (s == "'\\n'") T_CHR(10) else T_CHR(s(1).toInt)
       
   259   case ("f", s) => T_FNUM(s.toDouble) 
       
   260   case ("s", _) => T_SEMI
       
   261   case ("c", _) => T_COMMA
       
   262   case ("co", _) => T_COLON
       
   263   case ("pl", _) => T_LPAREN
       
   264   case ("pr", _) => T_RPAREN
       
   265 }
       
   266 
       
   267 
       
   268 def tokenise(s: String) : List[Token] = {
       
   269   val tks = lexing_simp(FUN_REGS, s).collect(token)
       
   270   if (tks.length != 0) tks
       
   271   else { println (s"Tokenise Error") ; sys.exit(-1) }     
       
   272 }
       
   273 
       
   274 //import ammonite.ops._
       
   275 
       
   276 //@doc("Tokenising a file.")
       
   277 @main
   270 @main
   278 def main(fname: String) = {
   271 def lex(filename: String) = {
   279   println(tokenise(os.read(os.pwd / fname)))
   272   // read file
   280 }
   273   val fun_code = fromFile(filename).getLines.mkString("\n")
       
   274   // print tokens to screen
       
   275   println(fun_lex(fun_code).mkString("\n"))
       
   276 }