updated
authorChristian Urban <christian.urban@kcl.ac.uk>
Fri, 09 Dec 2022 11:00:05 +0000 (2022-12-09)
changeset 903 2f86ebda3629
parent 902 b40aaffe0793
child 904 d97283992d4f
updated
progs/fun/fa0.fun
solutions/cw5/fun_llvm.sc
solutions/cw5/fun_parser.sc
solutions/cw5/fun_tokens.sc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/fun/fa0.fun	Fri Dec 09 11:00:05 2022 +0000
@@ -0,0 +1,5 @@
+def fact(n) =
+  (if n == 0 then 1 else n * fact(n - 1));
+
+
+write(fact(6))
--- a/solutions/cw5/fun_llvm.sc	Sat Dec 03 21:58:47 2022 +0000
+++ b/solutions/cw5/fun_llvm.sc	Fri Dec 09 11:00:05 2022 +0000
@@ -1,50 +1,17 @@
-// A Small LLVM Compiler for a Simple Functional Language
-// (includes an external lexer and parser)
-//
-//
-// call with                 -- prints out llvm code
-//
-//     amm fun_llvm.sc main fact.fun
-//     amm fun_llvm.sc main defs.fun
-//
-// or                        -- writes llvm code to disk
-//
-//     amm fun_llvm.sc write fact.fun
-//     amm fun_llvm.sc write defs.fun
-//
-//       this will generate an .ll file. 
-//
-// or                       -- runs the generated llvm code via lli
-//
-//     amm fun_llvm.sc run fact.fun
-//     amm fun_llvm.sc run defs.fun
+// Author: Zhuo Ying Jiang Li
+// Starting code by Dr Christian Urban
+
+// 
+// Use amm fun_llvm.sc XXX.fun
+// ./XXX
+// This will generate XXX.ll, XXX.o as well as the binary program.
 //
-//
-// You can interpret an .ll file using lli, for example
-//
-//      lli fact.ll
-//
-// The optimiser can be invoked as
-//
-//      opt -O1 -S in_file.ll > out_file.ll
-//      opt -O3 -S in_file.ll > out_file.ll
-//
-// The code produced for the various architectures can be obtain with
-//   
-//   llc -march=x86 -filetype=asm in_file.ll -o -
-//   llc -march=arm -filetype=asm in_file.ll -o -  
-//
-// Producing an executable can be achieved by
-//
-//    llc -filetype=obj in_file.ll
-//    gcc in_file.o -o a.out
-//    ./a.out
 
+// lexer + parser
 
 import $file.fun_tokens, fun_tokens._
 import $file.fun_parser, fun_parser._ 
 
-
 // for generating new labels
 var counter = -1
 
@@ -53,76 +20,51 @@
   x ++ "_" ++ counter.toString()
 }
 
+// typing
+type Ty = String
+type TyEnv = Map[String, Ty]
+
+// initial typing environment
+val initialEnv = Map[String, Ty]("skip" -> "Void", "print_int" -> "Void", "print_char" -> "Void",
+                                "print_space" -> "Void", "print_star" -> "Void", "new_line" -> "Void")
+
+val typeConversion = Map("Int" -> "i32", "Double" -> "double", "Void" -> "void")
+
 // Internal CPS language for FUN
 abstract class KExp
 abstract class KVal
 
-type Ty = String
-type TyEnv = Map[String, Ty]
-
 case class KVar(s: String, ty: Ty = "UNDEF") extends KVal
-case class KLoad(v: KVal) extends KVal
-case class KNum(i: Int) extends KVal
-case class KFNum(i: Double) extends KVal
-case class KChr(c: Int) extends KVal
+case class KConst(s: String, ty: Ty = "UNDEF") extends KVal
+case class KNum(i: Int) extends KVal  // known type
+case class KFNum(d: Float) extends KVal  // known type
+case class KChConst(c: Int) extends KVal  // known type
 case class Kop(o: String, v1: KVal, v2: KVal, ty: Ty = "UNDEF") extends KVal
 case class KCall(o: String, vrs: List[KVal], ty: Ty = "UNDEF") extends KVal
 
-case class KIf(x1: String, e1: KExp, e2: KExp) extends KExp {
-  override def toString = s"KIf $x1\nIF\n$e1\nELSE\n$e2"
+case class KLet(x: String, e1: KVal, e2: KExp) extends KExp {
+  override def toString = s"LET $x = $e1 in \n$e2" 
 }
-case class KLet(x: String, e1: KVal, e2: KExp) extends KExp {
-  override def toString = s"let $x = $e1 in \n$e2" 
+case class KIf(x1: String, e1: KExp, e2: KExp) extends KExp {
+  def pad(e: KExp) = e.toString.replaceAll("(?m)^", "  ")
+
+  override def toString = 
+     s"IF $x1\nTHEN\n${pad(e1)}\nELSE\n${pad(e2)}"
 }
 case class KReturn(v: KVal) extends KExp
 
-// typing K values
-def typ_val(v: KVal, ts: TyEnv) : (KVal, Ty) = v match {
-  case KVar(s, _) => {
-    val ty = ts.getOrElse(s, "TUNDEF")
-    (KVar(s, ty), ty)  
-  }
-  case Kop(op, v1, v2, _) => {
-    val (tv1, ty1) = typ_val(v1, ts)
-    val (tv2, ty2) = typ_val(v2, ts)
-    if (ty1 == ty2) (Kop(op, tv1, tv2, ty1), ty1) else (Kop(op, tv1, tv2, "TMISMATCH"), "TMISMATCH") 
-  }
-  case KCall(fname, args, _) => {
-    val ty = ts.getOrElse(fname, "TCALLUNDEF" ++ fname)
-    (KCall(fname, args.map(typ_val(_, ts)._1), ty), ty)
-  }  
-  case KLoad(v) => {
-    val (tv, ty) = typ_val(v, ts)
-    (KLoad(tv), ty)
-  }
-  case KNum(i) => (KNum(i), "Int")
-  case KFNum(i) => (KFNum(i), "Double")
-  case KChr(c) => (KChr(c), "Int")
-}
-
-def typ_exp(a: KExp, ts: TyEnv) : KExp = a match {
-  case KReturn(v) => KReturn(typ_val(v, ts)._1)
-  case KLet(x: String, v: KVal, e: KExp) => {
-    val (tv, ty) = typ_val(v, ts)
-    KLet(x, tv, typ_exp(e, ts + (x -> ty)))
-  }
-  case KIf(b, e1, e2) => KIf(b, typ_exp(e1, ts), typ_exp(e2, ts))
-}
-
-
-
-
 // CPS translation from Exps to KExps using a
 // continuation k.
 def CPS(e: Exp)(k: KVal => KExp) : KExp = e match {
-  case Var(s) if (s.head.isUpper) => {
+  case Var(s) => {
+    if (s.head.isUpper) {  // if this variable is a global
       val z = Fresh("tmp")
-      KLet(z, KLoad(KVar(s)), k(KVar(z)))
+      KLet(z, KConst(s), k(KVar(z)))
+    } else k(KVar(s))
   }
-  case Var(s) => k(KVar(s))
   case Num(i) => k(KNum(i))
-  case ChConst(c) => k(KChr(c))
-  case FNum(i) => k(KFNum(i))
+  case FNum(d) => k(KFNum(d))
+  case ChConst(c) => k(KChConst(c))
   case Aop(o, e1, e2) => {
     val z = Fresh("tmp")
     CPS(e1)(y1 => 
@@ -146,77 +88,122 @@
   }
   case Sequence(e1, e2) => 
     CPS(e1)(_ => CPS(e2)(y2 => k(y2)))
-}   
+}
 
-//initial continuation
+// initial continuation
 def CPSi(e: Exp) = CPS(e)(KReturn)
 
-// some testcases
-val e1 = Aop("*", Var("a"), Num(3))
-CPSi(e1)
 
-val e2 = Aop("+", Aop("*", Var("a"), Num(3)), Num(4))
-CPSi(e2)
+// get type of KVal
+def get_typ_val(v: KVal) : Ty = v match {
+  case KNum(i) => "Int"
+  case KFNum(d) => "Double"
+  case KChConst(i) => "Int"
+  case KVar(name, ty) => ty
+  case KConst(name, ty) => ty
+  case Kop(o, v1, v2, ty) => ty
+  case KCall(o, vrs, ty) => ty
+}
 
-val e3 = Aop("+", Num(2), Aop("*", Var("a"), Num(3)))
-CPSi(e3)
-
-val e4 = Aop("+", Aop("-", Num(1), Num(2)), Aop("*", Var("a"), Num(3)))
-CPSi(e4)
-
-val e5 = If(Bop("==", Num(1), Num(1)), Num(3), Num(4))
-CPSi(e5)
+// update type information for KValues
+def typ_val(v: KVal, ts: TyEnv) : KVal = v match {
+  case KVar(name, ty) => {
+    if (ts.contains(name)) {
+      KVar(name, ts(name))
+    } else throw new Exception(s"Compile error: unknown type for $name")
+  }
+  case KConst(name, ty) => {
+    if (ts.contains(name)) {
+      KConst(name, ts(name))
+    } else throw new Exception(s"Compile error: unknown type for $name")
+  }
+  case Kop(o, v1, v2, ty) => {
+    val tv1 = typ_val(v1, ts)
+    val tv2 = typ_val(v2, ts)
+    val t1 = get_typ_val(tv1)
+    val t2 = get_typ_val(tv2)
+    if (t1 != t2) throw new Exception(s"Compile error: cannot compare $t1 with $t2")
+    Kop(o, tv1, tv2, t1)
+  }
+  case KCall(o, vrs, ty) => {
+    val new_vrs = vrs.map(vr => typ_val(vr, ts))
+    if (ts.contains(o)) {
+      KCall(o, new_vrs, ts(o))
+    } else throw new Exception(s"Compile error: unknown type for $o")
+  }
+  case x => x  // no changes: KNum, KFNum, KChConst
+}
 
-val e6 = If(Bop("!=", Num(10), Num(10)), e5, Num(40))
-CPSi(e6)
+// update type information for KExpressions
+def typ_exp(a: KExp, ts: TyEnv) : KExp = a match {
+  case KLet(x, e1, e2) => {
+    val te1 = typ_val(e1, ts)
+    val env1 = ts + (x -> get_typ_val(te1))
+    val te2 = typ_exp(e2, env1)
+    KLet(x, te1, te2)
+  }
+  case KIf(x1, e1, e2) => KIf(x1, typ_exp(e1, ts), typ_exp(e2, ts))
+  case KReturn(v) => KReturn(typ_val(v, ts))
+}
 
-val e7 = Call("foo", List(Num(3)))
-CPSi(e7)
+// prelude
+val prelude = """
+declare i32 @printf(i8*, ...)
 
-val e8 = Call("foo", List(Aop("*", Num(3), Num(1)), Num(4), Aop("+", Num(5), Num(6))))
-CPSi(e8)
+@.str_nl = private constant [2 x i8] c"\0A\00"
+@.str_star = private constant [2 x i8] c"*\00"
+@.str_space = private constant [2 x i8] c" \00"
+@.str_int = private constant [3 x i8] c"%d\00"
+@.str_c = private constant [3 x i8] c"%c\00"
+
+define void @new_line() #0 {
+  %t0 = getelementptr [2 x i8], [2 x i8]* @.str_nl, i32 0, i32 0
+  call i32 (i8*, ...) @printf(i8* %t0)
+  ret void
+}
 
-val e9 = Sequence(Aop("*", Var("a"), Num(3)), Aop("+", Var("b"), Num(6)))
-CPSi(e9)
+define void @print_star() #0 {
+  %t0 = getelementptr [2 x i8], [2 x i8]* @.str_star, i32 0, i32 0
+  call i32 (i8*, ...) @printf(i8* %t0)
+  ret void
+}
+
+define void @print_space() #0 {
+  %t0 = getelementptr [2 x i8], [2 x i8]* @.str_space, i32 0, i32 0
+  call i32 (i8*, ...) @printf(i8* %t0)
+  ret void
+}
 
-val e = Aop("*", Aop("+", Num(1), Call("foo", List(Var("a"), Num(3)))), Num(4))
-CPSi(e)
+define void @print_int(i32 %x) {
+  %t0 = getelementptr [3 x i8], [3 x i8]* @.str_int, i32 0, i32 0
+  call i32 (i8*, ...) @printf(i8* %t0, i32 %x) 
+  ret void
+}
 
+define void @print_char(i32 %x) {
+  %t0 = getelementptr [3 x i8], [3 x i8]* @.str_c, i32 0, i32 0
+  call i32 (i8*, ...) @printf(i8* %t0, i32 %x)
+  ret void
+}
 
+define void @skip() #0 {
+  ret void
+}
 
+; END OF BUILT-IN FUNCTIONS (prelude)
+"""
 
 // convenient string interpolations 
 // for instructions, labels and methods
 import scala.language.implicitConversions
 import scala.language.reflectiveCalls
 
-
-
-
-implicit def sring_inters(sc: StringContext) = new {
+implicit def string_inters(sc: StringContext) = new {
     def i(args: Any*): String = "   " ++ sc.s(args:_*) ++ "\n"
     def l(args: Any*): String = sc.s(args:_*) ++ ":\n"
     def m(args: Any*): String = sc.s(args:_*) ++ "\n"
 }
 
-def get_ty(s: String) = s match {
-  case "Double" => "double"
-  case "Void" => "void"
-  case "Int" => "i32"
-  case "Bool" => "i2"
-  case _ => s
-}
-
-def compile_call_arg(a: KVal) = a match {
-  case KNum(i) => s"i32 $i"
-  case KFNum(i) => s"double $i"
-  case KChr(c) => s"i32 $c"
-  case KVar(s, ty) => s"${get_ty(ty)} %$s" 
-}
-
-def compile_arg(s: (String, String)) = s"${get_ty(s._2)} %${s._1}" 
-
-
 // mathematical and boolean operations
 def compile_op(op: String) = op match {
   case "+" => "add i32 "
@@ -225,48 +212,70 @@
   case "/" => "sdiv i32 "
   case "%" => "srem i32 "
   case "==" => "icmp eq i32 "
-  case "!=" => "icmp ne i32 "      // not equal 
-  case "<=" => "icmp sle i32 "     // signed less or equal
-  case "<"  => "icmp slt i32 "     // signed less than
+  case "!=" => "icmp ne i32 "
+  case "<=" => "icmp sle i32 "
+  case "<"  => "icmp slt i32 "
+  case ">=" => "icmp sge i32 "
+  case ">" => "icmp sgt i32 "
 }
 
 def compile_dop(op: String) = op match {
   case "+" => "fadd double "
   case "*" => "fmul double "
   case "-" => "fsub double "
+  case "/" => "fdiv double "
+  case "%" => "frem double "
   case "==" => "fcmp oeq double "
-  case "<=" => "fcmp ole double "   
-  case "<"  => "fcmp olt double "   
+  case "!=" => "fcmp one double "
+  case "<=" => "fcmp ole double "
+  case "<" => "fcmp olt double "
+  case ">=" => "fcmp oge double "  // doubles need fcmp with an ordered predicate; icmp only accepts integer operands
+  case ">" => "fcmp ogt double "
+}
+
+def compile_args(vrs: List[KVal]) : List[String] = vrs match {
+  case Nil => Nil
+  case x::xs => s"${typeConversion(get_typ_val(x))} ${compile_val(x)}" :: compile_args(xs)
 }
 
 // compile K values
 def compile_val(v: KVal) : String = v match {
   case KNum(i) => s"$i"
-  case KFNum(i) => s"$i"
-  case KChr(c) => s"$c"
-  case KVar(s, ty) => s"%$s" 
-  case KLoad(KVar(s, ty)) => s"load ${get_ty(ty)}, ${get_ty(ty)}* @$s"
-  case Kop(op, x1, x2, ty) => ty match { 
-    case "Int" => s"${compile_op(op)} ${compile_val(x1)}, ${compile_val(x2)}"
-    case "Double" => s"${compile_dop(op)} ${compile_val(x1)}, ${compile_val(x2)}"
-    case _ => Kop(op, x1, x2, ty).toString
+  case KFNum(d) => s"$d"
+  case KChConst(i) => s"$i"  // as integer
+  case KVar(s, ty) => s"%$s"
+  case KConst(s, ty) => {
+    val t = typeConversion(ty)
+    s"load $t, $t* @$s"
   }
-  case KCall(fname, args, ty) => 
-    s"call ${get_ty(ty)} @$fname (${args.map(compile_call_arg).mkString(", ")})"
+  case Kop(op, x1, x2, ty) => {
+    if (ty == "Double") {
+      s"${compile_dop(op)} ${compile_val(x1)}, ${compile_val(x2)}"
+    } else if (ty == "Int") {
+      s"${compile_op(op)} ${compile_val(x1)}, ${compile_val(x2)}"
+    } else throw new Exception("Compile error: unknown type for comparison")
+  }
+  case KCall(x1, args, ty) => {
+    s"call ${typeConversion(ty)} @$x1 (${compile_args(args).mkString(", ")})"
+  }
 }
 
 // compile K expressions
 def compile_exp(a: KExp) : String = a match {
-  case KReturn(KVar("void", _)) =>
-    i"ret void"
-  case KReturn(KVar(x, ty)) =>
-    i"ret ${get_ty(ty)} %$x"
-  case KReturn(KNum(i)) =>
-    i"ret i32 $i"
-  case KLet(x: String, KCall(o: String, vrs: List[KVal], "Void"), e: KExp) => 
-    i"${compile_val(KCall(o: String, vrs: List[KVal], "Void"))}" ++ compile_exp(e)
-  case KLet(x: String, v: KVal, e: KExp) => 
-    i"%$x = ${compile_val(v)}" ++ compile_exp(e)
+  case KReturn(v) => {
+    val ty = get_typ_val(v)
+    if (ty == "Void") {
+      i"ret void"
+    } else {
+      i"ret ${typeConversion(ty)} ${compile_val(v)}"
+    }
+  }
+  case KLet(x: String, v: KVal, e: KExp) => {
+    val tv = get_typ_val(v)
+    if (tv == "Void") {
+      i"${compile_val(v)}" ++ compile_exp(e)
+    } else i"%$x = ${compile_val(v)}" ++ compile_exp(e)
+  }
   case KIf(x, e1, e2) => {
     val if_br = Fresh("if_branch")
     val else_br = Fresh("else_branch")
@@ -278,100 +287,50 @@
   }
 }
 
-
-val prelude = """
-declare i32 @printf(i8*, ...)
-
-@.str_nl = private constant [2 x i8] c"\0A\00"
-@.str_star = private constant [2 x i8] c"*\00"
-@.str_space = private constant [2 x i8] c" \00"
-
-define void @new_line() #0 {
-  %t0 = getelementptr [2 x i8], [2 x i8]* @.str_nl, i32 0, i32 0
-  %1 = call i32 (i8*, ...) @printf(i8* %t0)
-  ret void
-}
-
-define void @print_star() #0 {
-  %t0 = getelementptr [2 x i8], [2 x i8]* @.str_star, i32 0, i32 0
-  %1 = call i32 (i8*, ...) @printf(i8* %t0)
-  ret void
-}
-
-define void @print_space() #0 {
-  %t0 = getelementptr [2 x i8], [2 x i8]* @.str_space, i32 0, i32 0
-  %1 = call i32 (i8*, ...) @printf(i8* %t0)
-  ret void
-}
-
-define void @skip() #0 {
-  ret void
-}
-
-@.str_int = private constant [3 x i8] c"%d\00"
-
-define void @print_int(i32 %x) {
-   %t0 = getelementptr [3 x i8], [3 x i8]* @.str_int, i32 0, i32 0
-   call i32 (i8*, ...) @printf(i8* %t0, i32 %x) 
-   ret void
-}
-
-@.str_char = private constant [3 x i8] c"%c\00"
-
-define void @print_char(i32 %x) {
-   %t0 = getelementptr [3 x i8], [3 x i8]* @.str_char, i32 0, i32 0
-   call i32 (i8*, ...) @printf(i8* %t0, i32 %x) 
-   ret void
-}
-
-; END OF BUILD-IN FUNCTIONS (prelude)
-
-"""
-
-def get_cont(ty: Ty) = ty match {
-  case "Int" =>    KReturn
-  case "Double" => KReturn
-  case "Void" =>   { (_: KVal) => KReturn(KVar("void", "Void")) }
-} 
-
-// compile function for declarations and main
-def compile_decl(d: Decl, ts: TyEnv) : (String, TyEnv) = d match {
-  case Def(name, args, ty, body) => { 
-    val ts2 = ts + (name -> ty)
-    val tkbody = typ_exp(CPS(body)(get_cont(ty)), ts2 ++ args.toMap)
-    (m"define ${get_ty(ty)} @$name (${args.map(compile_arg).mkString(",")}) {" ++
-     compile_exp(tkbody) ++
-     m"}\n", ts2)
-  }
-  case Main(body) => {
-    val tbody = typ_exp(CPS(body)(_ => KReturn(KNum(0))), ts)
-    (m"define i32 @main() {" ++
-     compile_exp(tbody) ++
-     m"}\n", ts)
-  }
-  case Const(name, n) => {
-    (m"@$name = global i32 $n\n", ts + (name -> "Int"))
-  }
-  case FConst(name, x) => {
-    (m"@$name = global double $x\n", ts + (name -> "Double"))
+def compile_def_args(args: List[(String, String)], ts: TyEnv) : (List[String], TyEnv) = args match {
+  case Nil => (Nil, ts)
+  case (n, t)::xs => {
+    if (t == "Void") throw new Exception("Compile error: argument of type void is invalid")
+    val (rest, env) = compile_def_args(xs, ts + (n -> t))
+    (s"${typeConversion(t)} %$n" :: rest, env)
   }
 }
 
-def compile_prog(prog: List[Decl], ty: TyEnv) : String = prog match {
-  case Nil => ""
-  case d::ds => {
-    val (s2, ty2) = compile_decl(d, ty)
-    s2 ++ compile_prog(ds, ty2)
+def compile_decl(d: Decl, ts: TyEnv) : (String, TyEnv) = d match {
+  case Const(name, value) => {
+    (m"@$name = global i32 $value\n", ts + (name -> "Int"))
+  }
+  case FConst(name, value) => {
+    (m"@$name = global double $value\n", ts + (name -> "Double"))
+  }
+  case Def(name, args, ty, body) => {
+    val (argList, env1) = compile_def_args(args, ts + (name -> ty))
+    (m"define ${typeConversion(ty)} @$name (${argList.mkString(", ")}) {" ++
+    compile_exp(typ_exp(CPSi(body), env1)) ++
+    m"}\n", ts + (name -> ty))  // don't preserve local variables in environment
+  }
+  case Main(body) => {
+    (m"define i32 @main() {" ++
+    compile_exp(typ_exp(CPS(body)(_ => KReturn(KNum(0))), ts + ("main" -> "Int"))) ++
+    m"}\n", ts + ("main" -> "Int"))
   }
 }
-// main compiler functions
-def compile(prog: List[Decl]) : String = 
-  prelude ++ compile_prog(prog, Map("new_line" -> "Void", "skip" -> "Void", 
-				    "print_star" -> "Void", "print_space" -> "Void",
-                                    "print_int" -> "Void", "print_char" -> "Void"))
 
+// recursively update the typing environment while compiling
+def compile_block(prog: List[Decl], ts: TyEnv) : (String, TyEnv) = prog match {
+  case Nil => ("", ts)
+  case x::xs => {
+    val (compiled, env) = compile_decl(x, ts)
+    val (compiled_block, env1) = compile_block(xs, env)
+    (compiled ++ compiled_block, env1)
+  }
+}
 
-//import ammonite.ops._
+def fun_compile(prog: List[Decl]) : String = {
+  val tyenv = initialEnv
+  val (compiled, _) = compile_block(prog, tyenv)
+  prelude ++ compiled
+}
 
 
 @main
@@ -379,8 +338,8 @@
     val path = os.pwd / fname
     val file = fname.stripSuffix("." ++ path.ext)
     val tks = tokenise(os.read(path))
-    val ast = parse_tks(tks)
-    val code = compile(ast)
+    val ast = parse_tks(tks).head
+    val code = fun_compile(ast)
     println(code)
 }
 
@@ -389,8 +348,8 @@
     val path = os.pwd / fname
     val file = fname.stripSuffix("." ++ path.ext)
     val tks = tokenise(os.read(path))
-    val ast = parse_tks(tks)
-    val code = compile(ast)
+    val ast = parse_tks(tks).head
+    val code = fun_compile(ast)
     //println(code)
     os.write.over(os.pwd / (file ++ ".ll"), code)
 }
@@ -407,6 +366,3 @@
 }
 
 
-
-
-
--- a/solutions/cw5/fun_parser.sc	Sat Dec 03 21:58:47 2022 +0000
+++ b/solutions/cw5/fun_parser.sc	Fri Dec 09 11:00:05 2022 +0000
@@ -1,215 +1,263 @@
-// A parser for the Fun language
-//================================
-//
-// call with 
-//
-//     amm fun_parser.sc fact.fun
+// Author: Zhuo Ying Jiang Li
+// Starting code by Dr Christian Urban
+
+// parser: convert sequence of tokens to AST
+
 //
-//     amm fun_parser.sc defs.fun
+// Use this command to print parsed AST:
+// amm fun_parser.sc <name>.fun
 //
-// this will generate a parse-tree from a list
-// of tokens
 
-import scala.language.implicitConversions    
-import scala.language.reflectiveCalls
+import $file.fun_tokens, fun_tokens._
 
-import $file.fun_tokens, fun_tokens._ 
-
-
-// Parser combinators
-//    type parameter I needs to be of Seq-type
-//
-abstract class Parser[I, T](implicit ev: I => Seq[_]) {
-  def parse(ts: I): Set[(T, I)]
+// more convenience for the map parsers later on;
+// it allows writing nested patterns as
+// case x ~ y ~ z => ...
+case class ~[+A, +B](x: A, y: B)
 
-  def parse_single(ts: I) : T = 
-    parse(ts).partition(_._2.isEmpty) match {
-      case (good, _) if !good.isEmpty => good.head._1
-      case (good, err) if err.isEmpty => {
-        println (s"Parse Error\n $good \n $err") ; sys.exit(-1) }
-      case (_, err) => { 
-	println (s"Parse Error\n${err.minBy(_._2.length)}") ; sys.exit(-1) }
-    }
+// constraint for the input
+type IsSeq[A] = A => Seq[_]
+
+abstract class Parser[I : IsSeq, T]{
+  def parse(in: I): Set[(T, I)]
+
+  def parse_all(in: I) : Set[T] =
+    for ((hd, tl) <- parse(in);
+        if tl.isEmpty) yield hd
 }
 
-// convenience for writing grammar rules
-case class ~[+A, +B](_1: A, _2: B)
+// parser combinators
 
-class SeqParser[I, T, S](p: => Parser[I, T], 
-                         q: => Parser[I, S])(implicit ev: I => Seq[_]) extends Parser[I, ~[T, S]] {
-  def parse(sb: I) = 
-    for ((head1, tail1) <- p.parse(sb); 
-         (head2, tail2) <- q.parse(tail1)) yield (new ~(head1, head2), tail2)
+// sequence parser
+class SeqParser[I : IsSeq, T, S](p: => Parser[I, T],
+                                 q: => Parser[I, S]) extends Parser[I, ~[T, S]] {
+  def parse(in: I) =
+    for ((hd1, tl1) <- p.parse(in);
+         (hd2, tl2) <- q.parse(tl1)) yield (new ~(hd1, hd2), tl2)
 }
 
-class AltParser[I, T](p: => Parser[I, T], 
-                      q: => Parser[I, T])(implicit ev: I => Seq[_]) extends Parser[I, T] {
-  def parse(sb: I) = p.parse(sb) ++ q.parse(sb)   
+// alternative parser
+class AltParser[I : IsSeq, T](p: => Parser[I, T],
+                              q: => Parser[I, T]) extends Parser[I, T] {
+  def parse(in: I) = p.parse(in) ++ q.parse(in)
 }
 
-class FunParser[I, T, S](p: => Parser[I, T], 
-                         f: T => S)(implicit ev: I => Seq[_]) extends Parser[I, S] {
-  def parse(sb: I) = 
-    for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
+// map parser
+class MapParser[I : IsSeq, T, S](p: => Parser[I, T],
+                                 f: T => S) extends Parser[I, S] {
+  def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)
 }
 
-// convenient combinators
-implicit def ParserOps[I, T](p: Parser[I, T])(implicit ev: I => Seq[_]) = new {
-  def || (q : => Parser[I, T]) = new AltParser[I, T](p, q)
-  def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f)
+// more convenient syntax for parser combinators
+implicit def ParserOps[I : IsSeq, T](p: Parser[I, T]) = new {
+  def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)
   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
+  def map[S](f: => T => S) = new MapParser[I, T, S](p, f)
 }
 
-def ListParser[I, T, S](p: => Parser[I, T], 
-                        q: => Parser[I, S])(implicit ev: I => Seq[_]): Parser[I, List[T]] = {
-  (p ==> ((s) => List(s))) ||
-  (p ~ q ~ ListParser(p, q)) ==> { case x ~ _ ~ z => x :: z : List[T] }
-}
+// -------------------------------------------------
+// atomic parsers
 
-case class TokParser(tok: Token) extends Parser[List[Token], Token] {
-  def parse(ts: List[Token]) = ts match {
-    case t::ts if (t == tok) => Set((t, ts)) 
+// atomic parser for types
+case class TypeParser(ty: Set[String]) extends Parser[Tokens, String] {
+  def parse(tokens: Tokens) = tokens match {
+    case Nil => Set()
+    case tk::tkns if tk._1 == "type" && ty.contains(tk._2) => Set((tk._2, tkns))
     case _ => Set()
   }
 }
 
-implicit def token2tparser(t: Token) = TokParser(t)
-
-implicit def TokOps(t: Token) = new {
-  def || (q : => Parser[List[Token], Token]) = new AltParser[List[Token], Token](t, q)
-  def ==>[S] (f: => Token => S) = new FunParser[List[Token], Token, S](t, f)
-  def ~[S](q : => Parser[List[Token], S]) = new SeqParser[List[Token], Token, S](t, q)
+// atomic parser for global ids
+case object GlobalIdParser extends Parser[Tokens, String] {
+  def parse(tokens: Tokens) = tokens match {
+    case Nil => Set()
+    case tk::tkns if tk._1 == "global" => Set((tk._2, tkns))
+    case _ => Set()
+  }
 }
 
-case object EmptyParser extends Parser[List[Token], String] {
-  def parse(ts: List[Token]) = Set(("", ts))
+// atomic parser for ids
+case object IdParser extends Parser[Tokens, String] {
+  def parse(tokens: Tokens) = tokens match {
+    case Nil => Set()
+    case tk::tkns if tk._1 == "id" => Set((tk._2, tkns))
+    case _ => Set()
+  }
 }
 
-case object NumParser extends Parser[List[Token], Int] {
-  def parse(ts: List[Token]) = ts match {
-    case T_NUM(n)::ts => Set((n, ts)) 
-    case _ => Set ()
+// atomic parser for doubles (I use Float because that's what is used in the AST structures given in CW5)
+case object DoubleParser extends Parser[Tokens, Float] {
+  def parse(tokens: Tokens) = tokens match {
+    case Nil => Set()
+    case tk::tkns if tk._1 == "double" => Set((tk._2.toFloat, tkns))
+    case _ => Set()
   }
 }
 
-case object FNumParser extends Parser[List[Token], Double] {
-  def parse(ts: List[Token]) = ts match {
-    case T_FNUM(x)::ts => Set((x, ts)) 
+// atomic parser for integers
+case object IntParser extends Parser[Tokens, Int] {
+  def parse(tokens: Tokens) = tokens match {
+    case Nil => Set()
+    case tk::tkns if tk._1 == "int" => Set((tk._2.toInt, tkns))
+    case _ => Set()
+  }
+}
+
+// atomic parser for operators
+case class OpParser(ops: Set[String]) extends Parser[Tokens, String] {
+  def parse(tokens: Tokens) = tokens match {
+    case Nil => Set()
+    case tk::tkns if tk._1 == "op" && ops.contains(tk._2) => Set((tk._2, tkns))
+    case _ => Set()
+  }
+}
+
+// atomic parser for character
+case object CharParser extends Parser[Tokens, Char] {
+  def parse(tokens: Tokens) = tokens match {
+    case Nil => Set()
+    case tk::tkns if tk._1 == "ch" => {
+      val stripped = tk._2.slice(1, tk._2.length-1)  // strip off single quotes
+      stripped match {
+        case "\\n" => Set(('\n', tkns))
+        case "\\t" => Set(('\t', tkns))
+        case "\\r" => Set(('\r', tkns))
+        case c => Set((c(0), tkns))
+      }
+    }
     case _ => Set()
   }
 }
 
-case object IdParser extends Parser[List[Token], String] {
-  def parse(ts: List[Token]) = ts match {
-    case T_ID(s)::ts => Set((s, ts)) 
-    case _ => Set ()
+// parser for list of arguments
+def ListParser[I, T, S](p: => Parser[I, T], 
+                        q: => Parser[I, S])(implicit ev: I => Seq[_]): Parser[I, List[T]] = {
+  (p ~ q ~ ListParser(p, q)).map{ case x ~ _ ~ z => x :: z : List[T] } ||
+  (p.map((s) => List(s)))
+}
+
+// I may want to write string interpolations for:
+// keywords, semicolon, colon, comma, parentheses
+case class StrParser(s: String) extends Parser[Tokens, String] {
+  def parse(tokens: Tokens) = tokens match {
+    case Nil => Set()
+    case tk::tkns if tk._2 == s => Set((s, tkns))
+    case _ => Set()
   }
 }
 
-case object CharConstParser extends Parser[List[Token], Int] {
-  def parse(ts: List[Token]) = ts match {
-    case T_CHR(c)::ts => Set((c, ts)) 
-    case _ => Set ()
-  }
-}
-
-case object TyParser extends Parser[List[Token], String] {
-  def parse(ts: List[Token]) = ts match {
-    case T_TY(s)::ts => Set((s, ts)) 
-    case _ => Set ()
-  }
+implicit def parser_interpolation(sc: StringContext) = new {
+  def p(args: Any*) = StrParser(sc.s(args:_*))
 }
 
 
-// Abstract syntax trees for the Fun language
-abstract class Exp 
-abstract class BExp 
-abstract class Decl 
+// the AST datastructures for the FUN language
+
+abstract class Exp
+abstract class BExp
+abstract class Decl
 
 case class Def(name: String, args: List[(String, String)], ty: String, body: Exp) extends Decl
 case class Main(e: Exp) extends Decl
 case class Const(name: String, v: Int) extends Decl
-case class FConst(name: String, x: Double) extends Decl
+case class FConst(name: String, x: Float) extends Decl
 
 case class Call(name: String, args: List[Exp]) extends Exp
 case class If(a: BExp, e1: Exp, e2: Exp) extends Exp
 case class Var(s: String) extends Exp
-case class Num(i: Int) extends Exp     // integer numbers
-case class FNum(i: Double) extends Exp  // floating numbers
-case class ChConst(c: Int) extends Exp // char constant
+case class Num(i: Int) extends Exp  // integer numbers
+case class FNum(i: Float) extends Exp  // float numbers
+case class ChConst(c: Int) extends Exp  // character constants
 case class Aop(o: String, a1: Exp, a2: Exp) extends Exp
-case class Sequence(e1: Exp, e2: Exp) extends Exp
+case class Sequence(e1: Exp, e2: Exp) extends Exp  // expressions separated by semicolons
+
 case class Bop(o: String, a1: Exp, a2: Exp) extends BExp
 
 
-// arithmetic expressions (there needs to be an F in the SEMICOLON case)
-lazy val Exp: Parser[List[Token], Exp] = 
-  (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Exp ~ T_KWD("else") ~ Exp) ==>
-    { case _ ~ x ~ _ ~ y ~ _ ~ z => If(x, y, z): Exp } ||
-  (F ~ T_SEMI ~ Exp) ==> { case x ~ _ ~ y => Sequence(x, y): Exp } || L
-lazy val L: Parser[List[Token], Exp] = 
-  (T ~ T_OP("+") ~ Exp) ==> { case x ~ _ ~ z => Aop("+", x, z): Exp } ||
-  (T ~ T_OP("-") ~ Exp) ==> { case x ~ _ ~ z => Aop("-", x, z): Exp } || T  
-lazy val T: Parser[List[Token], Exp] = 
-  (F ~ T_OP("*") ~ T) ==> { case x ~ _ ~ z => Aop("*", x, z): Exp } || 
-  (F ~ T_OP("/") ~ T) ==> { case x ~ _ ~ z => Aop("/", x, z): Exp } || 
-  (F ~ T_OP("%") ~ T) ==> { case x ~ _ ~ z => Aop("%", x, z): Exp } || F
-lazy val F: Parser[List[Token], Exp] = 
-  (IdParser ~ T_LPAREN ~ T_RPAREN) ==> { case x ~ _ ~ _ => Call(x, Nil): Exp } ||
-  (IdParser ~ T_LPAREN ~ ListParser(Exp, T_COMMA) ~ T_RPAREN) ==> { case x ~ _ ~ z ~ _ => Call(x, z): Exp } ||
-  (T_LPAREN ~ Exp ~ T_RPAREN) ==> { case _ ~ y ~ _ => y: Exp } || 
-  IdParser ==> { case x => Var(x): Exp } || 
-  NumParser ==> { case x => Num(x): Exp } ||
-  CharConstParser ==> { case x => ChConst(x): Exp } ||
-  FNumParser ==> { case x => FNum(x): Exp }
+lazy val Exps: Parser[Tokens, Exp] =
+  (Exp ~ p";" ~ Exps).map[Exp]{ case x ~ _ ~ z => Sequence(x, z) } ||
+  Exp
+
+lazy val Exp: Parser[Tokens, Exp] =
+  (p"if" ~ BExp ~ p"then" ~ Exp ~ p"else" ~ Exp).map[Exp]{ case _ ~ x ~ _ ~ y ~ _ ~ z => If(x, y, z) } ||
+  M
+
+lazy val M: Parser[Tokens, Exp] = 
+  (T ~ OpParser(Set("+", "-")) ~ M).map[Exp]{ case x ~ y ~ z => Aop(y, x, z) } ||
+  T
+
+lazy val T: Parser[Tokens, Exp] = 
+  (U ~ OpParser(Set("*", "/", "%")) ~ T).map[Exp]{ case x ~ y ~ z => Aop(y, x, z) } ||
+  U
+
+// includes negative factor
+// a + - b CAN be recognised
+// - - - b CAN be recognised
+lazy val U: Parser[Tokens, Exp] =
+  (OpParser(Set("-")) ~ U).map[Exp]{ case _ ~ y => Aop("*", Num(-1), y) } ||
+  (OpParser(Set("+")) ~ U).map[Exp]{ case _ ~ y => y } ||
+  F
+
+lazy val F: Parser[Tokens, Exp] = 
+  (p"(" ~ Exp ~ p")").map[Exp]{ case _ ~ y ~ _ => y } ||
+  (p"skip").map(_ => Call("skip", Nil)) ||  // hardcoded
+  (p"skip" ~ p"(" ~ p")").map(_ => Call("skip", Nil)) ||  // hardcoded
+  (IdParser ~ p"(" ~ ListParser(Exp, p",") ~ p")").map[Exp]{ case id ~ _ ~ args ~ _ => Call(id, args) } ||
+  (IdParser ~ p"(" ~ p")").map[Exp]{ case id ~ _ ~ _ => Call(id, Nil) } ||  // NOTE: empty args are also accepted!
+  (IdParser || GlobalIdParser).map(x => Var(x)) ||
+  IntParser.map(x => Num(x)) ||
+  DoubleParser.map(x => FNum(x)) ||
+  CharParser.map(x => ChConst(x.toInt)) ||
+  (p"{" ~ Exps ~ p"}").map[Exp]{ case _ ~ x ~ _ => x }
 
-// boolean expressions
-lazy val BExp: Parser[List[Token], BExp] = 
-  (Exp ~ T_OP("==") ~ Exp) ==> { case x ~ _ ~ z => Bop("==", x, z): BExp } || 
-  (Exp ~ T_OP("!=") ~ Exp) ==> { case x ~ _ ~ z => Bop("!=", x, z): BExp } || 
-  (Exp ~ T_OP("<") ~ Exp)  ==> { case x ~ _ ~ z => Bop("<",  x, z): BExp } || 
-  (Exp ~ T_OP(">") ~ Exp)  ==> { case x ~ _ ~ z => Bop("<",  z, x): BExp } || 
-  (Exp ~ T_OP("<=") ~ Exp) ==> { case x ~ _ ~ z => Bop("<=", x, z): BExp } || 
-  (Exp ~ T_OP("=>") ~ Exp) ==> { case x ~ _ ~ z => Bop("<=", z, x): BExp } ||
-  (T_LPAREN ~ BExp ~ T_RPAREN) ==> { case _ ~ b ~ _ => b : BExp } 
+lazy val BExp: Parser[Tokens, BExp] = 
+  (Exp ~ OpParser(Set("==", "!=", "<", ">", "<=", ">=")) ~ Exp).map[BExp]{ case x ~ y ~ z => Bop(y, x, z) } ||
+  (p"(" ~ BExp ~ p")").map[BExp]{ case _ ~ y ~ _ => y }
+
+lazy val TypedIdParser: Parser[Tokens, (String, String)] =
+  (IdParser ~ p":" ~ TypeParser(Set("Int", "Double"))).map{ case n ~ _ ~ t => (n, t) }
 
-lazy val Arg : Parser[List[Token], (String, String)] = 
-  (IdParser ~ T_COLON ~ TyParser) ==> { case x ~ _ ~ ty => (x, ty) }  
+lazy val Defn: Parser[Tokens, Decl] =
+  (p"def" ~ IdParser ~ p"(" ~ ListParser(TypedIdParser, p",") ~ p")" ~ p":" ~ TypeParser(Set("Int", "Double", "Void")) ~ OpParser(Set("=")) ~ Exp).map[Decl]{
+    case _ ~ y ~ _ ~ w ~ _ ~ _ ~ t ~ _ ~ b => Def(y, w, t, b)
+  } ||
+  (p"def" ~ IdParser ~ p"(" ~ p")" ~ p":" ~ TypeParser(Set("Int", "Double", "Void")) ~ OpParser(Set("=")) ~ Exp).map[Decl]{
+    case _ ~ y ~ _ ~ _ ~ _ ~ t ~ _ ~ b => Def(y, Nil, t, b)
+  }
 
-lazy val Defn: Parser[List[Token], Decl] = {
-   (T_KWD("def") ~ IdParser ~ T_LPAREN ~ T_RPAREN ~ T_COLON ~ TyParser ~ T_OP("=") ~ Exp) ==>
-     { case _ ~ y ~ _ ~ _ ~ _~ ty ~ _ ~ r => Def(y, Nil, ty, r): Decl } ||
-   (T_KWD("def") ~ IdParser ~ T_LPAREN ~ ListParser(Arg, T_COMMA) ~ T_RPAREN ~ T_COLON ~ TyParser ~ T_OP("=") ~ Exp) ==>
-     { case _ ~ y ~ _ ~ w ~ _ ~ _~ ty ~ _ ~ r => Def(y, w, ty, r): Decl }
-}
+lazy val Constp: Parser[Tokens, Decl] = 
+  (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Int")) ~ OpParser(Set("=")) ~ IntParser).map[Decl]{  // IntParser? Not Exp? For this AST, impossible to define Exp
+    case _ ~ id ~ _ ~ _ ~ _ ~ n => Const(id, n)
+  } ||
+  (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Int")) ~ OpParser(Set("=")) ~ OpParser(Set("-")) ~ IntParser).map[Decl]{  // IntParser? Not Exp? For this AST, impossible to define Exp
+    case _ ~ id ~ _ ~ _ ~ _ ~ _ ~ n => Const(id, -n)
+  }
 
-lazy val Const_decl: Parser[List[Token], Decl] =
-   (T_KWD("val") ~ Arg ~ T_OP("=") ~ NumParser) ==>
-     { case _ ~ x ~ _ ~ v => Const(x._1, v): Decl } ||
-   (T_KWD("val") ~ Arg ~ T_OP("=") ~ FNumParser) ==>
-     { case _ ~ x ~ _ ~ v => FConst(x._1, v): Decl } 
+// Int can be converted to Double but not vice versa
+lazy val FConstp: Parser[Tokens, Decl] =
+  (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Double")) ~ OpParser(Set("=")) ~ (DoubleParser || IntParser.map[Float](i => i.toFloat))).map[Decl]{
+    case _ ~ id ~ _ ~ _ ~ _ ~ n => FConst(id, n)
+  } ||
+  (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Double")) ~ OpParser(Set("=")) ~ OpParser(Set("-")) ~ (DoubleParser || IntParser.map[Float](i => i.toFloat))).map[Decl]{
+    case _ ~ id ~ _ ~ _ ~ _ ~ _ ~ n => FConst(id, -n)
+  }
 
-lazy val Prog: Parser[List[Token], List[Decl]] =
-  (Defn ~ T_SEMI ~ Prog) ==> { case x ~ _ ~ z => x :: z : List[Decl] } ||
-  (Const_decl ~ T_SEMI ~ Prog) ==> { case x ~ _ ~ z => x :: z : List[Decl] } ||
-  (Exp ==> ((s) => List(Main(s)) : List[Decl]))
+// Prog consists of global const declarations and function defs in ANY order (each followed by a semicolon),
+// ending with a single expression at the bottom, which becomes the main body
+lazy val Prog: Parser[Tokens, List[Decl]] = 
+  (Defn ~ p";" ~ Prog).map[List[Decl]]{ case x ~ _ ~ z => x :: z } ||
+  (Constp ~ p";" ~ Prog).map[List[Decl]]{ case x ~ _ ~ z => x :: z } ||
+  (FConstp ~ p";" ~ Prog).map[List[Decl]]{ case x ~ _ ~ z => x :: z } ||
+  Exp.map[List[Decl]](s => List(Main(s)))
 
 
+def parse_tks(tokens: Tokens) = Prog.parse_all(tokens)
 
-// Reading tokens and Writing parse trees
+import scala.io.Source._
 
-//import ammonite.ops._
-
-def parse_tks(tks: List[Token]) : List[Decl] = {
-  //println(Prog.parse(tks))
-  Prog.parse_single(tks)
+@main
+def parse(filename: String) = {
+  val fun_code = fromFile(filename).getLines.mkString("\n")
+  // print the AST list to screen
+  println(parse_tks(tokenise(fun_code)))
 }
-
-//@doc("Parses a file.")
-@main
-def main(fname: String) : Unit = {
-  val tks = tokenise(os.read(os.pwd / fname))
-  println(parse_tks(tks))
-}
-
-
--- a/solutions/cw5/fun_tokens.sc	Sat Dec 03 21:58:47 2022 +0000
+++ b/solutions/cw5/fun_tokens.sc	Fri Dec 09 11:00:05 2022 +0000
@@ -1,27 +1,31 @@
-// A tokeniser for the Fun language
-//==================================
+// Author: Zhuo Ying Jiang Li
+// Starting code by Dr Christian Urban
+
+// lexer
+
 //
-// call with 
-//
-//     amm fun_tokens.sc fact.fun
-//
-//     amm fun_tokens.sc defs.fun
+// Use this command to print the list of tokens:
+// amm fun_tokens.sc <name>.fun
 //
 
-
+type Token = (String, String)  // (record name, matched text), as built by env for each Rec value
+type Tokens = List[Token]  // the lexer's output
 
-import scala.language.implicitConversions    
-import scala.language.reflectiveCalls 
-
-abstract class Rexp 
+// regular expressions including records
+abstract class Rexp
 case object ZERO extends Rexp
 case object ONE extends Rexp
 case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
-case class STAR(r: Rexp) extends Rexp 
-case class RECD(x: String, r: Rexp) extends Rexp
-  
+case class RANGE(chars: List[Char]) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+case class OPTIONAL(r: Rexp) extends Rexp
+case class PLUS(r: Rexp) extends Rexp
+case class NTIMES(r: Rexp, n: Int) extends Rexp
+case class RECD(x: String, r: Rexp) extends Rexp  // records for extracting strings or tokens
+
+// values
 abstract class Val
 case object Empty extends Val
 case class Chr(c: Char) extends Val
@@ -29,20 +33,27 @@
 case class Left(v: Val) extends Val
 case class Right(v: Val) extends Val
 case class Stars(vs: List[Val]) extends Val
+case class Opt(v: Val) extends Val
+case class Pls(vs: List[Val]) extends Val
+case class Nt(vs: List[Val]) extends Val
 case class Rec(x: String, v: Val) extends Val
-   
+
 // some convenience for typing in regular expressions
 def charlist2rexp(s : List[Char]): Rexp = s match {
   case Nil => ONE
   case c::Nil => CHAR(c)
-  case c::s => SEQ(CHAR(c), charlist2rexp(s))
+  case c::vs => SEQ(CHAR(c), charlist2rexp(vs))
 }
-implicit def string2rexp(s : String) : Rexp = 
+
+implicit def string2rexp(s : String) : Rexp =
   charlist2rexp(s.toList)
 
 implicit def RexpOps(r: Rexp) = new {
   def | (s: Rexp) = ALT(r, s)
   def % = STAR(r)
+  def ? = OPTIONAL(r)
+  def + = PLUS(r)
+  def ^ (n: Int) = NTIMES(r, n)
   def ~ (s: Rexp) = SEQ(r, s)
 }
 
@@ -50,66 +61,89 @@
   def | (r: Rexp) = ALT(s, r)
   def | (r: String) = ALT(s, r)
   def % = STAR(s)
+  def ? = OPTIONAL(s)
+  def + = PLUS(s)
+  def ^ (n: Int) = NTIMES(s, n)
   def ~ (r: Rexp) = SEQ(s, r)
   def ~ (r: String) = SEQ(s, r)
   def $ (r: Rexp) = RECD(s, r)
 }
 
-def nullable (r: Rexp) : Boolean = r match {
+def nullable(r: Rexp) : Boolean = r match {
   case ZERO => false
   case ONE => true
   case CHAR(_) => false
+  case RANGE(_) => false
   case ALT(r1, r2) => nullable(r1) || nullable(r2)
   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
   case STAR(_) => true
+  case OPTIONAL(r1) => true
+  case PLUS(r1) => nullable(r1)
+  case NTIMES(r1, n) => if (n == 0) true else nullable(r1)
   case RECD(_, r1) => nullable(r1)
 }
 
-def der (c: Char, r: Rexp) : Rexp = r match {
+def der(c: Char, r: Rexp) : Rexp = r match {
   case ZERO => ZERO
   case ONE => ZERO
   case CHAR(d) => if (c == d) ONE else ZERO
+  case RANGE(chars) => if (chars.contains(c)) ONE else ZERO
   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
-  case SEQ(r1, r2) => 
+  case SEQ(r1, r2) =>
     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
     else SEQ(der(c, r1), r2)
   case STAR(r) => SEQ(der(c, r), STAR(r))
+  case OPTIONAL(r) => der(c, r)
+  case PLUS(r) => SEQ(der(c, r), STAR(r))
+  case NTIMES(r1, n) => if (n == 0) ZERO else SEQ(der(c, r1), NTIMES(r1, n - 1))
   case RECD(_, r1) => der(c, r1)
 }
 
-
-// extracts a string from value
+// extracts a string from a value
 def flatten(v: Val) : String = v match {
   case Empty => ""
   case Chr(c) => c.toString
   case Left(v) => flatten(v)
   case Right(v) => flatten(v)
-  case Sequ(v1, v2) => flatten(v1) + flatten(v2)
+  case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
   case Stars(vs) => vs.map(flatten).mkString
+  case Opt(v) => flatten(v)
+  case Pls(vs) => vs.map(flatten).mkString
+  case Nt(vs) => vs.map(flatten).mkString
   case Rec(_, v) => flatten(v)
 }
 
 // extracts an environment from a value;
-// used for tokenise a string
-def env(v: Val) : List[(String, String)] = v match {
+// used for tokenising a string
+def env(v: Val) : Tokens = v match {
   case Empty => Nil
   case Chr(c) => Nil
   case Left(v) => env(v)
   case Right(v) => env(v)
   case Sequ(v1, v2) => env(v1) ::: env(v2)
   case Stars(vs) => vs.flatMap(env)
+  case Opt(v) => env(v)
+  case Pls(vs) => vs.flatMap(env)
+  case Nt(vs) => vs.flatMap(env)
   case Rec(x, v) => (x, flatten(v))::env(v)
 }
 
-// The Injection Part of the lexer
+
+// The injection and mkeps part of the lexer
+//===========================================
 
 def mkeps(r: Rexp) : Val = r match {
   case ONE => Empty
-  case ALT(r1, r2) => 
+  case RANGE(chars) => throw new Exception("lexing error")  // not expected to be reached: nullable(RANGE) is false; kept as an explicit case
+  case ALT(r1, r2) =>
     if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
   case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
   case STAR(r) => Stars(Nil)
+  case OPTIONAL(r) => Opt(Empty)
+  case PLUS(r) => Pls(List(mkeps(r))) // a single iteration that matches the empty string
+  case NTIMES(r, n) => if (n == 0) Nt(Nil) else Nt(List.fill(n)(mkeps(r))) // n empty-string iterations; NOTE(review): the author marked this line "wrong" - confirm what was meant
   case RECD(x, r) => Rec(x, mkeps(r))
+  case _ => throw new Exception("lexing error")
 }
 
 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
@@ -119,9 +153,12 @@
   case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
   case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
   case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
-  case (CHAR(d), Empty) => Chr(c) 
+  case (CHAR(d), Empty) => Chr(c)
+  case (RANGE(chars), Empty) => Chr(c)
+  case (OPTIONAL(r1), v) => Opt(inj(r1, c, v))
+  case (PLUS(r1), Sequ(v1, Stars(vs))) => Pls(inj(r1, c, v1)::vs)
+  case (NTIMES(r1, n), Sequ(v1, Nt(vs))) => Nt(inj(r1, c, v1)::vs)
   case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
-  case _ => { println ("Injection error") ; sys.exit(-1) } 
 }
 
 // some "rectification" functions for simplification
@@ -135,15 +172,14 @@
 def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
   case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
 }
-def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = 
+def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) =
   (v:Val) => Sequ(f1(Empty), f2(v))
-def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = 
+def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) =
   (v:Val) => Sequ(f1(v), f2(Empty))
-def F_RECD(f: Val => Val) = (v:Val) => v match {
-  case Rec(x, v) => Rec(x, f(v))
-}
+
 def F_ERROR(v: Val): Val = throw new Exception("error")
 
+// simplification
 def simp(r: Rexp): (Rexp, Val => Val) = r match {
   case ALT(r1, r2) => {
     val (r1s, f1s) = simp(r1)
@@ -152,7 +188,7 @@
       case (ZERO, _) => (r2s, F_RIGHT(f2s))
       case (_, ZERO) => (r1s, F_LEFT(f1s))
       case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
-                else (ALT (r1s, r2s), F_ALT(f1s, f2s)) 
+                else (ALT (r1s, r2s), F_ALT(f1s, f2s))
     }
   }
   case SEQ(r1, r2) => {
@@ -166,115 +202,75 @@
       case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
     }
   }
-  case RECD(x, r1) => {
-    val (r1s, f1s) = simp(r1)
-    (RECD(x, r1s), F_RECD(f1s))
-  }
   case r => (r, F_ID)
 }
 
 // lexing functions including simplification
 def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
-  case Nil => if (nullable(r)) mkeps(r) else { println ("Lexing Error") ; sys.exit(-1) } 
+  case Nil => if (nullable(r)) mkeps(r) else
+    { throw new Exception("lexing error") }
   case c::cs => {
     val (r_simp, f_simp) = simp(der(c, r))
     inj(r, c, f_simp(lex_simp(r_simp, cs)))
   }
 }
 
-def lexing_simp(r: Rexp, s: String) = env(lex_simp(r, s.toList))
+def lexing_simp(r: Rexp, s: String) =
+  env(lex_simp(r, s.toList))
 
 
-// The Lexing Rules for the Fun Language
-
-def PLUS(r: Rexp) = r ~ r.%
-def OPT(r: Rexp) = r | ONE
+// FUN language lexer
 
-val SYM = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | 
-          "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | 
-          "w" | "x" | "y" | "z" | "A" | "B" | "C" | "D" |"E" | "F" | "G" |
-          "H" | "I" | "J" | "K" |"L" | "M" | "N" |
-          "O" | "P" | "Q" | "R" |"S" | "T" | "U" |
-          "V" | "W" | "X" | "Y" | "Z" | "_" | ":"
-val DIGIT = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
-val ID = SYM ~ (SYM | DIGIT).% 
-val NUM = PLUS(DIGIT)
-val FNUM = OPT("-") ~ NUM ~ "." ~ NUM 
-val KEYWORD : Rexp = "if" | "then" | "else" | "def" | "val"
-val TYPE : Rexp = "Void" | "Int" | "Double" 
-val SEMI: Rexp = ";"
-val COLON: Rexp = ":"
-val COMMA: Rexp = ","
-val OP: Rexp = "=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/"
-val WHITESPACE = PLUS(" " | "\n" | "\t" | "\r")
-val RPAREN: Rexp = ")" | "}"
-val LPAREN: Rexp = "(" | "{"
-val ALL = SYM | DIGIT | OP | " " | ":" | ";" | "-" | "." | "\"" | "=" | "," | "(" | ")" | "{" | "}"
-val ALL2 = ALL | "\n"
-val COMMENT = ("/*" ~ ALL2.% ~ "*/") | ("//" ~ ALL.% ~ "\n")
-
-val CHR :Rexp = "'" ~ (ALL | "\\n") ~ "'" 
+val DIGIT = RANGE(('0' to '9').toList)  // decimal digits
+val LOWERCASE = RANGE(('a' to 'z').toList)  // ASCII lowercase letters
+val UPPERCASE = RANGE(('A' to 'Z').toList)  // ASCII uppercase letters
+val SYM = RANGE("!\"#$%&'()*+,-./:;<>=?`@[]\\^_{}|~".toList)  // punctuation, taken from the ASCII table at https://en.cppreference.com/w/cpp/language/ascii
 
 
-val FUN_REGS = (("k" $ KEYWORD) | 
-                ("t" $ TYPE) |
-                ("i" $ ID) | 
-                ("ch" $ CHR) | 
-                ("o" $ OP) | 
-                ("n" $ NUM) | 
-                ("f" $ FNUM) | 
-                ("s" $ SEMI) | 
-                ("co" $ COLON) |
-                ("c" $ COMMA) |
-                ("pl" $ LPAREN) |
-                ("pr" $ RPAREN) |
-                ("w" $ (WHITESPACE | COMMENT))).%
-
-
-
-// The tokens for the Fun language
+val KEYWORD : Rexp = "val" | "if" | "then" | "else" | "def" | "skip" // "skip" is a keyword because, per the author, hanoi.fun writes skip without parentheses
+val TYPE : Rexp = "Int" | "Double" | "Void"
+val GLOBAL_ID : Rexp = UPPERCASE ~ ("_" | LOWERCASE | DIGIT | UPPERCASE).% // starts with an uppercase letter, then any letters, digits or underscores
+val ID : Rexp = LOWERCASE ~ ("_" | UPPERCASE | LOWERCASE | DIGIT).% // starts with a lowercase letter, then any letters, digits or underscores
+val SEMI : Rexp = ";"
+val COLON : Rexp = ":"
+val OP : Rexp = "=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/" // no && and || operators
+val INT : Rexp = DIGIT.+
+val DOUBLE : Rexp = DIGIT.+ ~ "." ~ DIGIT.+  // unsigned: a leading "-" is lexed as an operator and the parser turns it into a negative number
+val COMMA : Rexp = "," 
+val WHITESPACES: Rexp = (" " | "\n" | "\t" | "\r").+ // one or more of space, \n, \t or \r
+val LPAREN : Rexp = RANGE("({".toList)
+val RPAREN : Rexp = RANGE(")}".toList)
+val CH : Rexp = "'" ~ (LOWERCASE | UPPERCASE | DIGIT | SYM | " " | "\\n" | "\\t" | "\\r") ~ "'"  // a quoted letter, digit, symbol or space, or one of the two-character escapes \n, \t, \r
+val COMMENT : Rexp = ("//" ~ (LOWERCASE | UPPERCASE | SYM | DIGIT | RANGE(" \t\r".toList)).% ~ OPTIONAL("\n")) | ("/*" ~ (LOWERCASE | UPPERCASE | SYM | DIGIT | RANGE(" \n\t\r".toList)).% ~ "*/")  // the newline ending a // comment is optional: input read with getLines.mkString("\n") has no final newline, so a comment on the last line must still lex as a comment
 
-abstract class Token extends Serializable 
-case object T_SEMI extends Token
-case object T_COMMA extends Token
-case object T_COLON extends Token
-case object T_LPAREN extends Token
-case object T_RPAREN extends Token
-case class T_ID(s: String) extends Token
-case class T_FID(s: String) extends Token
-case class T_OP(s: String) extends Token
-case class T_NUM(n: Int) extends Token
-case class T_FNUM(x: Double) extends Token
-case class T_KWD(s: String) extends Token
-case class T_TY(s: String) extends Token
-case class T_CHR(i: Int) extends Token
+val FUN_REGS = (("keyword" $ KEYWORD) |  // each "name" $ r records the text matched by r under that name
+                ("type" $ TYPE) |
+                ("global" $ GLOBAL_ID) |
+                ("id" $ ID) |
+                ("op" $ OP) |
+                ("double" $ DOUBLE) |
+                ("int" $ INT) |
+                ("semi" $ SEMI) |
+                ("colon" $ COLON) |
+                ("comma" $ COMMA) |
+                ("ch" $ CH) |
+                ("par" $ (LPAREN | RPAREN)) |
+                COMMENT | WHITESPACES).%  // not recorded with $, so env produces no token for comments and whitespace
 
-val token : PartialFunction[(String, String), Token] = {
-  case ("k", s) => T_KWD(s)
-  case ("t", s) => T_TY(s)
-  case ("i", s) => T_ID(s)
-  case ("o", s) => T_OP(s)
-  case ("n", s) => T_NUM(s.toInt)
-  case ("ch", s) => if (s == "'\\n'") T_CHR(10) else T_CHR(s(1).toInt)
-  case ("f", s) => T_FNUM(s.toDouble) 
-  case ("s", _) => T_SEMI
-  case ("c", _) => T_COMMA
-  case ("co", _) => T_COLON
-  case ("pl", _) => T_LPAREN
-  case ("pr", _) => T_RPAREN
+def fun_lex(program: String) : Tokens = {
+  lexing_simp(FUN_REGS, program)  // lex the program text with the Fun rules and return the recorded (name, text) pairs
 }
 
-
-def tokenise(s: String) : List[Token] = {
-  val tks = lexing_simp(FUN_REGS, s).collect(token)
-  if (tks.length != 0) tks
-  else { println (s"Tokenise Error") ; sys.exit(-1) }     
+def tokenise(program: String) : Tokens = {
+  fun_lex(program)  // same lexer as fun_lex, kept under the name the parser script calls
 }
 
-//import ammonite.ops._
+import scala.io.Source._
 
-//@doc("Tokenising a file.")
 @main
-def main(fname: String) = {
-  println(tokenise(os.read(os.pwd / fname)))
+def lex(filename: String) = {
+  // open the file explicitly so the handle can be closed (the chained one-liner never closed it)
+  val src = fromFile(filename)
+  // print the tokens to screen, one per line, and always release the file
+  try println(fun_lex(src.getLines().mkString("\n")).mkString("\n")) finally src.close()
 }