--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/fun/fa0.fun Fri Dec 09 11:00:05 2022 +0000
@@ -0,0 +1,5 @@
+def fact(n) =
+ (if n == 0 then 1 else n * fact(n - 1));
+
+
+write(fact(6))
--- a/solutions/cw5/fun_llvm.sc Sat Dec 03 21:58:47 2022 +0000
+++ b/solutions/cw5/fun_llvm.sc Fri Dec 09 11:00:05 2022 +0000
@@ -1,50 +1,17 @@
-// A Small LLVM Compiler for a Simple Functional Language
-// (includes an external lexer and parser)
-//
-//
-// call with -- prints out llvm code
-//
-// amm fun_llvm.sc main fact.fun
-// amm fun_llvm.sc main defs.fun
-//
-// or -- writes llvm code to disk
-//
-// amm fun_llvm.sc write fact.fun
-// amm fun_llvm.sc write defs.fun
-//
-// this will generate an .ll file.
-//
-// or -- runs the generated llvm code via lli
-//
-// amm fun_llvm.sc run fact.fun
-// amm fun_llvm.sc run defs.fun
+// Author: Zhuo Ying Jiang Li
+// Starting code by Dr Christian Urban
+
+//
+// Use amm compiler.sc XXX.fun
+// ./XXX
+// This will generate XXX.ll, XXX.o as well as the binary program.
//
-//
-// You can interpret an .ll file using lli, for example
-//
-// lli fact.ll
-//
-// The optimiser can be invoked as
-//
-// opt -O1 -S in_file.ll > out_file.ll
-// opt -O3 -S in_file.ll > out_file.ll
-//
-// The code produced for the various architectures can be obtain with
-//
-// llc -march=x86 -filetype=asm in_file.ll -o -
-// llc -march=arm -filetype=asm in_file.ll -o -
-//
-// Producing an executable can be achieved by
-//
-// llc -filetype=obj in_file.ll
-// gcc in_file.o -o a.out
-// ./a.out
+// lexer + parser
import $file.fun_tokens, fun_tokens._
import $file.fun_parser, fun_parser._
-
// for generating new labels
var counter = -1
@@ -53,76 +20,51 @@
x ++ "_" ++ counter.toString()
}
+// typing
+type Ty = String
+type TyEnv = Map[String, Ty]
+
+// initial typing environment
+val initialEnv = Map[String, Ty]("skip" -> "Void", "print_int" -> "Void", "print_char" -> "Void",
+ "print_space" -> "Void", "print_star" -> "Void", "new_line" -> "Void")
+
+val typeConversion = Map("Int" -> "i32", "Double" -> "double", "Void" -> "void")
+
// Internal CPS language for FUN
abstract class KExp
abstract class KVal
-type Ty = String
-type TyEnv = Map[String, Ty]
-
case class KVar(s: String, ty: Ty = "UNDEF") extends KVal
-case class KLoad(v: KVal) extends KVal
-case class KNum(i: Int) extends KVal
-case class KFNum(i: Double) extends KVal
-case class KChr(c: Int) extends KVal
+case class KConst(s: String, ty: Ty = "UNDEF") extends KVal
+case class KNum(i: Int) extends KVal // known type
+case class KFNum(d: Float) extends KVal // known type
+case class KChConst(c: Int) extends KVal // known type
case class Kop(o: String, v1: KVal, v2: KVal, ty: Ty = "UNDEF") extends KVal
case class KCall(o: String, vrs: List[KVal], ty: Ty = "UNDEF") extends KVal
-case class KIf(x1: String, e1: KExp, e2: KExp) extends KExp {
- override def toString = s"KIf $x1\nIF\n$e1\nELSE\n$e2"
+case class KLet(x: String, e1: KVal, e2: KExp) extends KExp {
+ override def toString = s"LET $x = $e1 in \n$e2"
}
-case class KLet(x: String, e1: KVal, e2: KExp) extends KExp {
- override def toString = s"let $x = $e1 in \n$e2"
+case class KIf(x1: String, e1: KExp, e2: KExp) extends KExp {
+ def pad(e: KExp) = e.toString.replaceAll("(?m)^", " ")
+
+ override def toString =
+ s"IF $x1\nTHEN\n${pad(e1)}\nELSE\n${pad(e2)}"
}
case class KReturn(v: KVal) extends KExp
-// typing K values
-def typ_val(v: KVal, ts: TyEnv) : (KVal, Ty) = v match {
- case KVar(s, _) => {
- val ty = ts.getOrElse(s, "TUNDEF")
- (KVar(s, ty), ty)
- }
- case Kop(op, v1, v2, _) => {
- val (tv1, ty1) = typ_val(v1, ts)
- val (tv2, ty2) = typ_val(v2, ts)
- if (ty1 == ty2) (Kop(op, tv1, tv2, ty1), ty1) else (Kop(op, tv1, tv2, "TMISMATCH"), "TMISMATCH")
- }
- case KCall(fname, args, _) => {
- val ty = ts.getOrElse(fname, "TCALLUNDEF" ++ fname)
- (KCall(fname, args.map(typ_val(_, ts)._1), ty), ty)
- }
- case KLoad(v) => {
- val (tv, ty) = typ_val(v, ts)
- (KLoad(tv), ty)
- }
- case KNum(i) => (KNum(i), "Int")
- case KFNum(i) => (KFNum(i), "Double")
- case KChr(c) => (KChr(c), "Int")
-}
-
-def typ_exp(a: KExp, ts: TyEnv) : KExp = a match {
- case KReturn(v) => KReturn(typ_val(v, ts)._1)
- case KLet(x: String, v: KVal, e: KExp) => {
- val (tv, ty) = typ_val(v, ts)
- KLet(x, tv, typ_exp(e, ts + (x -> ty)))
- }
- case KIf(b, e1, e2) => KIf(b, typ_exp(e1, ts), typ_exp(e2, ts))
-}
-
-
-
-
// CPS translation from Exps to KExps using a
// continuation k.
def CPS(e: Exp)(k: KVal => KExp) : KExp = e match {
- case Var(s) if (s.head.isUpper) => {
+ case Var(s) => {
+ if (s.head.isUpper) { // if this variable is a global
val z = Fresh("tmp")
- KLet(z, KLoad(KVar(s)), k(KVar(z)))
+ KLet(z, KConst(s), k(KVar(z)))
+ } else k(KVar(s))
}
- case Var(s) => k(KVar(s))
case Num(i) => k(KNum(i))
- case ChConst(c) => k(KChr(c))
- case FNum(i) => k(KFNum(i))
+ case FNum(d) => k(KFNum(d))
+ case ChConst(c) => k(KChConst(c))
case Aop(o, e1, e2) => {
val z = Fresh("tmp")
CPS(e1)(y1 =>
@@ -146,77 +88,122 @@
}
case Sequence(e1, e2) =>
CPS(e1)(_ => CPS(e2)(y2 => k(y2)))
-}
+}
-//initial continuation
+// initial continuation
def CPSi(e: Exp) = CPS(e)(KReturn)
-// some testcases
-val e1 = Aop("*", Var("a"), Num(3))
-CPSi(e1)
-val e2 = Aop("+", Aop("*", Var("a"), Num(3)), Num(4))
-CPSi(e2)
+// get type of KVal
+def get_typ_val(v: KVal) : Ty = v match {
+ case KNum(i) => "Int"
+ case KFNum(d) => "Double"
+ case KChConst(i) => "Int"
+ case KVar(name, ty) => ty
+ case KConst(name, ty) => ty
+ case Kop(o, v1, v2, ty) => ty
+ case KCall(o, vrs, ty) => ty
+}
-val e3 = Aop("+", Num(2), Aop("*", Var("a"), Num(3)))
-CPSi(e3)
-
-val e4 = Aop("+", Aop("-", Num(1), Num(2)), Aop("*", Var("a"), Num(3)))
-CPSi(e4)
-
-val e5 = If(Bop("==", Num(1), Num(1)), Num(3), Num(4))
-CPSi(e5)
+// update type information for KValues: rebuilds v with every type slot filled in from ts
+def typ_val(v: KVal, ts: TyEnv) : KVal = v match {
+  case KVar(name, ty) => { // variable: its type must already be recorded in ts
+    if (ts.contains(name)) {
+      KVar(name, ts(name))
+    } else throw new Exception(s"Compile error: unknown type for $name")
+  }
+  case KConst(name, ty) => { // global constant: same lookup, keeps the KConst wrapper
+    if (ts.contains(name)) {
+      KConst(name, ts(name))
+    } else throw new Exception(s"Compile error: unknown type for $name")
+  }
+  case Kop(o, v1, v2, ty) => { // binary operator: both operands must have the same type
+    val tv1 = typ_val(v1, ts)
+    val tv2 = typ_val(v2, ts)
+    val t1 = get_typ_val(tv1)
+    val t2 = get_typ_val(tv2)
+    if (t1 != t2) throw new Exception(s"Compile error: operator $o applied to $t1 and $t2") // o may be arithmetic, not only a comparison
+    Kop(o, tv1, tv2, t1)
+  }
+  case KCall(o, vrs, ty) => { // call: type the arguments; result type is the callee's entry in ts
+    val new_vrs = vrs.map(vr => typ_val(vr, ts))
+    if (ts.contains(o)) {
+      KCall(o, new_vrs, ts(o))
+    } else throw new Exception(s"Compile error: unknown type for $o")
+  }
+  case x => x // no changes: KNum, KFNum, KChConst
+}
-val e6 = If(Bop("!=", Num(10), Num(10)), e5, Num(40))
-CPSi(e6)
+// update type information for KExpressions
+def typ_exp(a: KExp, ts: TyEnv) : KExp = a match {
+ case KLet(x, e1, e2) => {
+ val te1 = typ_val(e1, ts)
+ val env1 = ts + (x -> get_typ_val(te1))
+ val te2 = typ_exp(e2, env1)
+ KLet(x, te1, te2)
+ }
+ case KIf(x1, e1, e2) => KIf(x1, typ_exp(e1, ts), typ_exp(e2, ts))
+ case KReturn(v) => KReturn(typ_val(v, ts))
+}
-val e7 = Call("foo", List(Num(3)))
-CPSi(e7)
+// prelude
+val prelude = """
+declare i32 @printf(i8*, ...)
-val e8 = Call("foo", List(Aop("*", Num(3), Num(1)), Num(4), Aop("+", Num(5), Num(6))))
-CPSi(e8)
+@.str_nl = private constant [2 x i8] c"\0A\00"
+@.str_star = private constant [2 x i8] c"*\00"
+@.str_space = private constant [2 x i8] c" \00"
+@.str_int = private constant [3 x i8] c"%d\00"
+@.str_c = private constant [3 x i8] c"%c\00"
+
+define void @new_line() #0 {
+ %t0 = getelementptr [2 x i8], [2 x i8]* @.str_nl, i32 0, i32 0
+ call i32 (i8*, ...) @printf(i8* %t0)
+ ret void
+}
-val e9 = Sequence(Aop("*", Var("a"), Num(3)), Aop("+", Var("b"), Num(6)))
-CPSi(e9)
+define void @print_star() #0 {
+ %t0 = getelementptr [2 x i8], [2 x i8]* @.str_star, i32 0, i32 0
+ call i32 (i8*, ...) @printf(i8* %t0)
+ ret void
+}
+
+define void @print_space() #0 {
+ %t0 = getelementptr [2 x i8], [2 x i8]* @.str_space, i32 0, i32 0
+ call i32 (i8*, ...) @printf(i8* %t0)
+ ret void
+}
-val e = Aop("*", Aop("+", Num(1), Call("foo", List(Var("a"), Num(3)))), Num(4))
-CPSi(e)
+define void @print_int(i32 %x) {
+ %t0 = getelementptr [3 x i8], [3 x i8]* @.str_int, i32 0, i32 0
+ call i32 (i8*, ...) @printf(i8* %t0, i32 %x)
+ ret void
+}
+define void @print_char(i32 %x) {
+ %t0 = getelementptr [3 x i8], [3 x i8]* @.str_c, i32 0, i32 0
+ call i32 (i8*, ...) @printf(i8* %t0, i32 %x)
+ ret void
+}
+define void @skip() #0 {
+ ret void
+}
+; END OF BUILT-IN FUNCTIONS (prelude)
+"""
// convenient string interpolations
// for instructions, labels and methods
import scala.language.implicitConversions
import scala.language.reflectiveCalls
-
-
-
-implicit def sring_inters(sc: StringContext) = new {
+implicit def string_inters(sc: StringContext) = new {
def i(args: Any*): String = " " ++ sc.s(args:_*) ++ "\n"
def l(args: Any*): String = sc.s(args:_*) ++ ":\n"
def m(args: Any*): String = sc.s(args:_*) ++ "\n"
}
-def get_ty(s: String) = s match {
- case "Double" => "double"
- case "Void" => "void"
- case "Int" => "i32"
- case "Bool" => "i2"
- case _ => s
-}
-
-def compile_call_arg(a: KVal) = a match {
- case KNum(i) => s"i32 $i"
- case KFNum(i) => s"double $i"
- case KChr(c) => s"i32 $c"
- case KVar(s, ty) => s"${get_ty(ty)} %$s"
-}
-
-def compile_arg(s: (String, String)) = s"${get_ty(s._2)} %${s._1}"
-
-
// mathematical and boolean operations
def compile_op(op: String) = op match {
case "+" => "add i32 "
@@ -225,48 +212,70 @@
case "/" => "sdiv i32 "
case "%" => "srem i32 "
case "==" => "icmp eq i32 "
- case "!=" => "icmp ne i32 " // not equal
- case "<=" => "icmp sle i32 " // signed less or equal
- case "<" => "icmp slt i32 " // signed less than
+ case "!=" => "icmp ne i32 "
+ case "<=" => "icmp sle i32 "
+ case "<" => "icmp slt i32 "
+ case ">=" => "icmp sge i32 "
+ case ">" => "icmp sgt i32 "
}
def compile_dop(op: String) = op match {
case "+" => "fadd double "
case "*" => "fmul double "
case "-" => "fsub double "
+ case "/" => "fdiv double "
+ case "%" => "frem double "
case "==" => "fcmp oeq double "
- case "<=" => "fcmp ole double "
- case "<" => "fcmp olt double "
+ case "!=" => "fcmp one double "
+ case "<=" => "fcmp ole double "
+ case "<" => "fcmp olt double "
+ case ">=" => "fcmp oge double " // ordered >=; icmp only accepts integer/pointer operands
+ case ">" => "fcmp ogt double " // ordered >
+}
+
+def compile_args(vrs: List[KVal]) : List[String] = vrs match {
+ case Nil => Nil
+ case x::xs => s"${typeConversion(get_typ_val(x))} ${compile_val(x)}" :: compile_args(xs)
}
// compile K values
def compile_val(v: KVal) : String = v match {
case KNum(i) => s"$i"
- case KFNum(i) => s"$i"
- case KChr(c) => s"$c"
- case KVar(s, ty) => s"%$s"
- case KLoad(KVar(s, ty)) => s"load ${get_ty(ty)}, ${get_ty(ty)}* @$s"
- case Kop(op, x1, x2, ty) => ty match {
- case "Int" => s"${compile_op(op)} ${compile_val(x1)}, ${compile_val(x2)}"
- case "Double" => s"${compile_dop(op)} ${compile_val(x1)}, ${compile_val(x2)}"
- case _ => Kop(op, x1, x2, ty).toString
+ case KFNum(d) => s"$d"
+ case KChConst(i) => s"$i" // as integer
+ case KVar(s, ty) => s"%$s"
+ case KConst(s, ty) => {
+ val t = typeConversion(ty)
+ s"load $t, $t* @$s"
}
- case KCall(fname, args, ty) =>
- s"call ${get_ty(ty)} @$fname (${args.map(compile_call_arg).mkString(", ")})"
+ case Kop(op, x1, x2, ty) => {
+ if (ty == "Double") {
+ s"${compile_dop(op)} ${compile_val(x1)}, ${compile_val(x2)}"
+ } else if (ty == "Int") {
+ s"${compile_op(op)} ${compile_val(x1)}, ${compile_val(x2)}"
+ } else throw new Exception("Compile error: unknown type for comparison")
+ }
+ case KCall(x1, args, ty) => {
+ s"call ${typeConversion(ty)} @$x1 (${compile_args(args).mkString(", ")})"
+ }
}
// compile K expressions
def compile_exp(a: KExp) : String = a match {
- case KReturn(KVar("void", _)) =>
- i"ret void"
- case KReturn(KVar(x, ty)) =>
- i"ret ${get_ty(ty)} %$x"
- case KReturn(KNum(i)) =>
- i"ret i32 $i"
- case KLet(x: String, KCall(o: String, vrs: List[KVal], "Void"), e: KExp) =>
- i"${compile_val(KCall(o: String, vrs: List[KVal], "Void"))}" ++ compile_exp(e)
- case KLet(x: String, v: KVal, e: KExp) =>
- i"%$x = ${compile_val(v)}" ++ compile_exp(e)
+ case KReturn(v) => {
+ val ty = get_typ_val(v)
+ if (ty == "Void") {
+ i"ret void"
+ } else {
+ i"ret ${typeConversion(ty)} ${compile_val(v)}"
+ }
+ }
+ case KLet(x: String, v: KVal, e: KExp) => {
+ val tv = get_typ_val(v)
+ if (tv == "Void") {
+ i"${compile_val(v)}" ++ compile_exp(e)
+ } else i"%$x = ${compile_val(v)}" ++ compile_exp(e)
+ }
case KIf(x, e1, e2) => {
val if_br = Fresh("if_branch")
val else_br = Fresh("else_branch")
@@ -278,100 +287,50 @@
}
}
-
-val prelude = """
-declare i32 @printf(i8*, ...)
-
-@.str_nl = private constant [2 x i8] c"\0A\00"
-@.str_star = private constant [2 x i8] c"*\00"
-@.str_space = private constant [2 x i8] c" \00"
-
-define void @new_line() #0 {
- %t0 = getelementptr [2 x i8], [2 x i8]* @.str_nl, i32 0, i32 0
- %1 = call i32 (i8*, ...) @printf(i8* %t0)
- ret void
-}
-
-define void @print_star() #0 {
- %t0 = getelementptr [2 x i8], [2 x i8]* @.str_star, i32 0, i32 0
- %1 = call i32 (i8*, ...) @printf(i8* %t0)
- ret void
-}
-
-define void @print_space() #0 {
- %t0 = getelementptr [2 x i8], [2 x i8]* @.str_space, i32 0, i32 0
- %1 = call i32 (i8*, ...) @printf(i8* %t0)
- ret void
-}
-
-define void @skip() #0 {
- ret void
-}
-
-@.str_int = private constant [3 x i8] c"%d\00"
-
-define void @print_int(i32 %x) {
- %t0 = getelementptr [3 x i8], [3 x i8]* @.str_int, i32 0, i32 0
- call i32 (i8*, ...) @printf(i8* %t0, i32 %x)
- ret void
-}
-
-@.str_char = private constant [3 x i8] c"%c\00"
-
-define void @print_char(i32 %x) {
- %t0 = getelementptr [3 x i8], [3 x i8]* @.str_char, i32 0, i32 0
- call i32 (i8*, ...) @printf(i8* %t0, i32 %x)
- ret void
-}
-
-; END OF BUILD-IN FUNCTIONS (prelude)
-
-"""
-
-def get_cont(ty: Ty) = ty match {
- case "Int" => KReturn
- case "Double" => KReturn
- case "Void" => { (_: KVal) => KReturn(KVar("void", "Void")) }
-}
-
-// compile function for declarations and main
-def compile_decl(d: Decl, ts: TyEnv) : (String, TyEnv) = d match {
- case Def(name, args, ty, body) => {
- val ts2 = ts + (name -> ty)
- val tkbody = typ_exp(CPS(body)(get_cont(ty)), ts2 ++ args.toMap)
- (m"define ${get_ty(ty)} @$name (${args.map(compile_arg).mkString(",")}) {" ++
- compile_exp(tkbody) ++
- m"}\n", ts2)
- }
- case Main(body) => {
- val tbody = typ_exp(CPS(body)(_ => KReturn(KNum(0))), ts)
- (m"define i32 @main() {" ++
- compile_exp(tbody) ++
- m"}\n", ts)
- }
- case Const(name, n) => {
- (m"@$name = global i32 $n\n", ts + (name -> "Int"))
- }
- case FConst(name, x) => {
- (m"@$name = global double $x\n", ts + (name -> "Double"))
+def compile_def_args(args: List[(String, String)], ts: TyEnv) : (List[String], TyEnv) = args match {
+ case Nil => (Nil, ts)
+ case (n, t)::xs => {
+ if (t == "Void") throw new Exception("Compile error: argument of type void is invalid")
+ val (rest, env) = compile_def_args(xs, ts + (n -> t))
+ (s"${typeConversion(t)} %$n" :: rest, env)
}
}
-def compile_prog(prog: List[Decl], ty: TyEnv) : String = prog match {
- case Nil => ""
- case d::ds => {
- val (s2, ty2) = compile_decl(d, ty)
- s2 ++ compile_prog(ds, ty2)
+def compile_decl(d: Decl, ts: TyEnv) : (String, TyEnv) = d match {
+ case Const(name, value) => {
+ (m"@$name = global i32 $value\n", ts + (name -> "Int"))
+ }
+ case FConst(name, value) => {
+ (m"@$name = global double $value\n", ts + (name -> "Double"))
+ }
+ case Def(name, args, ty, body) => {
+ val (argList, env1) = compile_def_args(args, ts + (name -> ty))
+ (m"define ${typeConversion(ty)} @$name (${argList.mkString(", ")}) {" ++
+ compile_exp(typ_exp(CPSi(body), env1)) ++
+ m"}\n", ts + (name -> ty)) // don't preserve local variables in environment
+ }
+ case Main(body) => {
+ (m"define i32 @main() {" ++
+ compile_exp(typ_exp(CPS(body)(_ => KReturn(KNum(0))), ts + ("main" -> "Int"))) ++
+ m"}\n", ts + ("main" -> "Int"))
}
}
-// main compiler functions
-def compile(prog: List[Decl]) : String =
- prelude ++ compile_prog(prog, Map("new_line" -> "Void", "skip" -> "Void",
- "print_star" -> "Void", "print_space" -> "Void",
- "print_int" -> "Void", "print_char" -> "Void"))
+// recursively update the typing environment while compiling
+def compile_block(prog: List[Decl], ts: TyEnv) : (String, TyEnv) = prog match {
+ case Nil => ("", ts)
+ case x::xs => {
+ val (compiled, env) = compile_decl(x, ts)
+ val (compiled_block, env1) = compile_block(xs, env)
+ (compiled ++ compiled_block, env1)
+ }
+}
-//import ammonite.ops._
+def fun_compile(prog: List[Decl]) : String = {
+ val tyenv = initialEnv
+ val (compiled, _) = compile_block(prog, tyenv)
+ prelude ++ compiled
+}
@main
@@ -379,8 +338,8 @@
val path = os.pwd / fname
val file = fname.stripSuffix("." ++ path.ext)
val tks = tokenise(os.read(path))
- val ast = parse_tks(tks)
- val code = compile(ast)
+ val ast = parse_tks(tks).head
+ val code = fun_compile(ast)
println(code)
}
@@ -389,8 +348,8 @@
val path = os.pwd / fname
val file = fname.stripSuffix("." ++ path.ext)
val tks = tokenise(os.read(path))
- val ast = parse_tks(tks)
- val code = compile(ast)
+ val ast = parse_tks(tks).head
+ val code = fun_compile(ast)
//println(code)
os.write.over(os.pwd / (file ++ ".ll"), code)
}
@@ -407,6 +366,3 @@
}
-
-
-
--- a/solutions/cw5/fun_parser.sc Sat Dec 03 21:58:47 2022 +0000
+++ b/solutions/cw5/fun_parser.sc Fri Dec 09 11:00:05 2022 +0000
@@ -1,215 +1,263 @@
-// A parser for the Fun language
-//================================
-//
-// call with
-//
-// amm fun_parser.sc fact.fun
+// Author: Zhuo Ying Jiang Li
+// Starting code by Dr Christian Urban
+
+// parser: convert sequence of tokens to AST
+
//
-// amm fun_parser.sc defs.fun
+// Use this command to print parsed AST:
+// amm fun_parser.sc <name>.fun
//
-// this will generate a parse-tree from a list
-// of tokens
-import scala.language.implicitConversions
-import scala.language.reflectiveCalls
+import $file.fun_tokens, fun_tokens._
-import $file.fun_tokens, fun_tokens._
-
-
-// Parser combinators
-// type parameter I needs to be of Seq-type
-//
-abstract class Parser[I, T](implicit ev: I => Seq[_]) {
- def parse(ts: I): Set[(T, I)]
+// more convenience for the map parsers later on;
+// it allows writing nested patterns as
+// case x ~ y ~ z => ...
+case class ~[+A, +B](x: A, y: B)
- def parse_single(ts: I) : T =
- parse(ts).partition(_._2.isEmpty) match {
- case (good, _) if !good.isEmpty => good.head._1
- case (good, err) if err.isEmpty => {
- println (s"Parse Error\n $good \n $err") ; sys.exit(-1) }
- case (_, err) => {
- println (s"Parse Error\n${err.minBy(_._2.length)}") ; sys.exit(-1) }
- }
+// constraint for the input
+type IsSeq[A] = A => Seq[_]
+
+abstract class Parser[I : IsSeq, T]{
+ def parse(in: I): Set[(T, I)]
+
+ def parse_all(in: I) : Set[T] =
+ for ((hd, tl) <- parse(in);
+ if tl.isEmpty) yield hd
}
-// convenience for writing grammar rules
-case class ~[+A, +B](_1: A, _2: B)
+// parser combinators
-class SeqParser[I, T, S](p: => Parser[I, T],
- q: => Parser[I, S])(implicit ev: I => Seq[_]) extends Parser[I, ~[T, S]] {
- def parse(sb: I) =
- for ((head1, tail1) <- p.parse(sb);
- (head2, tail2) <- q.parse(tail1)) yield (new ~(head1, head2), tail2)
+// sequence parser
+class SeqParser[I : IsSeq, T, S](p: => Parser[I, T],
+ q: => Parser[I, S]) extends Parser[I, ~[T, S]] {
+ def parse(in: I) =
+ for ((hd1, tl1) <- p.parse(in);
+ (hd2, tl2) <- q.parse(tl1)) yield (new ~(hd1, hd2), tl2)
}
-class AltParser[I, T](p: => Parser[I, T],
- q: => Parser[I, T])(implicit ev: I => Seq[_]) extends Parser[I, T] {
- def parse(sb: I) = p.parse(sb) ++ q.parse(sb)
+// alternative parser
+class AltParser[I : IsSeq, T](p: => Parser[I, T],
+ q: => Parser[I, T]) extends Parser[I, T] {
+ def parse(in: I) = p.parse(in) ++ q.parse(in)
}
-class FunParser[I, T, S](p: => Parser[I, T],
- f: T => S)(implicit ev: I => Seq[_]) extends Parser[I, S] {
- def parse(sb: I) =
- for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
+// map parser
+class MapParser[I : IsSeq, T, S](p: => Parser[I, T],
+ f: T => S) extends Parser[I, S] {
+ def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)
}
-// convenient combinators
-implicit def ParserOps[I, T](p: Parser[I, T])(implicit ev: I => Seq[_]) = new {
- def || (q : => Parser[I, T]) = new AltParser[I, T](p, q)
- def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f)
+// more convenient syntax for parser combinators
+implicit def ParserOps[I : IsSeq, T](p: Parser[I, T]) = new {
+ def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)
def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
+ def map[S](f: => T => S) = new MapParser[I, T, S](p, f)
}
-def ListParser[I, T, S](p: => Parser[I, T],
- q: => Parser[I, S])(implicit ev: I => Seq[_]): Parser[I, List[T]] = {
- (p ==> ((s) => List(s))) ||
- (p ~ q ~ ListParser(p, q)) ==> { case x ~ _ ~ z => x :: z : List[T] }
-}
+// -------------------------------------------------
+// atomic parsers
-case class TokParser(tok: Token) extends Parser[List[Token], Token] {
- def parse(ts: List[Token]) = ts match {
- case t::ts if (t == tok) => Set((t, ts))
+// atomic parser for types
+case class TypeParser(ty: Set[String]) extends Parser[Tokens, String] {
+ def parse(tokens: Tokens) = tokens match {
+ case Nil => Set()
+ case tk::tkns if tk._1 == "type" && ty.contains(tk._2) => Set((tk._2, tkns))
case _ => Set()
}
}
-implicit def token2tparser(t: Token) = TokParser(t)
-
-implicit def TokOps(t: Token) = new {
- def || (q : => Parser[List[Token], Token]) = new AltParser[List[Token], Token](t, q)
- def ==>[S] (f: => Token => S) = new FunParser[List[Token], Token, S](t, f)
- def ~[S](q : => Parser[List[Token], S]) = new SeqParser[List[Token], Token, S](t, q)
+// atomic parser for global ids
+case object GlobalIdParser extends Parser[Tokens, String] {
+ def parse(tokens: Tokens) = tokens match {
+ case Nil => Set()
+ case tk::tkns if tk._1 == "global" => Set((tk._2, tkns))
+ case _ => Set()
+ }
}
-case object EmptyParser extends Parser[List[Token], String] {
- def parse(ts: List[Token]) = Set(("", ts))
+// atomic parser for ids
+case object IdParser extends Parser[Tokens, String] {
+ def parse(tokens: Tokens) = tokens match {
+ case Nil => Set()
+ case tk::tkns if tk._1 == "id" => Set((tk._2, tkns))
+ case _ => Set()
+ }
}
-case object NumParser extends Parser[List[Token], Int] {
- def parse(ts: List[Token]) = ts match {
- case T_NUM(n)::ts => Set((n, ts))
- case _ => Set ()
+// atomic parser for doubles (I use Float because that's what is used in the AST structures given in CW5)
+case object DoubleParser extends Parser[Tokens, Float] {
+ def parse(tokens: Tokens) = tokens match {
+ case Nil => Set()
+ case tk::tkns if tk._1 == "double" => Set((tk._2.toFloat, tkns))
+ case _ => Set()
}
}
-case object FNumParser extends Parser[List[Token], Double] {
- def parse(ts: List[Token]) = ts match {
- case T_FNUM(x)::ts => Set((x, ts))
+// atomic parser for integers
+case object IntParser extends Parser[Tokens, Int] {
+ def parse(tokens: Tokens) = tokens match {
+ case Nil => Set()
+ case tk::tkns if tk._1 == "int" => Set((tk._2.toInt, tkns))
+ case _ => Set()
+ }
+}
+
+// atomic parser for operators
+case class OpParser(ops: Set[String]) extends Parser[Tokens, String] {
+ def parse(tokens: Tokens) = tokens match {
+ case Nil => Set()
+ case tk::tkns if tk._1 == "op" && ops.contains(tk._2) => Set((tk._2, tkns))
+ case _ => Set()
+ }
+}
+
+// atomic parser for character
+case object CharParser extends Parser[Tokens, Char] {
+ def parse(tokens: Tokens) = tokens match {
+ case Nil => Set()
+ case tk::tkns if tk._1 == "ch" => {
+ val stripped = tk._2.slice(1, tk._2.length-1) // strip off single quotes
+ stripped match {
+ case "\\n" => Set(('\n', tkns))
+ case "\\t" => Set(('\t', tkns))
+ case "\\r" => Set(('\r', tkns))
+ case c => Set((c(0), tkns))
+ }
+ }
case _ => Set()
}
}
-case object IdParser extends Parser[List[Token], String] {
- def parse(ts: List[Token]) = ts match {
- case T_ID(s)::ts => Set((s, ts))
- case _ => Set ()
+// parser for list of arguments
+def ListParser[I, T, S](p: => Parser[I, T],
+ q: => Parser[I, S])(implicit ev: I => Seq[_]): Parser[I, List[T]] = {
+ (p ~ q ~ ListParser(p, q)).map{ case x ~ _ ~ z => x :: z : List[T] } ||
+ (p.map((s) => List(s)))
+}
+
+// I may want to write string interpolations for:
+// keywords, semicolon, colon, comma, parentheses
+case class StrParser(s: String) extends Parser[Tokens, String] {
+ def parse(tokens: Tokens) = tokens match {
+ case Nil => Set()
+ case tk::tkns if tk._2 == s => Set((s, tkns))
+ case _ => Set()
}
}
-case object CharConstParser extends Parser[List[Token], Int] {
- def parse(ts: List[Token]) = ts match {
- case T_CHR(c)::ts => Set((c, ts))
- case _ => Set ()
- }
-}
-
-case object TyParser extends Parser[List[Token], String] {
- def parse(ts: List[Token]) = ts match {
- case T_TY(s)::ts => Set((s, ts))
- case _ => Set ()
- }
+implicit def parser_interpolation(sc: StringContext) = new {
+ def p(args: Any*) = StrParser(sc.s(args:_*))
}
-// Abstract syntax trees for the Fun language
-abstract class Exp
-abstract class BExp
-abstract class Decl
+// the AST datastructures for the FUN language
+
+abstract class Exp
+abstract class BExp
+abstract class Decl
case class Def(name: String, args: List[(String, String)], ty: String, body: Exp) extends Decl
case class Main(e: Exp) extends Decl
case class Const(name: String, v: Int) extends Decl
-case class FConst(name: String, x: Double) extends Decl
+case class FConst(name: String, x: Float) extends Decl
case class Call(name: String, args: List[Exp]) extends Exp
case class If(a: BExp, e1: Exp, e2: Exp) extends Exp
case class Var(s: String) extends Exp
-case class Num(i: Int) extends Exp // integer numbers
-case class FNum(i: Double) extends Exp // floating numbers
-case class ChConst(c: Int) extends Exp // char constant
+case class Num(i: Int) extends Exp // integer numbers
+case class FNum(i: Float) extends Exp // float numbers
+case class ChConst(c: Int) extends Exp // character constants
case class Aop(o: String, a1: Exp, a2: Exp) extends Exp
-case class Sequence(e1: Exp, e2: Exp) extends Exp
+case class Sequence(e1: Exp, e2: Exp) extends Exp // expressions separated by semicolons
+
case class Bop(o: String, a1: Exp, a2: Exp) extends BExp
-// arithmetic expressions (there needs to be an F in the SEMICOLON case)
-lazy val Exp: Parser[List[Token], Exp] =
- (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Exp ~ T_KWD("else") ~ Exp) ==>
- { case _ ~ x ~ _ ~ y ~ _ ~ z => If(x, y, z): Exp } ||
- (F ~ T_SEMI ~ Exp) ==> { case x ~ _ ~ y => Sequence(x, y): Exp } || L
-lazy val L: Parser[List[Token], Exp] =
- (T ~ T_OP("+") ~ Exp) ==> { case x ~ _ ~ z => Aop("+", x, z): Exp } ||
- (T ~ T_OP("-") ~ Exp) ==> { case x ~ _ ~ z => Aop("-", x, z): Exp } || T
-lazy val T: Parser[List[Token], Exp] =
- (F ~ T_OP("*") ~ T) ==> { case x ~ _ ~ z => Aop("*", x, z): Exp } ||
- (F ~ T_OP("/") ~ T) ==> { case x ~ _ ~ z => Aop("/", x, z): Exp } ||
- (F ~ T_OP("%") ~ T) ==> { case x ~ _ ~ z => Aop("%", x, z): Exp } || F
-lazy val F: Parser[List[Token], Exp] =
- (IdParser ~ T_LPAREN ~ T_RPAREN) ==> { case x ~ _ ~ _ => Call(x, Nil): Exp } ||
- (IdParser ~ T_LPAREN ~ ListParser(Exp, T_COMMA) ~ T_RPAREN) ==> { case x ~ _ ~ z ~ _ => Call(x, z): Exp } ||
- (T_LPAREN ~ Exp ~ T_RPAREN) ==> { case _ ~ y ~ _ => y: Exp } ||
- IdParser ==> { case x => Var(x): Exp } ||
- NumParser ==> { case x => Num(x): Exp } ||
- CharConstParser ==> { case x => ChConst(x): Exp } ||
- FNumParser ==> { case x => FNum(x): Exp }
+lazy val Exps: Parser[Tokens, Exp] =
+ (Exp ~ p";" ~ Exps).map[Exp]{ case x ~ _ ~ z => Sequence(x, z) } ||
+ Exp
+
+lazy val Exp: Parser[Tokens, Exp] =
+ (p"if" ~ BExp ~ p"then" ~ Exp ~ p"else" ~ Exp).map[Exp]{ case _ ~ x ~ _ ~ y ~ _ ~ z => If(x, y, z) } ||
+ M
+
+lazy val M: Parser[Tokens, Exp] =
+ (T ~ OpParser(Set("+", "-")) ~ M).map[Exp]{ case x ~ y ~ z => Aop(y, x, z) } ||
+ T
+
+lazy val T: Parser[Tokens, Exp] =
+ (U ~ OpParser(Set("*", "/", "%")) ~ T).map[Exp]{ case x ~ y ~ z => Aop(y, x, z) } ||
+ U
+
+// includes negative factor
+// a + - b CAN be recognised
+// - - - b CAN be recognised
+lazy val U: Parser[Tokens, Exp] =
+ (OpParser(Set("-")) ~ U).map[Exp]{ case _ ~ y => Aop("*", Num(-1), y) } ||
+ (OpParser(Set("+")) ~ U).map[Exp]{ case _ ~ y => y } ||
+ F
+
+lazy val F: Parser[Tokens, Exp] =
+ (p"(" ~ Exp ~ p")").map[Exp]{ case _ ~ y ~ _ => y } ||
+ (p"skip").map(_ => Call("skip", Nil)) || // hardcoded
+ (p"skip" ~ p"(" ~ p")").map(_ => Call("skip", Nil)) || // hardcoded
+ (IdParser ~ p"(" ~ ListParser(Exp, p",") ~ p")").map[Exp]{ case id ~ _ ~ args ~ _ => Call(id, args) } ||
+ (IdParser ~ p"(" ~ p")").map[Exp]{ case id ~ _ ~ _ => Call(id, Nil) } || // NOTE: empty args are also accepted!
+ (IdParser || GlobalIdParser).map(x => Var(x)) ||
+ IntParser.map(x => Num(x)) ||
+ DoubleParser.map(x => FNum(x)) ||
+ CharParser.map(x => ChConst(x.toInt)) ||
+ (p"{" ~ Exps ~ p"}").map[Exp]{ case _ ~ x ~ _ => x }
-// boolean expressions
-lazy val BExp: Parser[List[Token], BExp] =
- (Exp ~ T_OP("==") ~ Exp) ==> { case x ~ _ ~ z => Bop("==", x, z): BExp } ||
- (Exp ~ T_OP("!=") ~ Exp) ==> { case x ~ _ ~ z => Bop("!=", x, z): BExp } ||
- (Exp ~ T_OP("<") ~ Exp) ==> { case x ~ _ ~ z => Bop("<", x, z): BExp } ||
- (Exp ~ T_OP(">") ~ Exp) ==> { case x ~ _ ~ z => Bop("<", z, x): BExp } ||
- (Exp ~ T_OP("<=") ~ Exp) ==> { case x ~ _ ~ z => Bop("<=", x, z): BExp } ||
- (Exp ~ T_OP("=>") ~ Exp) ==> { case x ~ _ ~ z => Bop("<=", z, x): BExp } ||
- (T_LPAREN ~ BExp ~ T_RPAREN) ==> { case _ ~ b ~ _ => b : BExp }
+lazy val BExp: Parser[Tokens, BExp] =
+ (Exp ~ OpParser(Set("==", "!=", "<", ">", "<=", ">=")) ~ Exp).map[BExp]{ case x ~ y ~ z => Bop(y, x, z) } ||
+ (p"(" ~ BExp ~ p")").map[BExp]{ case _ ~ y ~ _ => y }
+
+lazy val TypedIdParser: Parser[Tokens, (String, String)] =
+ (IdParser ~ p":" ~ TypeParser(Set("Int", "Double"))).map{ case n ~ _ ~ t => (n, t) }
-lazy val Arg : Parser[List[Token], (String, String)] =
- (IdParser ~ T_COLON ~ TyParser) ==> { case x ~ _ ~ ty => (x, ty) }
+lazy val Defn: Parser[Tokens, Decl] =
+ (p"def" ~ IdParser ~ p"(" ~ ListParser(TypedIdParser, p",") ~ p")" ~ p":" ~ TypeParser(Set("Int", "Double", "Void")) ~ OpParser(Set("=")) ~ Exp).map[Decl]{
+ case _ ~ y ~ _ ~ w ~ _ ~ _ ~ t ~ _ ~ b => Def(y, w, t, b)
+ } ||
+ (p"def" ~ IdParser ~ p"(" ~ p")" ~ p":" ~ TypeParser(Set("Int", "Double", "Void")) ~ OpParser(Set("=")) ~ Exp).map[Decl]{
+ case _ ~ y ~ _ ~ _ ~ _ ~ t ~ _ ~ b => Def(y, Nil, t, b)
+ }
-lazy val Defn: Parser[List[Token], Decl] = {
- (T_KWD("def") ~ IdParser ~ T_LPAREN ~ T_RPAREN ~ T_COLON ~ TyParser ~ T_OP("=") ~ Exp) ==>
- { case _ ~ y ~ _ ~ _ ~ _~ ty ~ _ ~ r => Def(y, Nil, ty, r): Decl } ||
- (T_KWD("def") ~ IdParser ~ T_LPAREN ~ ListParser(Arg, T_COMMA) ~ T_RPAREN ~ T_COLON ~ TyParser ~ T_OP("=") ~ Exp) ==>
- { case _ ~ y ~ _ ~ w ~ _ ~ _~ ty ~ _ ~ r => Def(y, w, ty, r): Decl }
-}
+lazy val Constp: Parser[Tokens, Decl] =
+ (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Int")) ~ OpParser(Set("=")) ~ IntParser).map[Decl]{ // IntParser? Not Exp? For this AST, impossible to define Exp
+ case _ ~ id ~ _ ~ _ ~ _ ~ n => Const(id, n)
+ } ||
+ (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Int")) ~ OpParser(Set("=")) ~ OpParser(Set("-")) ~ IntParser).map[Decl]{ // IntParser? Not Exp? For this AST, impossible to define Exp
+ case _ ~ id ~ _ ~ _ ~ _ ~ _ ~ n => Const(id, -n)
+ }
-lazy val Const_decl: Parser[List[Token], Decl] =
- (T_KWD("val") ~ Arg ~ T_OP("=") ~ NumParser) ==>
- { case _ ~ x ~ _ ~ v => Const(x._1, v): Decl } ||
- (T_KWD("val") ~ Arg ~ T_OP("=") ~ FNumParser) ==>
- { case _ ~ x ~ _ ~ v => FConst(x._1, v): Decl }
+// Int can be converted to Double but not vice versa
+lazy val FConstp: Parser[Tokens, Decl] =
+ (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Double")) ~ OpParser(Set("=")) ~ (DoubleParser || IntParser.map[Float](i => i.toFloat))).map[Decl]{
+ case _ ~ id ~ _ ~ _ ~ _ ~ n => FConst(id, n)
+ } ||
+ (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Double")) ~ OpParser(Set("=")) ~ OpParser(Set("-")) ~ (DoubleParser || IntParser.map[Float](i => i.toFloat))).map[Decl]{
+ case _ ~ id ~ _ ~ _ ~ _ ~ _ ~ n => FConst(id, -n)
+ }
-lazy val Prog: Parser[List[Token], List[Decl]] =
- (Defn ~ T_SEMI ~ Prog) ==> { case x ~ _ ~ z => x :: z : List[Decl] } ||
- (Const_decl ~ T_SEMI ~ Prog) ==> { case x ~ _ ~ z => x :: z : List[Decl] } ||
- (Exp ==> ((s) => List(Main(s)) : List[Decl]))
+// Prog is a sequence of ';'-terminated function definitions and global constant
+// declarations (in any order), followed by a single main expression at the very end
+lazy val Prog: Parser[Tokens, List[Decl]] =
+ (Defn ~ p";" ~ Prog).map[List[Decl]]{ case x ~ _ ~ z => x :: z } ||
+ (Constp ~ p";" ~ Prog).map[List[Decl]]{ case x ~ _ ~ z => x :: z } ||
+ (FConstp ~ p";" ~ Prog).map[List[Decl]]{ case x ~ _ ~ z => x :: z } ||
+ Exp.map[List[Decl]](s => List(Main(s)))
+def parse_tks(tokens: Tokens) = Prog.parse_all(tokens)
-// Reading tokens and Writing parse trees
+import scala.io.Source._
-//import ammonite.ops._
-
-def parse_tks(tks: List[Token]) : List[Decl] = {
- //println(Prog.parse(tks))
- Prog.parse_single(tks)
+@main
+def parse(filename: String) = {
+  val fun_code = os.read(os.Path(filename, os.pwd)) // os.read closes the file (Source.fromFile left it open); relative names resolve against os.pwd
+  // print the set of complete parses (one AST list per parse) to screen
+  println(parse_tks(tokenise(fun_code)))
+}
-
-//@doc("Parses a file.")
-@main
-def main(fname: String) : Unit = {
- val tks = tokenise(os.read(os.pwd / fname))
- println(parse_tks(tks))
-}
-
-
--- a/solutions/cw5/fun_tokens.sc Sat Dec 03 21:58:47 2022 +0000
+++ b/solutions/cw5/fun_tokens.sc Fri Dec 09 11:00:05 2022 +0000
@@ -1,27 +1,31 @@
-// A tokeniser for the Fun language
-//==================================
+// Author: Zhuo Ying Jiang Li
+// Starting code by Dr Christian Urban
+
+// lexer
+
//
-// call with
-//
-// amm fun_tokens.sc fact.fun
-//
-// amm fun_tokens.sc defs.fun
+// Use this command to print the list of tokens:
+// amm fun_tokens.sc <name>.fun
//
-
+type Token = (String, String)
+type Tokens = List[Token]
-import scala.language.implicitConversions
-import scala.language.reflectiveCalls
-
-abstract class Rexp
+// regular expressions including records
+abstract class Rexp
case object ZERO extends Rexp
case object ONE extends Rexp
case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
-case class RECD(x: String, r: Rexp) extends Rexp
-
+case class RANGE(chars: List[Char]) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+case class OPTIONAL(r: Rexp) extends Rexp
+case class PLUS(r: Rexp) extends Rexp
+case class NTIMES(r: Rexp, n: Int) extends Rexp
+case class RECD(x: String, r: Rexp) extends Rexp // records for extracting strings or tokens
+
+// values
abstract class Val
case object Empty extends Val
case class Chr(c: Char) extends Val
@@ -29,20 +33,27 @@
case class Left(v: Val) extends Val
case class Right(v: Val) extends Val
case class Stars(vs: List[Val]) extends Val
+case class Opt(v: Val) extends Val
+case class Pls(vs: List[Val]) extends Val
+case class Nt(vs: List[Val]) extends Val
case class Rec(x: String, v: Val) extends Val
-
+
// some convenience for typing in regular expressions
def charlist2rexp(s : List[Char]): Rexp = s match {
case Nil => ONE
case c::Nil => CHAR(c)
- case c::s => SEQ(CHAR(c), charlist2rexp(s))
+ case c::vs => SEQ(CHAR(c), charlist2rexp(vs))
}
-implicit def string2rexp(s : String) : Rexp =
+
+implicit def string2rexp(s : String) : Rexp =
charlist2rexp(s.toList)
implicit def RexpOps(r: Rexp) = new {
def | (s: Rexp) = ALT(r, s)
def % = STAR(r)
+ def ? = OPTIONAL(r)
+ def + = PLUS(r)
+ def ^ (n: Int) = NTIMES(r, n)
def ~ (s: Rexp) = SEQ(r, s)
}
@@ -50,66 +61,89 @@
def | (r: Rexp) = ALT(s, r)
def | (r: String) = ALT(s, r)
def % = STAR(s)
+ def ? = OPTIONAL(s)
+ def + = PLUS(s)
+ def ^ (n: Int) = NTIMES(s, n)
def ~ (r: Rexp) = SEQ(s, r)
def ~ (r: String) = SEQ(s, r)
def $ (r: Rexp) = RECD(s, r)
}
-def nullable (r: Rexp) : Boolean = r match {
+def nullable(r: Rexp) : Boolean = r match {
case ZERO => false
case ONE => true
case CHAR(_) => false
+ case RANGE(_) => false
case ALT(r1, r2) => nullable(r1) || nullable(r2)
case SEQ(r1, r2) => nullable(r1) && nullable(r2)
case STAR(_) => true
+ case OPTIONAL(r1) => true
+ case PLUS(r1) => nullable(r1)
+ case NTIMES(r1, n) => if (n == 0) true else nullable(r1)
case RECD(_, r1) => nullable(r1)
}
-def der (c: Char, r: Rexp) : Rexp = r match {
+def der(c: Char, r: Rexp) : Rexp = r match {
case ZERO => ZERO
case ONE => ZERO
case CHAR(d) => if (c == d) ONE else ZERO
+ case RANGE(chars) => if (chars.contains(c)) ONE else ZERO
case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
+ case SEQ(r1, r2) =>
if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
else SEQ(der(c, r1), r2)
case STAR(r) => SEQ(der(c, r), STAR(r))
+ case OPTIONAL(r) => der(c, r)
+ case PLUS(r) => SEQ(der(c, r), STAR(r))
+ case NTIMES(r1, n) => if (n == 0) ZERO else SEQ(der(c, r1), NTIMES(r1, n - 1))
case RECD(_, r1) => der(c, r1)
}
-
-// extracts a string from value
+// extracts a string from a value
def flatten(v: Val) : String = v match {
case Empty => ""
case Chr(c) => c.toString
case Left(v) => flatten(v)
case Right(v) => flatten(v)
- case Sequ(v1, v2) => flatten(v1) + flatten(v2)
+ case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
case Stars(vs) => vs.map(flatten).mkString
+ case Opt(v) => flatten(v)
+ case Pls(vs) => vs.map(flatten).mkString
+ case Nt(vs) => vs.map(flatten).mkString
case Rec(_, v) => flatten(v)
}
// extracts an environment from a value;
-// used for tokenise a string
-def env(v: Val) : List[(String, String)] = v match {
+// used for tokenising a string
+def env(v: Val) : Tokens = v match {
case Empty => Nil
case Chr(c) => Nil
case Left(v) => env(v)
case Right(v) => env(v)
case Sequ(v1, v2) => env(v1) ::: env(v2)
case Stars(vs) => vs.flatMap(env)
+ case Opt(v) => env(v)
+ case Pls(vs) => vs.flatMap(env)
+ case Nt(vs) => vs.flatMap(env)
case Rec(x, v) => (x, flatten(v))::env(v)
}
-// The Injection Part of the lexer
+
+// The injection and mkeps part of the lexer
+//===========================================
def mkeps(r: Rexp) : Val = r match {
case ONE => Empty
- case ALT(r1, r2) =>
+ case RANGE(chars) => throw new Exception("lexing error") // this will never be called but the coursework asks for it so...
+ case ALT(r1, r2) =>
if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
case STAR(r) => Stars(Nil)
+ case OPTIONAL(r) => Opt(Empty)
+ case PLUS(r) => Pls(List(mkeps(r))) // scala define a list with one element
+ case NTIMES(r, n) => if (n == 0) Nt(Nil) else Nt(List.fill(n)(mkeps(r))) // wrong
case RECD(x, r) => Rec(x, mkeps(r))
+ case _ => throw new Exception("lexing error")
}
def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
@@ -119,9 +153,12 @@
case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
- case (CHAR(d), Empty) => Chr(c)
+ case (CHAR(d), Empty) => Chr(c)
+ case (RANGE(chars), Empty) => Chr(c)
+ case (OPTIONAL(r1), v) => Opt(inj(r1, c, v))
+ case (PLUS(r1), Sequ(v1, Stars(vs))) => Pls(inj(r1, c, v1)::vs)
+ case (NTIMES(r1, n), Sequ(v1, Nt(vs))) => Nt(inj(r1, c, v1)::vs)
case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
- case _ => { println ("Injection error") ; sys.exit(-1) }
}
// some "rectification" functions for simplification
@@ -135,15 +172,14 @@
def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
}
-def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) =
+def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) =
(v:Val) => Sequ(f1(Empty), f2(v))
-def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) =
+def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) =
(v:Val) => Sequ(f1(v), f2(Empty))
-def F_RECD(f: Val => Val) = (v:Val) => v match {
- case Rec(x, v) => Rec(x, f(v))
-}
+
def F_ERROR(v: Val): Val = throw new Exception("error")
+// simplification
def simp(r: Rexp): (Rexp, Val => Val) = r match {
case ALT(r1, r2) => {
val (r1s, f1s) = simp(r1)
@@ -152,7 +188,7 @@
case (ZERO, _) => (r2s, F_RIGHT(f2s))
case (_, ZERO) => (r1s, F_LEFT(f1s))
case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
- else (ALT (r1s, r2s), F_ALT(f1s, f2s))
+ else (ALT (r1s, r2s), F_ALT(f1s, f2s))
}
}
case SEQ(r1, r2) => {
@@ -166,115 +202,75 @@
case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
}
}
- case RECD(x, r1) => {
- val (r1s, f1s) = simp(r1)
- (RECD(x, r1s), F_RECD(f1s))
- }
case r => (r, F_ID)
}
// lexing functions including simplification
def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
- case Nil => if (nullable(r)) mkeps(r) else { println ("Lexing Error") ; sys.exit(-1) }
+ case Nil => if (nullable(r)) mkeps(r) else
+ { throw new Exception("lexing error") }
case c::cs => {
val (r_simp, f_simp) = simp(der(c, r))
inj(r, c, f_simp(lex_simp(r_simp, cs)))
}
}
-def lexing_simp(r: Rexp, s: String) = env(lex_simp(r, s.toList))
+def lexing_simp(r: Rexp, s: String) =
+ env(lex_simp(r, s.toList))
-// The Lexing Rules for the Fun Language
-
-def PLUS(r: Rexp) = r ~ r.%
-def OPT(r: Rexp) = r | ONE
+// FUN language lexer
-val SYM = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" |
- "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" |
- "w" | "x" | "y" | "z" | "A" | "B" | "C" | "D" |"E" | "F" | "G" |
- "H" | "I" | "J" | "K" |"L" | "M" | "N" |
- "O" | "P" | "Q" | "R" |"S" | "T" | "U" |
- "V" | "W" | "X" | "Y" | "Z" | "_" | ":"
-val DIGIT = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
-val ID = SYM ~ (SYM | DIGIT).%
-val NUM = PLUS(DIGIT)
-val FNUM = OPT("-") ~ NUM ~ "." ~ NUM
-val KEYWORD : Rexp = "if" | "then" | "else" | "def" | "val"
-val TYPE : Rexp = "Void" | "Int" | "Double"
-val SEMI: Rexp = ";"
-val COLON: Rexp = ":"
-val COMMA: Rexp = ","
-val OP: Rexp = "=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/"
-val WHITESPACE = PLUS(" " | "\n" | "\t" | "\r")
-val RPAREN: Rexp = ")" | "}"
-val LPAREN: Rexp = "(" | "{"
-val ALL = SYM | DIGIT | OP | " " | ":" | ";" | "-" | "." | "\"" | "=" | "," | "(" | ")" | "{" | "}"
-val ALL2 = ALL | "\n"
-val COMMENT = ("/*" ~ ALL2.% ~ "*/") | ("//" ~ ALL.% ~ "\n")
-
-val CHR :Rexp = "'" ~ (ALL | "\\n") ~ "'"
+val DIGIT = RANGE("0123456789".toList)
+val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList)
+val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList)
+val SYM = RANGE("!\"#$%&'()*+,-./:;<>=?`@[]\\^_{}|~".toList) // I referenced the CPP ASCII table https://en.cppreference.com/w/cpp/language/ascii
-val FUN_REGS = (("k" $ KEYWORD) |
- ("t" $ TYPE) |
- ("i" $ ID) |
- ("ch" $ CHR) |
- ("o" $ OP) |
- ("n" $ NUM) |
- ("f" $ FNUM) |
- ("s" $ SEMI) |
- ("co" $ COLON) |
- ("c" $ COMMA) |
- ("pl" $ LPAREN) |
- ("pr" $ RPAREN) |
- ("w" $ (WHITESPACE | COMMENT))).%
-
-
-
-// The tokens for the Fun language
+val KEYWORD : Rexp = "val" | "if" | "then" | "else" | "def" | "skip" // "skip" is hardcoded because hanoi.fun calls skip() without parentheses
+val TYPE : Rexp = "Int" | "Double" | "Void"
+val GLOBAL_ID : Rexp = UPPERCASE ~ ("_" | LOWERCASE | DIGIT | UPPERCASE).% // start with capital letter and followed by any case
+val ID : Rexp = LOWERCASE ~ ("_" | UPPERCASE | LOWERCASE | DIGIT).% // start with lowercase
+val SEMI : Rexp = ";"
+val COLON : Rexp = ":"
+val OP : Rexp = "=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/" // no && and || operators
+val INT : Rexp = DIGIT.+
+val DOUBLE : Rexp = DIGIT.+ ~ "." ~ DIGIT.+ // negative numbers sign is lexed as operator, but the parser will identify negative numbers
+val COMMA : Rexp = ","
+val WHITESPACES: Rexp = (" " | "\n" | "\t" | "\r").+ // whitespaces are either " " or \n or \t or \r
+val LPAREN : Rexp = RANGE("({".toList)
+val RPAREN : Rexp = RANGE(")}".toList)
+val CH : Rexp = "'" ~ (LOWERCASE | UPPERCASE | DIGIT | SYM | " " | "\\n" | "\\t" | "\\r") ~ "'" // \n, \t and \r should also be tokenized, any character should be, whitespaces too
+val COMMENT : Rexp = ("//" ~ (LOWERCASE | UPPERCASE | SYM | DIGIT | RANGE(" \t\r".toList)).% ~ ("\n" | ONE)) | ("/*" ~ (LOWERCASE | UPPERCASE | SYM | DIGIT | RANGE(" \n\t\r".toList)).% ~ "*/") // the newline after a // comment is optional so that a line comment can be the last thing in the input (the file readers drop the final newline)
-abstract class Token extends Serializable
-case object T_SEMI extends Token
-case object T_COMMA extends Token
-case object T_COLON extends Token
-case object T_LPAREN extends Token
-case object T_RPAREN extends Token
-case class T_ID(s: String) extends Token
-case class T_FID(s: String) extends Token
-case class T_OP(s: String) extends Token
-case class T_NUM(n: Int) extends Token
-case class T_FNUM(x: Double) extends Token
-case class T_KWD(s: String) extends Token
-case class T_TY(s: String) extends Token
-case class T_CHR(i: Int) extends Token
+val FUN_REGS = (("keyword" $ KEYWORD) |
+ ("type" $ TYPE) |
+ ("global" $ GLOBAL_ID) |
+ ("id" $ ID) |
+ ("op" $ OP) |
+ ("double" $ DOUBLE) |
+ ("int" $ INT) |
+ ("semi" $ SEMI) |
+ ("colon" $ COLON) |
+ ("comma" $ COMMA) |
+ ("ch" $ CH) |
+ ("par" $ (LPAREN | RPAREN)) |
+ COMMENT | WHITESPACES).%
-val token : PartialFunction[(String, String), Token] = {
- case ("k", s) => T_KWD(s)
- case ("t", s) => T_TY(s)
- case ("i", s) => T_ID(s)
- case ("o", s) => T_OP(s)
- case ("n", s) => T_NUM(s.toInt)
- case ("ch", s) => if (s == "'\\n'") T_CHR(10) else T_CHR(s(1).toInt)
- case ("f", s) => T_FNUM(s.toDouble)
- case ("s", _) => T_SEMI
- case ("c", _) => T_COMMA
- case ("co", _) => T_COLON
- case ("pl", _) => T_LPAREN
- case ("pr", _) => T_RPAREN
+def fun_lex(program: String) : Tokens = {
+ lexing_simp(FUN_REGS, program)
}
-
-def tokenise(s: String) : List[Token] = {
- val tks = lexing_simp(FUN_REGS, s).collect(token)
- if (tks.length != 0) tks
- else { println (s"Tokenise Error") ; sys.exit(-1) }
+def tokenise(program: String) : Tokens = {
+ lexing_simp(FUN_REGS, program)
}
-//import ammonite.ops._
+import scala.io.Source._
-//@doc("Tokenising a file.")
@main
-def main(fname: String) = {
- println(tokenise(os.read(os.pwd / fname)))
+def lex(filename: String) = {
+ // read file
+ val fun_code = { val src = fromFile(filename); try src.getLines.mkString("\n") finally src.close() } // read the whole file, then always release the file handle
+ // print tokens to screen
+ println(fun_lex(fun_code).mkString("\n"))
}