# HG changeset patch # User Christian Urban # Date 1670583605 0 # Node ID 2f86ebda3629a401c16e6cebe89af585abdb3068 # Parent b40aaffe07932350ba6e8a003c1c3939b7d53687 updated diff -r b40aaffe0793 -r 2f86ebda3629 progs/fun/fa0.fun --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/fun/fa0.fun Fri Dec 09 11:00:05 2022 +0000 @@ -0,0 +1,5 @@ +def fact(n) = + (if n == 0 then 1 else n * fact(n - 1)); + + +write(fact(6)) diff -r b40aaffe0793 -r 2f86ebda3629 solutions/cw5/fun_llvm.sc --- a/solutions/cw5/fun_llvm.sc Sat Dec 03 21:58:47 2022 +0000 +++ b/solutions/cw5/fun_llvm.sc Fri Dec 09 11:00:05 2022 +0000 @@ -1,50 +1,17 @@ -// A Small LLVM Compiler for a Simple Functional Language -// (includes an external lexer and parser) -// -// -// call with -- prints out llvm code -// -// amm fun_llvm.sc main fact.fun -// amm fun_llvm.sc main defs.fun -// -// or -- writes llvm code to disk -// -// amm fun_llvm.sc write fact.fun -// amm fun_llvm.sc write defs.fun -// -// this will generate an .ll file. -// -// or -- runs the generated llvm code via lli -// -// amm fun_llvm.sc run fact.fun -// amm fun_llvm.sc run defs.fun +// Author: Zhuo Ying Jiang Li +// Starting code by Dr Christian Urban + +// +// Use amm compiler.sc XXX.fun +// ./XXX +// This will generate XXX.ll, XXX.o as well as the binary program. // -// -// You can interpret an .ll file using lli, for example -// -// lli fact.ll -// -// The optimiser can be invoked as -// -// opt -O1 -S in_file.ll > out_file.ll -// opt -O3 -S in_file.ll > out_file.ll -// -// The code produced for the various architectures can be obtain with -// -// llc -march=x86 -filetype=asm in_file.ll -o - -// llc -march=arm -filetype=asm in_file.ll -o - -// -// Producing an executable can be achieved by -// -// llc -filetype=obj in_file.ll -// gcc in_file.o -o a.out -// ./a.out +// lexer + parser import $file.fun_tokens, fun_tokens._ import $file.fun_parser, fun_parser._ - // for generating new labels var counter = -1 @@ -53,76 +20,51 @@ x ++ "_" ++ counter.toString() } +// typing +type Ty = String +type TyEnv = Map[String, Ty] + +// initial typing environment +val initialEnv = Map[String, Ty]("skip" -> "Void", "print_int" -> "Void", "print_char" -> "Void", + "print_space" -> "Void", "print_star" -> "Void", "new_line" -> "Void") + +val typeConversion = Map("Int" -> "i32", "Double" -> "double", "Void" -> "void") + // Internal CPS language for FUN abstract class KExp abstract class KVal -type Ty = String -type TyEnv = Map[String, Ty] - case class KVar(s: String, ty: Ty = "UNDEF") extends KVal -case class KLoad(v: KVal) extends KVal -case class KNum(i: Int) extends KVal -case class KFNum(i: Double) extends KVal -case class KChr(c: Int) extends KVal +case class KConst(s: String, ty: Ty = "UNDEF") extends KVal +case class KNum(i: Int) extends KVal // known type +case class KFNum(d: Float) extends KVal // known type +case class KChConst(c: Int) extends KVal // known type case class Kop(o: String, v1: KVal, v2: KVal, ty: Ty = "UNDEF") extends KVal case class KCall(o: String, vrs: List[KVal], ty: Ty = "UNDEF") extends KVal -case class KIf(x1: String, e1: KExp, e2: KExp) extends KExp { - override def toString = s"KIf $x1\nIF\n$e1\nELSE\n$e2" +case class KLet(x: String, e1: KVal, e2: KExp) extends KExp { + override def toString = s"LET $x = $e1 in \n$e2" } -case class KLet(x: String, e1: KVal, e2: KExp) extends KExp { - override def toString = s"let $x = $e1 in \n$e2" +case class KIf(x1: String, e1: KExp, e2: KExp) extends KExp { + def pad(e: KExp) = e.toString.replaceAll("(?m)^", " ") + + override def toString = + s"IF $x1\nTHEN\n${pad(e1)}\nELSE\n${pad(e2)}" } case class KReturn(v: KVal) extends KExp -// typing K values -def typ_val(v: KVal, ts: TyEnv) : (KVal, Ty) = v match { - case KVar(s, _) => { - val ty = ts.getOrElse(s, "TUNDEF") - (KVar(s, ty), ty) - } - case Kop(op, v1, v2, _) => { - val (tv1, ty1) = typ_val(v1, ts) - val (tv2, ty2) = typ_val(v2, ts) - if (ty1 == ty2) (Kop(op, tv1, tv2, ty1), ty1) else (Kop(op, tv1, tv2, "TMISMATCH"), "TMISMATCH") - } - case KCall(fname, args, _) => { - val ty = ts.getOrElse(fname, "TCALLUNDEF" ++ fname) - (KCall(fname, args.map(typ_val(_, ts)._1), ty), ty) - } - case KLoad(v) => { - val (tv, ty) = typ_val(v, ts) - (KLoad(tv), ty) - } - case KNum(i) => (KNum(i), "Int") - case KFNum(i) => (KFNum(i), "Double") - case KChr(c) => (KChr(c), "Int") -} - -def typ_exp(a: KExp, ts: TyEnv) : KExp = a match { - case KReturn(v) => KReturn(typ_val(v, ts)._1) - case KLet(x: String, v: KVal, e: KExp) => { - val (tv, ty) = typ_val(v, ts) - KLet(x, tv, typ_exp(e, ts + (x -> ty))) - } - case KIf(b, e1, e2) => KIf(b, typ_exp(e1, ts), typ_exp(e2, ts)) -} - - - - // CPS translation from Exps to KExps using a // continuation k. def CPS(e: Exp)(k: KVal => KExp) : KExp = e match { - case Var(s) if (s.head.isUpper) => { + case Var(s) => { + if (s.head.isUpper) { // if this variable is a global val z = Fresh("tmp") - KLet(z, KLoad(KVar(s)), k(KVar(z))) + KLet(z, KConst(s), k(KVar(z))) + } else k(KVar(s)) } - case Var(s) => k(KVar(s)) case Num(i) => k(KNum(i)) - case ChConst(c) => k(KChr(c)) - case FNum(i) => k(KFNum(i)) + case FNum(d) => k(KFNum(d)) + case ChConst(c) => k(KChConst(c)) case Aop(o, e1, e2) => { val z = Fresh("tmp") CPS(e1)(y1 => @@ -146,77 +88,122 @@ } case Sequence(e1, e2) => CPS(e1)(_ => CPS(e2)(y2 => k(y2))) -} +} -//initial continuation +// initial continuation def CPSi(e: Exp) = CPS(e)(KReturn) -// some testcases -val e1 = Aop("*", Var("a"), Num(3)) -CPSi(e1) -val e2 = Aop("+", Aop("*", Var("a"), Num(3)), Num(4)) -CPSi(e2) +// get type of KVal +def get_typ_val(v: KVal) : Ty = v match { + case KNum(i) => "Int" + case KFNum(d) => "Double" + case KChConst(i) => "Int" + case KVar(name, ty) => ty + case KConst(name, ty) => ty + case Kop(o, v1, v2, ty) => ty + case KCall(o, vrs, ty) => ty +} -val e3 = Aop("+", Num(2), Aop("*", Var("a"), Num(3))) -CPSi(e3) - -val e4 = Aop("+", Aop("-", Num(1), Num(2)), Aop("*", Var("a"), Num(3))) -CPSi(e4) - -val e5 = If(Bop("==", Num(1), Num(1)), Num(3), Num(4)) -CPSi(e5) +// update type information for KValues +def typ_val(v: KVal, ts: TyEnv) : KVal = v match { + case KVar(name, ty) => { + if (ts.contains(name)) { + KVar(name, ts(name)) + } else throw new Exception(s"Compile error: unknown type for $name") + } + case KConst(name, ty) => { + if (ts.contains(name)) { + KConst(name, ts(name)) + } else throw new Exception(s"Compile error: unknown type for $name") + } + case Kop(o, v1, v2, ty) => { + val tv1 = typ_val(v1, ts) + val tv2 = typ_val(v2, ts) + val t1 = get_typ_val(tv1) + val t2 = get_typ_val(tv2) + if (t1 != t2) throw new Exception(s"Compile error: cannot compare $t1 with $t2") + Kop(o, tv1, tv2, t1) + } + case KCall(o, vrs, ty) => { + val new_vrs = vrs.map(vr => typ_val(vr, ts)) + if (ts.contains(o)) { + KCall(o, new_vrs, ts(o)) + } else throw new Exception(s"Compile error: unknown type for $o") + } + case x => x // no changes: KNum, KFNum, KChConst +} -val e6 = If(Bop("!=", Num(10), Num(10)), e5, Num(40)) -CPSi(e6) +// update type information for KExpressions +def typ_exp(a: KExp, ts: TyEnv) : KExp = a match { + case KLet(x, e1, e2) => { + val te1 = typ_val(e1, ts) + val env1 = ts + (x -> get_typ_val(te1)) + val te2 = typ_exp(e2, env1) + KLet(x, te1, te2) + } + case KIf(x1, e1, e2) => KIf(x1, typ_exp(e1, ts), typ_exp(e2, ts)) + case KReturn(v) => KReturn(typ_val(v, ts)) +} -val e7 = Call("foo", List(Num(3))) -CPSi(e7) +// prelude +val prelude = """ +declare i32 @printf(i8*, ...) -val e8 = Call("foo", List(Aop("*", Num(3), Num(1)), Num(4), Aop("+", Num(5), Num(6)))) -CPSi(e8) +@.str_nl = private constant [2 x i8] c"\0A\00" +@.str_star = private constant [2 x i8] c"*\00" +@.str_space = private constant [2 x i8] c" \00" +@.str_int = private constant [3 x i8] c"%d\00" +@.str_c = private constant [3 x i8] c"%c\00" + +define void @new_line() #0 { + %t0 = getelementptr [2 x i8], [2 x i8]* @.str_nl, i32 0, i32 0 + call i32 (i8*, ...) @printf(i8* %t0) + ret void +} -val e9 = Sequence(Aop("*", Var("a"), Num(3)), Aop("+", Var("b"), Num(6))) -CPSi(e9) +define void @print_star() #0 { + %t0 = getelementptr [2 x i8], [2 x i8]* @.str_star, i32 0, i32 0 + call i32 (i8*, ...) @printf(i8* %t0) + ret void +} + +define void @print_space() #0 { + %t0 = getelementptr [2 x i8], [2 x i8]* @.str_space, i32 0, i32 0 + call i32 (i8*, ...) @printf(i8* %t0) + ret void +} -val e = Aop("*", Aop("+", Num(1), Call("foo", List(Var("a"), Num(3)))), Num(4)) -CPSi(e) +define void @print_int(i32 %x) { + %t0 = getelementptr [3 x i8], [3 x i8]* @.str_int, i32 0, i32 0 + call i32 (i8*, ...) @printf(i8* %t0, i32 %x) + ret void +} +define void @print_char(i32 %x) { + %t0 = getelementptr [3 x i8], [3 x i8]* @.str_c, i32 0, i32 0 + call i32 (i8*, ...) @printf(i8* %t0, i32 %x) + ret void +} +define void @skip() #0 { + ret void +} +; END OF BUILT-IN FUNCTIONS (prelude) +""" // convenient string interpolations // for instructions, labels and methods import scala.language.implicitConversions import scala.language.reflectiveCalls - - - -implicit def sring_inters(sc: StringContext) = new { +implicit def string_inters(sc: StringContext) = new { def i(args: Any*): String = " " ++ sc.s(args:_*) ++ "\n" def l(args: Any*): String = sc.s(args:_*) ++ ":\n" def m(args: Any*): String = sc.s(args:_*) ++ "\n" } -def get_ty(s: String) = s match { - case "Double" => "double" - case "Void" => "void" - case "Int" => "i32" - case "Bool" => "i2" - case _ => s -} - -def compile_call_arg(a: KVal) = a match { - case KNum(i) => s"i32 $i" - case KFNum(i) => s"double $i" - case KChr(c) => s"i32 $c" - case KVar(s, ty) => s"${get_ty(ty)} %$s" -} - -def compile_arg(s: (String, String)) = s"${get_ty(s._2)} %${s._1}" - - // mathematical and boolean operations def compile_op(op: String) = op match { case "+" => "add i32 " @@ -225,48 +212,70 @@ case "/" => "sdiv i32 " case "%" => "srem i32 " case "==" => "icmp eq i32 " - case "!=" => "icmp ne i32 " // not equal - case "<=" => "icmp sle i32 " // signed less or equal - case "<" => "icmp slt i32 " // signed less than + case "!=" => "icmp ne i32 " + case "<=" => "icmp sle i32 " + case "<" => "icmp slt i32 " + case ">=" => "icmp sge i32 " + case ">" => "icmp sgt i32 " } def compile_dop(op: String) = op match { case "+" => "fadd double " case "*" => "fmul double " case "-" => "fsub double " + case "/" => "fdiv double " + case "%" => "frem double " case "==" => "fcmp oeq double " - case "<=" => "fcmp ole double " - case "<" => "fcmp olt double " + case "!=" => "fcmp one double " + case "<=" => "fcmp ole double " + case "<" => "fcmp olt double " + case ">=" => "icmp sge double " + case ">" => "icmp sgt double " +} + +def compile_args(vrs: List[KVal]) : List[String] = vrs match { + case Nil => Nil + case x::xs => s"${typeConversion(get_typ_val(x))} ${compile_val(x)}" :: compile_args(xs) } // compile K values def compile_val(v: KVal) : String = v match { case KNum(i) => s"$i" - case KFNum(i) => s"$i" - case KChr(c) => s"$c" - case KVar(s, ty) => s"%$s" - case KLoad(KVar(s, ty)) => s"load ${get_ty(ty)}, ${get_ty(ty)}* @$s" - case Kop(op, x1, x2, ty) => ty match { - case "Int" => s"${compile_op(op)} ${compile_val(x1)}, ${compile_val(x2)}" - case "Double" => s"${compile_dop(op)} ${compile_val(x1)}, ${compile_val(x2)}" - case _ => Kop(op, x1, x2, ty).toString + case KFNum(d) => s"$d" + case KChConst(i) => s"$i" // as integer + case KVar(s, ty) => s"%$s" + case KConst(s, ty) => { + val t = typeConversion(ty) + s"load $t, $t* @$s" } - case KCall(fname, args, ty) => - s"call ${get_ty(ty)} @$fname (${args.map(compile_call_arg).mkString(", ")})" + case Kop(op, x1, x2, ty) => { + if (ty == "Double") { + s"${compile_dop(op)} ${compile_val(x1)}, ${compile_val(x2)}" + } else if (ty == "Int") { + s"${compile_op(op)} ${compile_val(x1)}, ${compile_val(x2)}" + } else throw new Exception("Compile error: unknown type for comparison") + } + case KCall(x1, args, ty) => { + s"call ${typeConversion(ty)} @$x1 (${compile_args(args).mkString(", ")})" + } } // compile K expressions def compile_exp(a: KExp) : String = a match { - case KReturn(KVar("void", _)) => - i"ret void" - case KReturn(KVar(x, ty)) => - i"ret ${get_ty(ty)} %$x" - case KReturn(KNum(i)) => - i"ret i32 $i" - case KLet(x: String, KCall(o: String, vrs: List[KVal], "Void"), e: KExp) => - i"${compile_val(KCall(o: String, vrs: List[KVal], "Void"))}" ++ compile_exp(e) - case KLet(x: String, v: KVal, e: KExp) => - i"%$x = ${compile_val(v)}" ++ compile_exp(e) + case KReturn(v) => { + val ty = get_typ_val(v) + if (ty == "Void") { + i"ret void" + } else { + i"ret ${typeConversion(ty)} ${compile_val(v)}" + } + } + case KLet(x: String, v: KVal, e: KExp) => { + val tv = get_typ_val(v) + if (tv == "Void") { + i"${compile_val(v)}" ++ compile_exp(e) + } else i"%$x = ${compile_val(v)}" ++ compile_exp(e) + } case KIf(x, e1, e2) => { val if_br = Fresh("if_branch") val else_br = Fresh("else_branch") @@ -278,100 +287,50 @@ } } - -val prelude = """ -declare i32 @printf(i8*, ...) - -@.str_nl = private constant [2 x i8] c"\0A\00" -@.str_star = private constant [2 x i8] c"*\00" -@.str_space = private constant [2 x i8] c" \00" - -define void @new_line() #0 { - %t0 = getelementptr [2 x i8], [2 x i8]* @.str_nl, i32 0, i32 0 - %1 = call i32 (i8*, ...) @printf(i8* %t0) - ret void -} - -define void @print_star() #0 { - %t0 = getelementptr [2 x i8], [2 x i8]* @.str_star, i32 0, i32 0 - %1 = call i32 (i8*, ...) @printf(i8* %t0) - ret void -} - -define void @print_space() #0 { - %t0 = getelementptr [2 x i8], [2 x i8]* @.str_space, i32 0, i32 0 - %1 = call i32 (i8*, ...) @printf(i8* %t0) - ret void -} - -define void @skip() #0 { - ret void -} - -@.str_int = private constant [3 x i8] c"%d\00" - -define void @print_int(i32 %x) { - %t0 = getelementptr [3 x i8], [3 x i8]* @.str_int, i32 0, i32 0 - call i32 (i8*, ...) @printf(i8* %t0, i32 %x) - ret void -} - -@.str_char = private constant [3 x i8] c"%c\00" - -define void @print_char(i32 %x) { - %t0 = getelementptr [3 x i8], [3 x i8]* @.str_char, i32 0, i32 0 - call i32 (i8*, ...) @printf(i8* %t0, i32 %x) - ret void -} - -; END OF BUILD-IN FUNCTIONS (prelude) - -""" - -def get_cont(ty: Ty) = ty match { - case "Int" => KReturn - case "Double" => KReturn - case "Void" => { (_: KVal) => KReturn(KVar("void", "Void")) } -} - -// compile function for declarations and main -def compile_decl(d: Decl, ts: TyEnv) : (String, TyEnv) = d match { - case Def(name, args, ty, body) => { - val ts2 = ts + (name -> ty) - val tkbody = typ_exp(CPS(body)(get_cont(ty)), ts2 ++ args.toMap) - (m"define ${get_ty(ty)} @$name (${args.map(compile_arg).mkString(",")}) {" ++ - compile_exp(tkbody) ++ - m"}\n", ts2) - } - case Main(body) => { - val tbody = typ_exp(CPS(body)(_ => KReturn(KNum(0))), ts) - (m"define i32 @main() {" ++ - compile_exp(tbody) ++ - m"}\n", ts) - } - case Const(name, n) => { - (m"@$name = global i32 $n\n", ts + (name -> "Int")) - } - case FConst(name, x) => { - (m"@$name = global double $x\n", ts + (name -> "Double")) +def compile_def_args(args: List[(String, String)], ts: TyEnv) : (List[String], TyEnv) = args match { + case Nil => (Nil, ts) + case (n, t)::xs => { + if (t == "Void") throw new Exception("Compile error: argument of type void is invalid") + val (rest, env) = compile_def_args(xs, ts + (n -> t)) + (s"${typeConversion(t)} %$n" :: rest, env) } } -def compile_prog(prog: List[Decl], ty: TyEnv) : String = prog match { - case Nil => "" - case d::ds => { - val (s2, ty2) = compile_decl(d, ty) - s2 ++ compile_prog(ds, ty2) +def compile_decl(d: Decl, ts: TyEnv) : (String, TyEnv) = d match { + case Const(name, value) => { + (m"@$name = global i32 $value\n", ts + (name -> "Int")) + } + case FConst(name, value) => { + (m"@$name = global double $value\n", ts + (name -> "Double")) + } + case Def(name, args, ty, body) => { + val (argList, env1) = compile_def_args(args, ts + (name -> ty)) + (m"define ${typeConversion(ty)} @$name (${argList.mkString(", ")}) {" ++ + compile_exp(typ_exp(CPSi(body), env1)) ++ + m"}\n", ts + (name -> ty)) // don't preserve local variables in environment + } + case Main(body) => { + (m"define i32 @main() {" ++ + compile_exp(typ_exp(CPS(body)(_ => KReturn(KNum(0))), ts + ("main" -> "Int"))) ++ + m"}\n", ts + ("main" -> "Int")) } } -// main compiler functions -def compile(prog: List[Decl]) : String = - prelude ++ compile_prog(prog, Map("new_line" -> "Void", "skip" -> "Void", - "print_star" -> "Void", "print_space" -> "Void", - "print_int" -> "Void", "print_char" -> "Void")) +// recursively update the typing environment while compiling +def compile_block(prog: List[Decl], ts: TyEnv) : (String, TyEnv) = prog match { + case Nil => ("", ts) + case x::xs => { + val (compiled, env) = compile_decl(x, ts) + val (compiled_block, env1) = compile_block(xs, env) + (compiled ++ compiled_block, env1) + } +} -//import ammonite.ops._ +def fun_compile(prog: List[Decl]) : String = { + val tyenv = initialEnv + val (compiled, _) = compile_block(prog, tyenv) + prelude ++ compiled +} @main @@ -379,8 +338,8 @@ val path = os.pwd / fname val file = fname.stripSuffix("." ++ path.ext) val tks = tokenise(os.read(path)) - val ast = parse_tks(tks) - val code = compile(ast) + val ast = parse_tks(tks).head + val code = fun_compile(ast) println(code) } @@ -389,8 +348,8 @@ val path = os.pwd / fname val file = fname.stripSuffix("." ++ path.ext) val tks = tokenise(os.read(path)) - val ast = parse_tks(tks) - val code = compile(ast) + val ast = parse_tks(tks).head + val code = fun_compile(ast) //println(code) os.write.over(os.pwd / (file ++ ".ll"), code) } @@ -407,6 +366,3 @@ } - - - diff -r b40aaffe0793 -r 2f86ebda3629 solutions/cw5/fun_parser.sc --- a/solutions/cw5/fun_parser.sc Sat Dec 03 21:58:47 2022 +0000 +++ b/solutions/cw5/fun_parser.sc Fri Dec 09 11:00:05 2022 +0000 @@ -1,215 +1,263 @@ -// A parser for the Fun language -//================================ -// -// call with -// -// amm fun_parser.sc fact.fun +// Author: Zhuo Ying Jiang Li +// Starting code by Dr Christian Urban + +// parser: convert sequence of tokens to AST + // -// amm fun_parser.sc defs.fun +// Use this command to print parsed AST: +// amm fun_parser.sc .fun // -// this will generate a parse-tree from a list -// of tokens -import scala.language.implicitConversions -import scala.language.reflectiveCalls +import $file.fun_tokens, fun_tokens._ -import $file.fun_tokens, fun_tokens._ - - -// Parser combinators -// type parameter I needs to be of Seq-type -// -abstract class Parser[I, T](implicit ev: I => Seq[_]) { - def parse(ts: I): Set[(T, I)] +// more convenience for the map parsers later on; +// it allows writing nested patterns as +// case x ~ y ~ z => ... +case class ~[+A, +B](x: A, y: B) - def parse_single(ts: I) : T = - parse(ts).partition(_._2.isEmpty) match { - case (good, _) if !good.isEmpty => good.head._1 - case (good, err) if err.isEmpty => { - println (s"Parse Error\n $good \n $err") ; sys.exit(-1) } - case (_, err) => { - println (s"Parse Error\n${err.minBy(_._2.length)}") ; sys.exit(-1) } - } +// constraint for the input +type IsSeq[A] = A => Seq[_] + +abstract class Parser[I : IsSeq, T]{ + def parse(in: I): Set[(T, I)] + + def parse_all(in: I) : Set[T] = + for ((hd, tl) <- parse(in); + if tl.isEmpty) yield hd } -// convenience for writing grammar rules -case class ~[+A, +B](_1: A, _2: B) +// parser combinators -class SeqParser[I, T, S](p: => Parser[I, T], - q: => Parser[I, S])(implicit ev: I => Seq[_]) extends Parser[I, ~[T, S]] { - def parse(sb: I) = - for ((head1, tail1) <- p.parse(sb); - (head2, tail2) <- q.parse(tail1)) yield (new ~(head1, head2), tail2) +// sequence parser +class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], + q: => Parser[I, S]) extends Parser[I, ~[T, S]] { + def parse(in: I) = + for ((hd1, tl1) <- p.parse(in); + (hd2, tl2) <- q.parse(tl1)) yield (new ~(hd1, hd2), tl2) } -class AltParser[I, T](p: => Parser[I, T], - q: => Parser[I, T])(implicit ev: I => Seq[_]) extends Parser[I, T] { - def parse(sb: I) = p.parse(sb) ++ q.parse(sb) +// alternative parser +class AltParser[I : IsSeq, T](p: => Parser[I, T], + q: => Parser[I, T]) extends Parser[I, T] { + def parse(in: I) = p.parse(in) ++ q.parse(in) } -class FunParser[I, T, S](p: => Parser[I, T], - f: T => S)(implicit ev: I => Seq[_]) extends Parser[I, S] { - def parse(sb: I) = - for ((head, tail) <- p.parse(sb)) yield (f(head), tail) +// map parser +class MapParser[I : IsSeq, T, S](p: => Parser[I, T], + f: T => S) extends Parser[I, S] { + def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl) } -// convenient combinators -implicit def ParserOps[I, T](p: Parser[I, T])(implicit ev: I => Seq[_]) = new { - def || (q : => Parser[I, T]) = new AltParser[I, T](p, q) - def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f) +// more convenient syntax for parser combinators +implicit def ParserOps[I : IsSeq, T](p: Parser[I, T]) = new { + def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q) def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q) + def map[S](f: => T => S) = new MapParser[I, T, S](p, f) } -def ListParser[I, T, S](p: => Parser[I, T], - q: => Parser[I, S])(implicit ev: I => Seq[_]): Parser[I, List[T]] = { - (p ==> ((s) => List(s))) || - (p ~ q ~ ListParser(p, q)) ==> { case x ~ _ ~ z => x :: z : List[T] } -} +// ------------------------------------------------- +// atomic parsers -case class TokParser(tok: Token) extends Parser[List[Token], Token] { - def parse(ts: List[Token]) = ts match { - case t::ts if (t == tok) => Set((t, ts)) +// atomic parser for types +case class TypeParser(ty: Set[String]) extends Parser[Tokens, String] { + def parse(tokens: Tokens) = tokens match { + case Nil => Set() + case tk::tkns if tk._1 == "type" && ty.contains(tk._2) => Set((tk._2, tkns)) case _ => Set() } } -implicit def token2tparser(t: Token) = TokParser(t) - -implicit def TokOps(t: Token) = new { - def || (q : => Parser[List[Token], Token]) = new AltParser[List[Token], Token](t, q) - def ==>[S] (f: => Token => S) = new FunParser[List[Token], Token, S](t, f) - def ~[S](q : => Parser[List[Token], S]) = new SeqParser[List[Token], Token, S](t, q) +// atomic parser for global ids +case object GlobalIdParser extends Parser[Tokens, String] { + def parse(tokens: Tokens) = tokens match { + case Nil => Set() + case tk::tkns if tk._1 == "global" => Set((tk._2, tkns)) + case _ => Set() + } } -case object EmptyParser extends Parser[List[Token], String] { - def parse(ts: List[Token]) = Set(("", ts)) +// atomic parser for ids +case object IdParser extends Parser[Tokens, String] { + def parse(tokens: Tokens) = tokens match { + case Nil => Set() + case tk::tkns if tk._1 == "id" => Set((tk._2, tkns)) + case _ => Set() + } } -case object NumParser extends Parser[List[Token], Int] { - def parse(ts: List[Token]) = ts match { - case T_NUM(n)::ts => Set((n, ts)) - case _ => Set () +// atomic parser for doubles (I use Float because that's what is used in the AST structures given in CW5) +case object DoubleParser extends Parser[Tokens, Float] { + def parse(tokens: Tokens) = tokens match { + case Nil => Set() + case tk::tkns if tk._1 == "double" => Set((tk._2.toFloat, tkns)) + case _ => Set() } } -case object FNumParser extends Parser[List[Token], Double] { - def parse(ts: List[Token]) = ts match { - case T_FNUM(x)::ts => Set((x, ts)) +// atomic parser for integers +case object IntParser extends Parser[Tokens, Int] { + def parse(tokens: Tokens) = tokens match { + case Nil => Set() + case tk::tkns if tk._1 == "int" => Set((tk._2.toInt, tkns)) + case _ => Set() + } +} + +// atomic parser for operators +case class OpParser(ops: Set[String]) extends Parser[Tokens, String] { + def parse(tokens: Tokens) = tokens match { + case Nil => Set() + case tk::tkns if tk._1 == "op" && ops.contains(tk._2) => Set((tk._2, tkns)) + case _ => Set() + } +} + +// atomic parser for character +case object CharParser extends Parser[Tokens, Char] { + def parse(tokens: Tokens) = tokens match { + case Nil => Set() + case tk::tkns if tk._1 == "ch" => { + val stripped = tk._2.slice(1, tk._2.length-1) // strip off single quotes + stripped match { + case "\\n" => Set(('\n', tkns)) + case "\\t" => Set(('\t', tkns)) + case "\\r" => Set(('\r', tkns)) + case c => Set((c(0), tkns)) + } + } case _ => Set() } } -case object IdParser extends Parser[List[Token], String] { - def parse(ts: List[Token]) = ts match { - case T_ID(s)::ts => Set((s, ts)) - case _ => Set () +// parser for list of arguments +def ListParser[I, T, S](p: => Parser[I, T], + q: => Parser[I, S])(implicit ev: I => Seq[_]): Parser[I, List[T]] = { + (p ~ q ~ ListParser(p, q)).map{ case x ~ _ ~ z => x :: z : List[T] } || + (p.map((s) => List(s))) +} + +// I may want to write string interpolations for: +// keywords, semicolon, colon, comma, parentheses +case class StrParser(s: String) extends Parser[Tokens, String] { + def parse(tokens: Tokens) = tokens match { + case Nil => Set() + case tk::tkns if tk._2 == s => Set((s, tkns)) + case _ => Set() } } -case object CharConstParser extends Parser[List[Token], Int] { - def parse(ts: List[Token]) = ts match { - case T_CHR(c)::ts => Set((c, ts)) - case _ => Set () - } -} - -case object TyParser extends Parser[List[Token], String] { - def parse(ts: List[Token]) = ts match { - case T_TY(s)::ts => Set((s, ts)) - case _ => Set () - } +implicit def parser_interpolation(sc: StringContext) = new { + def p(args: Any*) = StrParser(sc.s(args:_*)) } -// Abstract syntax trees for the Fun language -abstract class Exp -abstract class BExp -abstract class Decl +// the AST datastructures for the FUN language + +abstract class Exp +abstract class BExp +abstract class Decl case class Def(name: String, args: List[(String, String)], ty: String, body: Exp) extends Decl case class Main(e: Exp) extends Decl case class Const(name: String, v: Int) extends Decl -case class FConst(name: String, x: Double) extends Decl +case class FConst(name: String, x: Float) extends Decl case class Call(name: String, args: List[Exp]) extends Exp case class If(a: BExp, e1: Exp, e2: Exp) extends Exp case class Var(s: String) extends Exp -case class Num(i: Int) extends Exp // integer numbers -case class FNum(i: Double) extends Exp // floating numbers -case class ChConst(c: Int) extends Exp // char constant +case class Num(i: Int) extends Exp // integer numbers +case class FNum(i: Float) extends Exp // float numbers +case class ChConst(c: Int) extends Exp // character constants case class Aop(o: String, a1: Exp, a2: Exp) extends Exp -case class Sequence(e1: Exp, e2: Exp) extends Exp +case class Sequence(e1: Exp, e2: Exp) extends Exp // expressions separated by semicolons + case class Bop(o: String, a1: Exp, a2: Exp) extends BExp -// arithmetic expressions (there needs to be an F in the SEMICOLON case) -lazy val Exp: Parser[List[Token], Exp] = - (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Exp ~ T_KWD("else") ~ Exp) ==> - { case _ ~ x ~ _ ~ y ~ _ ~ z => If(x, y, z): Exp } || - (F ~ T_SEMI ~ Exp) ==> { case x ~ _ ~ y => Sequence(x, y): Exp } || L -lazy val L: Parser[List[Token], Exp] = - (T ~ T_OP("+") ~ Exp) ==> { case x ~ _ ~ z => Aop("+", x, z): Exp } || - (T ~ T_OP("-") ~ Exp) ==> { case x ~ _ ~ z => Aop("-", x, z): Exp } || T -lazy val T: Parser[List[Token], Exp] = - (F ~ T_OP("*") ~ T) ==> { case x ~ _ ~ z => Aop("*", x, z): Exp } || - (F ~ T_OP("/") ~ T) ==> { case x ~ _ ~ z => Aop("/", x, z): Exp } || - (F ~ T_OP("%") ~ T) ==> { case x ~ _ ~ z => Aop("%", x, z): Exp } || F -lazy val F: Parser[List[Token], Exp] = - (IdParser ~ T_LPAREN ~ T_RPAREN) ==> { case x ~ _ ~ _ => Call(x, Nil): Exp } || - (IdParser ~ T_LPAREN ~ ListParser(Exp, T_COMMA) ~ T_RPAREN) ==> { case x ~ _ ~ z ~ _ => Call(x, z): Exp } || - (T_LPAREN ~ Exp ~ T_RPAREN) ==> { case _ ~ y ~ _ => y: Exp } || - IdParser ==> { case x => Var(x): Exp } || - NumParser ==> { case x => Num(x): Exp } || - CharConstParser ==> { case x => ChConst(x): Exp } || - FNumParser ==> { case x => FNum(x): Exp } +lazy val Exps: Parser[Tokens, Exp] = + (Exp ~ p";" ~ Exps).map[Exp]{ case x ~ _ ~ z => Sequence(x, z) } || + Exp + +lazy val Exp: Parser[Tokens, Exp] = + (p"if" ~ BExp ~ p"then" ~ Exp ~ p"else" ~ Exp).map[Exp]{ case _ ~ x ~ _ ~ y ~ _ ~ z => If(x, y, z) } || + M + +lazy val M: Parser[Tokens, Exp] = + (T ~ OpParser(Set("+", "-")) ~ M).map[Exp]{ case x ~ y ~ z => Aop(y, x, z) } || + T + +lazy val T: Parser[Tokens, Exp] = + (U ~ OpParser(Set("*", "/", "%")) ~ T).map[Exp]{ case x ~ y ~ z => Aop(y, x, z) } || + U + +// includes negative factor +// a + - b CAN be recognised +// - - - b CAN be recognised +lazy val U: Parser[Tokens, Exp] = + (OpParser(Set("-")) ~ U).map[Exp]{ case _ ~ y => Aop("*", Num(-1), y) } || + (OpParser(Set("+")) ~ U).map[Exp]{ case _ ~ y => y } || + F + +lazy val F: Parser[Tokens, Exp] = + (p"(" ~ Exp ~ p")").map[Exp]{ case _ ~ y ~ _ => y } || + (p"skip").map(_ => Call("skip", Nil)) || // hardcoded + (p"skip" ~ p"(" ~ p")").map(_ => Call("skip", Nil)) || // hardcoded + (IdParser ~ p"(" ~ ListParser(Exp, p",") ~ p")").map[Exp]{ case id ~ _ ~ args ~ _ => Call(id, args) } || + (IdParser ~ p"(" ~ p")").map[Exp]{ case id ~ _ ~ _ => Call(id, Nil) } || // NOTE: empty args are also accepted! + (IdParser || GlobalIdParser).map(x => Var(x)) || + IntParser.map(x => Num(x)) || + DoubleParser.map(x => FNum(x)) || + CharParser.map(x => ChConst(x.toInt)) || + (p"{" ~ Exps ~ p"}").map[Exp]{ case _ ~ x ~ _ => x } -// boolean expressions -lazy val BExp: Parser[List[Token], BExp] = - (Exp ~ T_OP("==") ~ Exp) ==> { case x ~ _ ~ z => Bop("==", x, z): BExp } || - (Exp ~ T_OP("!=") ~ Exp) ==> { case x ~ _ ~ z => Bop("!=", x, z): BExp } || - (Exp ~ T_OP("<") ~ Exp) ==> { case x ~ _ ~ z => Bop("<", x, z): BExp } || - (Exp ~ T_OP(">") ~ Exp) ==> { case x ~ _ ~ z => Bop("<", z, x): BExp } || - (Exp ~ T_OP("<=") ~ Exp) ==> { case x ~ _ ~ z => Bop("<=", x, z): BExp } || - (Exp ~ T_OP("=>") ~ Exp) ==> { case x ~ _ ~ z => Bop("<=", z, x): BExp } || - (T_LPAREN ~ BExp ~ T_RPAREN) ==> { case _ ~ b ~ _ => b : BExp } +lazy val BExp: Parser[Tokens, BExp] = + (Exp ~ OpParser(Set("==", "!=", "<", ">", "<=", ">=")) ~ Exp).map[BExp]{ case x ~ y ~ z => Bop(y, x, z) } || + (p"(" ~ BExp ~ p")").map[BExp]{ case _ ~ y ~ _ => y } + +lazy val TypedIdParser: Parser[Tokens, (String, String)] = + (IdParser ~ p":" ~ TypeParser(Set("Int", "Double"))).map{ case n ~ _ ~ t => (n, t) } -lazy val Arg : Parser[List[Token], (String, String)] = - (IdParser ~ T_COLON ~ TyParser) ==> { case x ~ _ ~ ty => (x, ty) } +lazy val Defn: Parser[Tokens, Decl] = + (p"def" ~ IdParser ~ p"(" ~ ListParser(TypedIdParser, p",") ~ p")" ~ p":" ~ TypeParser(Set("Int", "Double", "Void")) ~ OpParser(Set("=")) ~ Exp).map[Decl]{ + case _ ~ y ~ _ ~ w ~ _ ~ _ ~ t ~ _ ~ b => Def(y, w, t, b) + } || + (p"def" ~ IdParser ~ p"(" ~ p")" ~ p":" ~ TypeParser(Set("Int", "Double", "Void")) ~ OpParser(Set("=")) ~ Exp).map[Decl]{ + case _ ~ y ~ _ ~ _ ~ _ ~ t ~ _ ~ b => Def(y, Nil, t, b) + } -lazy val Defn: Parser[List[Token], Decl] = { - (T_KWD("def") ~ IdParser ~ T_LPAREN ~ T_RPAREN ~ T_COLON ~ TyParser ~ T_OP("=") ~ Exp) ==> - { case _ ~ y ~ _ ~ _ ~ _~ ty ~ _ ~ r => Def(y, Nil, ty, r): Decl } || - (T_KWD("def") ~ IdParser ~ T_LPAREN ~ ListParser(Arg, T_COMMA) ~ T_RPAREN ~ T_COLON ~ TyParser ~ T_OP("=") ~ Exp) ==> - { case _ ~ y ~ _ ~ w ~ _ ~ _~ ty ~ _ ~ r => Def(y, w, ty, r): Decl } -} +lazy val Constp: Parser[Tokens, Decl] = + (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Int")) ~ OpParser(Set("=")) ~ IntParser).map[Decl]{ // IntParser? Not Exp? For this AST, impossible to define Exp + case _ ~ id ~ _ ~ _ ~ _ ~ n => Const(id, n) + } || + (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Int")) ~ OpParser(Set("=")) ~ OpParser(Set("-")) ~ IntParser).map[Decl]{ // IntParser? Not Exp? For this AST, impossible to define Exp + case _ ~ id ~ _ ~ _ ~ _ ~ _ ~ n => Const(id, -n) + } -lazy val Const_decl: Parser[List[Token], Decl] = - (T_KWD("val") ~ Arg ~ T_OP("=") ~ NumParser) ==> - { case _ ~ x ~ _ ~ v => Const(x._1, v): Decl } || - (T_KWD("val") ~ Arg ~ T_OP("=") ~ FNumParser) ==> - { case _ ~ x ~ _ ~ v => FConst(x._1, v): Decl } +// Int can be converted to Double but not viceversa +lazy val FConstp: Parser[Tokens, Decl] = + (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Double")) ~ OpParser(Set("=")) ~ (DoubleParser || IntParser.map[Float](i => i.toFloat))).map[Decl]{ + case _ ~ id ~ _ ~ _ ~ _ ~ n => FConst(id, n) + } || + (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Double")) ~ OpParser(Set("=")) ~ OpParser(Set("-")) ~ (DoubleParser || IntParser.map[Float](i => i.toFloat))).map[Decl]{ + case _ ~ id ~ _ ~ _ ~ _ ~ _ ~ n => FConst(id, -n) + } -lazy val Prog: Parser[List[Token], List[Decl]] = - (Defn ~ T_SEMI ~ Prog) ==> { case x ~ _ ~ z => x :: z : List[Decl] } || - (Const_decl ~ T_SEMI ~ Prog) ==> { case x ~ _ ~ z => x :: z : List[Decl] } || - (Exp ==> ((s) => List(Main(s)) : List[Decl])) +// Prog consists of global const declarations, f(x) defs, and exp in ANY order +// restricted to main body at the bottom +lazy val Prog: Parser[Tokens, List[Decl]] = + (Defn ~ p";" ~ Prog).map[List[Decl]]{ case x ~ _ ~ z => x :: z } || + (Constp ~ p";" ~ Prog).map[List[Decl]]{ case x ~ _ ~ z => x :: z } || + (FConstp ~ p";" ~ Prog).map[List[Decl]]{ case x ~ _ ~ z => x :: z } || + Exp.map[List[Decl]](s => List(Main(s))) +def parse_tks(tokens: Tokens) = Prog.parse_all(tokens) -// Reading tokens and Writing parse trees +import scala.io.Source._ -//import ammonite.ops._ - -def parse_tks(tks: List[Token]) : List[Decl] = { - //println(Prog.parse(tks)) - Prog.parse_single(tks) +@main +def parse(filename: String) = { + val fun_code = fromFile(filename).getLines.mkString("\n") + // print the AST list to screen + println(parse_tks(tokenise(fun_code))) } - -//@doc("Parses a file.") -@main -def main(fname: String) : Unit = { - val tks = tokenise(os.read(os.pwd / fname)) - println(parse_tks(tks)) -} - - diff -r b40aaffe0793 -r 2f86ebda3629 solutions/cw5/fun_tokens.sc --- a/solutions/cw5/fun_tokens.sc Sat Dec 03 21:58:47 2022 +0000 +++ b/solutions/cw5/fun_tokens.sc Fri Dec 09 11:00:05 2022 +0000 @@ -1,27 +1,31 @@ -// A tokeniser for the Fun language -//================================== +// Author: Zhuo Ying Jiang Li +// Starting code by Dr Christian Urban + +// lexer + // -// call with -// -// amm fun_tokens.sc fact.fun -// -// amm fun_tokens.sc defs.fun +// Use this command to print the list of tokens: +// amm fun_token.sc .fun // - +type Token = (String, String) +type Tokens = List[Token] -import scala.language.implicitConversions -import scala.language.reflectiveCalls - -abstract class Rexp +// regular expressions including records +abstract class Rexp case object ZERO extends Rexp case object ONE extends Rexp case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class RECD(x: String, r: Rexp) extends Rexp - +case class RANGE(chars: List[Char]) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class OPTIONAL(r: Rexp) extends Rexp +case class PLUS(r: Rexp) extends Rexp +case class NTIMES(r: Rexp, n: Int) extends Rexp +case class RECD(x: String, r: Rexp) extends Rexp // records for extracting strings or tokens + +// values abstract class Val case object Empty extends Val case class Chr(c: Char) extends Val @@ -29,20 +33,27 @@ case class Left(v: Val) extends Val case class Right(v: Val) extends Val case class Stars(vs: List[Val]) extends Val +case class Opt(v: Val) extends Val +case class Pls(vs: List[Val]) extends Val +case class Nt(vs: List[Val]) extends Val case class Rec(x: String, v: Val) extends Val - + // some convenience for typing in regular expressions def charlist2rexp(s : List[Char]): Rexp = s match { case Nil => ONE case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) + case c::vs => SEQ(CHAR(c), charlist2rexp(vs)) } -implicit def string2rexp(s : String) : Rexp = + +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) implicit def RexpOps(r: Rexp) = new { def | (s: Rexp) = ALT(r, s) def % = STAR(r) + def ? = OPTIONAL(r) + def + = PLUS(r) + def ^ (n: Int) = NTIMES(r, n) def ~ (s: Rexp) = SEQ(r, s) } @@ -50,66 +61,89 @@ def | (r: Rexp) = ALT(s, r) def | (r: String) = ALT(s, r) def % = STAR(s) + def ? = OPTIONAL(s) + def + = PLUS(s) + def ^ (n: Int) = NTIMES(s, n) def ~ (r: Rexp) = SEQ(s, r) def ~ (r: String) = SEQ(s, r) def $ (r: Rexp) = RECD(s, r) } -def nullable (r: Rexp) : Boolean = r match { +def nullable(r: Rexp) : Boolean = r match { case ZERO => false case ONE => true case CHAR(_) => false + case RANGE(_) => false case ALT(r1, r2) => nullable(r1) || nullable(r2) case SEQ(r1, r2) => nullable(r1) && nullable(r2) case STAR(_) => true + case OPTIONAL(r1) => true + case PLUS(r1) => nullable(r1) + case NTIMES(r1, n) => if (n == 0) true else nullable(r1) case RECD(_, r1) => nullable(r1) } -def der (c: Char, r: Rexp) : Rexp = r match { +def der(c: Char, r: Rexp) : Rexp = r match { case ZERO => ZERO case ONE => ZERO case CHAR(d) => if (c == d) ONE else ZERO + case RANGE(chars) => if (chars.contains(c)) ONE else ZERO case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => + case SEQ(r1, r2) => if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) else SEQ(der(c, r1), r2) case STAR(r) => SEQ(der(c, r), STAR(r)) + case OPTIONAL(r) => der(c, r) + case PLUS(r) => SEQ(der(c, r), STAR(r)) + case NTIMES(r1, n) => if (n == 0) ZERO else SEQ(der(c, r1), NTIMES(r1, n - 1)) case RECD(_, r1) => der(c, r1) } - -// extracts a string from value +// extracts a string from a value def flatten(v: Val) : String = v match { case Empty => "" case Chr(c) => c.toString case Left(v) => flatten(v) case Right(v) => flatten(v) - case Sequ(v1, v2) => flatten(v1) + flatten(v2) + case Sequ(v1, v2) => flatten(v1) ++ flatten(v2) case Stars(vs) => vs.map(flatten).mkString + case Opt(v) => flatten(v) + case Pls(vs) => vs.map(flatten).mkString + case Nt(vs) => vs.map(flatten).mkString case Rec(_, v) => flatten(v) } // extracts an environment from a value; -// used for tokenise a string -def env(v: Val) : List[(String, String)] = v match { +// used for tokenising a string +def env(v: Val) : Tokens = v match { case Empty => Nil case Chr(c) => Nil case Left(v) => env(v) case Right(v) => env(v) case Sequ(v1, v2) => env(v1) ::: env(v2) case Stars(vs) => vs.flatMap(env) + case Opt(v) => env(v) + case Pls(vs) => vs.flatMap(env) + case Nt(vs) => vs.flatMap(env) case Rec(x, v) => (x, flatten(v))::env(v) } -// The Injection Part of the lexer + +// The injection and mkeps part of the lexer +//=========================================== def mkeps(r: Rexp) : Val = r match { case ONE => Empty - case ALT(r1, r2) => + case RANGE(chars) => throw new Exception("lexing error") // this will never be called but the coursework asks for it so... + case ALT(r1, r2) => if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2)) case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) case STAR(r) => Stars(Nil) + case OPTIONAL(r) => Opt(Empty) + case PLUS(r) => Pls(List(mkeps(r))) // scala define a list with one element + case NTIMES(r, n) => if (n == 0) Nt(Nil) else Nt(List.fill(n)(mkeps(r))) // wrong case RECD(x, r) => Rec(x, mkeps(r)) + case _ => throw new Exception("lexing error") } def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { @@ -119,9 +153,12 @@ case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1)) case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2)) - case (CHAR(d), Empty) => Chr(c) + case (CHAR(d), Empty) => Chr(c) + case (RANGE(chars), Empty) => Chr(c) + case (OPTIONAL(r1), v) => Opt(inj(r1, c, v)) + case (PLUS(r1), Sequ(v1, Stars(vs))) => Pls(inj(r1, c, v1)::vs) + case (NTIMES(r1, n), Sequ(v1, Nt(vs))) => Nt(inj(r1, c, v1)::vs) case (RECD(x, r1), _) => Rec(x, inj(r1, c, v)) - case _ => { println ("Injection error") ; sys.exit(-1) } } // some "rectification" functions for simplification @@ -135,15 +172,14 @@ def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match { case Sequ(v1, v2) => Sequ(f1(v1), f2(v2)) } -def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = +def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = (v:Val) => Sequ(f1(Empty), f2(v)) -def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = +def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = (v:Val) => Sequ(f1(v), f2(Empty)) -def F_RECD(f: Val => Val) = (v:Val) => v match { - case Rec(x, v) => Rec(x, f(v)) -} + def F_ERROR(v: Val): Val = throw new Exception("error") +// simplification def simp(r: Rexp): (Rexp, Val => Val) = r match { case ALT(r1, r2) => { val (r1s, f1s) = simp(r1) @@ -152,7 +188,7 @@ case (ZERO, _) => (r2s, F_RIGHT(f2s)) case (_, ZERO) => (r1s, F_LEFT(f1s)) case _ => if (r1s == r2s) (r1s, F_LEFT(f1s)) - else (ALT (r1s, r2s), F_ALT(f1s, f2s)) + else (ALT (r1s, r2s), F_ALT(f1s, f2s)) } } case SEQ(r1, r2) => { @@ -166,115 +202,75 @@ case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s)) } } - case RECD(x, r1) => { - val (r1s, f1s) = simp(r1) - (RECD(x, r1s), F_RECD(f1s)) - } case r => (r, F_ID) } // lexing functions including simplification def lex_simp(r: Rexp, s: List[Char]) : Val = s match { - case Nil => if (nullable(r)) mkeps(r) else { println ("Lexing Error") ; sys.exit(-1) } + case Nil => if (nullable(r)) mkeps(r) else + { throw new Exception("lexing error") } case c::cs => { val (r_simp, f_simp) = simp(der(c, r)) inj(r, c, f_simp(lex_simp(r_simp, cs))) } } -def lexing_simp(r: Rexp, s: String) = env(lex_simp(r, s.toList)) +def lexing_simp(r: Rexp, s: String) = + env(lex_simp(r, s.toList)) -// The Lexing Rules for the Fun Language - -def PLUS(r: Rexp) = r ~ r.% -def OPT(r: Rexp) = r | ONE +// FUN language lexer -val SYM = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | - "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | - "w" | "x" | "y" | "z" | "A" | "B" | "C" | "D" |"E" | "F" | "G" | - "H" | "I" | "J" | "K" |"L" | "M" | "N" | - "O" | "P" | "Q" | "R" |"S" | "T" | "U" | - "V" | "W" | "X" | "Y" | "Z" | "_" | ":" -val DIGIT = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" -val ID = SYM ~ (SYM | DIGIT).% -val NUM = PLUS(DIGIT) -val FNUM = OPT("-") ~ NUM ~ "." ~ NUM -val KEYWORD : Rexp = "if" | "then" | "else" | "def" | "val" -val TYPE : Rexp = "Void" | "Int" | "Double" -val SEMI: Rexp = ";" -val COLON: Rexp = ":" -val COMMA: Rexp = "," -val OP: Rexp = "=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/" -val WHITESPACE = PLUS(" " | "\n" | "\t" | "\r") -val RPAREN: Rexp = ")" | "}" -val LPAREN: Rexp = "(" | "{" -val ALL = SYM | DIGIT | OP | " " | ":" | ";" | "-" | "." | "\"" | "=" | "," | "(" | ")" | "{" | "}" -val ALL2 = ALL | "\n" -val COMMENT = ("/*" ~ ALL2.% ~ "*/") | ("//" ~ ALL.% ~ "\n") - -val CHR :Rexp = "'" ~ (ALL | "\\n") ~ "'" +val DIGIT = RANGE("0123456789".toList) +val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList) +val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList) +val SYM = RANGE("!\"#$%&'()*+,-./:;<>=?`@[]\\^_{}|~".toList) // I referenced the CPP ASCII table https://en.cppreference.com/w/cpp/language/ascii -val FUN_REGS = (("k" $ KEYWORD) | - ("t" $ TYPE) | - ("i" $ ID) | - ("ch" $ CHR) | - ("o" $ OP) | - ("n" $ NUM) | - ("f" $ FNUM) | - ("s" $ SEMI) | - ("co" $ COLON) | - ("c" $ COMMA) | - ("pl" $ LPAREN) | - ("pr" $ RPAREN) | - ("w" $ (WHITESPACE | COMMENT))).% - - - -// The tokens for the Fun language +val KEYWORD : Rexp = "val" | "if" | "then" | "else" | "def" | "skip" // "skip" is hardcoded because hanoi.fun calls skip() without parentheses +val TYPE : Rexp = "Int" | "Double" | "Void" +val GLOBAL_ID : Rexp = UPPERCASE ~ ("_" | LOWERCASE | DIGIT | UPPERCASE).% // start with capital letter and followed by any case +val ID : Rexp = LOWERCASE ~ ("_" | UPPERCASE | LOWERCASE | DIGIT).% // start with lowercase +val SEMI : Rexp = ";" +val COLON : Rexp = ":" +val OP : Rexp = "=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/" // no && and || operators +val INT : Rexp = DIGIT.+ +val DOUBLE : Rexp = DIGIT.+ ~ "." ~ DIGIT.+ // negative numbers sign is lexed as operator, but the parser will identify negative numbers +val COMMA : Rexp = "," +val WHITESPACES: Rexp = (" " | "\n" | "\t" | "\r").+ // whitespaces are either " " or \n or \t or \r +val LPAREN : Rexp = RANGE("({".toList) +val RPAREN : Rexp = RANGE(")}".toList) +val CH : Rexp = "'" ~ (LOWERCASE | UPPERCASE | DIGIT | SYM | " " | "\\n" | "\\t" | "\\r") ~ "'" // \n, \t and \r should also be tokenized, any character should be, whitespaces too +val COMMENT : Rexp = ("//" ~ (LOWERCASE | UPPERCASE | SYM | DIGIT | RANGE(" \t\r".toList)).% ~ "\n") | ("/*" ~ (LOWERCASE | UPPERCASE | SYM | DIGIT | RANGE(" \n\t\r".toList)).% ~ "*/") -abstract class Token extends Serializable -case object T_SEMI extends Token -case object T_COMMA extends Token -case object T_COLON extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token -case class T_ID(s: String) extends Token -case class T_FID(s: String) extends Token -case class T_OP(s: String) extends Token -case class T_NUM(n: Int) extends Token -case class T_FNUM(x: Double) extends Token -case class T_KWD(s: String) extends Token -case class T_TY(s: String) extends Token -case class T_CHR(i: Int) extends Token +val FUN_REGS = (("keyword" $ KEYWORD) | + ("type" $ TYPE) | + ("global" $ GLOBAL_ID) | + ("id" $ ID) | + ("op" $ OP) | + ("double" $ DOUBLE) | + ("int" $ INT) | + ("semi" $ SEMI) | + ("colon" $ COLON) | + ("comma" $ COMMA) | + ("ch" $ CH) | + ("par" $ (LPAREN | RPAREN)) | + COMMENT | WHITESPACES).% -val token : PartialFunction[(String, String), Token] = { - case ("k", s) => T_KWD(s) - case ("t", s) => T_TY(s) - case ("i", s) => T_ID(s) - case ("o", s) => T_OP(s) - case ("n", s) => T_NUM(s.toInt) - case ("ch", s) => if (s == "'\\n'") T_CHR(10) else T_CHR(s(1).toInt) - case ("f", s) => T_FNUM(s.toDouble) - case ("s", _) => T_SEMI - case ("c", _) => T_COMMA - case ("co", _) => T_COLON - case ("pl", _) => T_LPAREN - case ("pr", _) => T_RPAREN +def fun_lex(program: String) : Tokens = { + lexing_simp(FUN_REGS, program) } - -def tokenise(s: String) : List[Token] = { - val tks = lexing_simp(FUN_REGS, s).collect(token) - if (tks.length != 0) tks - else { println (s"Tokenise Error") ; sys.exit(-1) } +def tokenise(program: String) : Tokens = { + lexing_simp(FUN_REGS, program) } -//import ammonite.ops._ +import scala.io.Source._ -//@doc("Tokenising a file.") @main -def main(fname: String) = { - println(tokenise(os.read(os.pwd / fname))) +def lex(filename: String) = { + // read file + val fun_code = fromFile(filename).getLines.mkString("\n") + // print tokens to screen + println(fun_lex(fun_code).mkString("\n")) }