Attic/i.scala
changeset 742 b5b5583a3a08
parent 93 4794759139ea
equal deleted inserted replaced
741:e66bd5c563eb 742:b5b5583a3a08
       
     1 
       
     2 // regular expressions including NOT
       
     3 abstract class Rexp
       
     4 
       
     5 case object NULL extends Rexp
       
     6 case object EMPTY extends Rexp
       
     7 case object ALLC extends Rexp            // recognises any character
       
     8 case class CHAR(c: Char) extends Rexp
       
     9 case class ALT(r1: Rexp, r2: Rexp) extends Rexp
       
    10 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
       
    11 case class STAR(r: Rexp) extends Rexp
       
    12 case class NOT(r: Rexp) extends Rexp     // negation of a regular expression
       
    13 
       
    14 
       
    15 // nullable function: tests whether the regular 
       
    16 // expression can recognise the empty string
       
    17 def nullable (r: Rexp) : Boolean = r match {
       
    18   case NULL => false
       
    19   case EMPTY => true
       
    20   case ALLC => false
       
    21   case CHAR(_) => false
       
    22   case ALT(r1, r2) => nullable(r1) || nullable(r2)
       
    23   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
       
    24   case STAR(_) => true
       
    25   case NOT(r) => !(nullable(r))
       
    26 }
       
    27 
       
    28 // tests whether a regular expression 
       
    29 // cannot recognise more
       
    30 def no_more (r: Rexp) : Boolean = r match {
       
    31   case NULL => true
       
    32   case EMPTY => false
       
    33   case ALLC => false
       
    34   case CHAR(_) => false
       
    35   case ALT(r1, r2) => no_more(r1) && no_more(r2)
       
    36   case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1)
       
    37   case STAR(_) => false
       
    38   case NOT(r) => !(no_more(r))
       
    39 }
       
    40 
       
    41 
       
    42 // derivative of a regular expression w.r.t. a character
       
    43 def der (c: Char, r: Rexp) : Rexp = r match {
       
    44   case NULL => NULL
       
    45   case EMPTY => NULL  
       
    46   case ALLC => EMPTY
       
    47   case CHAR(d) => if (c == d) EMPTY else NULL
       
    48   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
       
    49   case SEQ(r1, r2) => 
       
    50     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
       
    51     else SEQ(der(c, r1), r2)
       
    52   case STAR(r) => SEQ(der(c, r), STAR(r))
       
    53   case NOT(r) => NOT(der (c, r))
       
    54 }
       
    55 
       
    56 // regular expression for specifying 
       
    57 // ranges of characters
       
    58 def Range(s : List[Char]) : Rexp = s match {
       
    59   case Nil => NULL
       
    60   case c::Nil => CHAR(c)
       
    61   case c::s => ALT(CHAR(c), Range(s))
       
    62 }
       
    63 def RANGE(s: String) = Range(s.toList)
       
    64 
       
    65 
       
    66 // one or more
       
    67 def PLUS(r: Rexp) = SEQ(r, STAR(r))
       
    68 
       
    69 // many alternatives
       
    70 def Alts(rs: List[Rexp]) : Rexp = rs match {
       
    71   case Nil => NULL
       
    72   case r::Nil => r
       
    73   case r::rs => ALT(r, Alts(rs))
       
    74 }
       
    75 def ALTS(rs: Rexp*) = Alts(rs.toList)
       
    76 
       
    77 // repetitions
       
    78 def Seqs(rs: List[Rexp]) : Rexp = rs match {
       
    79   case Nil => NULL
       
    80   case r::Nil => r
       
    81   case r::rs => SEQ(r, Seqs(rs))
       
    82 }
       
    83 def SEQS(rs: Rexp*) = Seqs(rs.toList)
       
    84 
       
    85 // some convenience for typing in regular expressions
       
    86 def charlist2rexp(s : List[Char]) : Rexp = s match {
       
    87   case Nil => EMPTY
       
    88   case c::Nil => CHAR(c)
       
    89   case c::s => SEQ(CHAR(c), charlist2rexp(s))
       
    90 }
       
    91 implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
       
    92 
       
    93 
       
    94 type Rule[T] = (Rexp, List[Char] => T)
       
    95 
       
    96 case class Tokenizer[T](rules: List[Rule[T]], excl: List[T] = Nil) {
       
    97 
       
    98   def munch(r: Rexp, action: List[Char] => T, s: List[Char], t: List[Char]) : Option[(List[Char], T)] = 
       
    99     s match {
       
   100       case Nil if (nullable(r)) => Some(Nil, action(t))
       
   101       case Nil => None
       
   102       case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, action(t))
       
   103       case c::s if (no_more(der (c, r))) => None
       
   104       case c::s => munch(der (c, r), action, s, t ::: List(c))
       
   105     }
       
   106 
       
   107   def one_token(s: List[Char]) : Either[(List[Char], T), String] = {
       
   108     val somes = rules.map { (r) => munch(r._1, r._2, s, Nil) }.flatten
       
   109     if (somes == Nil) Right(s.mkString) 
       
   110     else Left(somes sortBy (_._1.length) head)
       
   111   }
       
   112 
       
   113   def tokenize(cs: List[Char]) : List[T] = cs match {
       
   114     case Nil => Nil
       
   115     case _ => one_token(cs) match {
       
   116       case Left((rest, token)) => token :: tokenize(rest)
       
   117       case Right(s) => { println("Cannot tokenize: \"" + s + "\""); Nil } 
       
   118     }
       
   119   }
       
   120 
       
   121   def fromString(s: String) : List[T] = 
       
   122     tokenize(s.toList).filterNot(excl.contains(_))
       
   123 
       
   124   def fromFile(name: String) : List[T] = 
       
   125     fromString(io.Source.fromFile(name).mkString)
       
   126 
       
   127 }
       
   128 
       
   129 
       
   130 // parser combinators with input type I and return type T
       
   131 
       
   132 abstract class Parser[I <% Seq[_], T] {
       
   133   def parse(ts: I): Set[(T, I)]
       
   134 
       
   135   def parse_all(ts: I) : Set[T] =
       
   136     for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head
       
   137 
       
   138   def parse_single(ts: I) : T = parse_all(ts).toList match {
       
   139     case t::Nil => t
       
   140     case _ => { println ("Parse Error") ; sys.exit(-1) }
       
   141   }
       
   142     
       
   143   def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right)
       
   144   def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f)
       
   145   def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right)
       
   146   def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2)
       
   147   def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1)
       
   148 }
       
   149 
       
   150 class SeqParser[I <% Seq[_], T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] {
       
   151   def parse(sb: I) = 
       
   152     for ((head1, tail1) <- p.parse(sb); 
       
   153          (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2)
       
   154 }
       
   155 
       
   156 class AltParser[I <% Seq[_], T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] {
       
   157   def parse(sb: I) = p.parse(sb) ++ q.parse(sb)   
       
   158 }
       
   159 
       
   160 class FunParser[I <% Seq[_], T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] {
       
   161   def parse(sb: I) = 
       
   162     for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
       
   163 }
       
   164 
       
   165 
       
   166 // A parser and evaluator for teh while language
       
   167 // 
       
   168 //:load matcher.scala
       
   169 //:load parser3.scala
       
   170 
       
   171 // some regular expressions
       
   172 val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_")
       
   173 val DIGIT = RANGE("0123456789")
       
   174 val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT))) 
       
   175 val NUM = PLUS(DIGIT)
       
   176 val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write") 
       
   177 val SEMI: Rexp = ";"
       
   178 val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">")
       
   179 val WHITESPACE = PLUS(RANGE(" \n"))
       
   180 val RPAREN: Rexp = ")"
       
   181 val LPAREN: Rexp = "("
       
   182 val BEGIN: Rexp = "{"
       
   183 val END: Rexp = "}"
       
   184 val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/")
       
   185 
       
   186 // tokens for classifying the strings that have been recognised
       
   187 abstract class Token
       
   188 case object T_WHITESPACE extends Token
       
   189 case object T_COMMENT extends Token
       
   190 case object T_SEMI extends Token
       
   191 case object T_LPAREN extends Token
       
   192 case object T_RPAREN extends Token
       
   193 case object T_BEGIN extends Token
       
   194 case object T_END extends Token
       
   195 case class T_ID(s: String) extends Token
       
   196 case class T_OP(s: String) extends Token
       
   197 case class T_NUM(s: String) extends Token
       
   198 case class T_KWD(s: String) extends Token
       
   199 
       
   200 val lexing_rules: List[Rule[Token]] = 
       
   201   List((KEYWORD, (s) => T_KWD(s.mkString)),
       
   202        (ID, (s) => T_ID(s.mkString)),
       
   203        (OP, (s) => T_OP(s.mkString)),
       
   204        (NUM, (s) => T_NUM(s.mkString)),
       
   205        (SEMI, (s) => T_SEMI),
       
   206        (LPAREN, (s) => T_LPAREN),
       
   207        (RPAREN, (s) => T_RPAREN),
       
   208        (BEGIN, (s) => T_BEGIN),
       
   209        (END, (s) => T_END),
       
   210        (WHITESPACE, (s) => T_WHITESPACE),
       
   211        (COMMENT, (s) => T_COMMENT))
       
   212 
       
   213 // the tokenizer
       
   214 val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT))
       
   215 
       
   216 // the abstract syntax trees
       
   217 abstract class Stmt
       
   218 abstract class AExp
       
   219 abstract class BExp 
       
   220 type Block = List[Stmt]
       
   221 case object Skip extends Stmt
       
   222 case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt
       
   223 case class While(b: BExp, bl: Block) extends Stmt
       
   224 case class Assign(s: String, a: AExp) extends Stmt
       
   225 case class Write(s: String) extends Stmt
       
   226 
       
   227 case class Var(s: String) extends AExp
       
   228 case class Num(i: Int) extends AExp
       
   229 case class Aop(o: String, a1: AExp, a2: AExp) extends AExp
       
   230 
       
   231 case object True extends BExp
       
   232 case object False extends BExp
       
   233 case class Relop(o: String, a1: AExp, a2: AExp) extends BExp
       
   234 
       
   235 // atomic parsers
       
   236 case class TokParser(tok: Token) extends Parser[List[Token], Token] {
       
   237   def parse(ts: List[Token]) = ts match {
       
   238     case t::ts if (t == tok) => Set((t, ts)) 
       
   239     case _ => Set ()
       
   240   }
       
   241 }
       
   242 implicit def token2tparser(t: Token) = TokParser(t)
       
   243 
       
   244 case object NumParser extends Parser[List[Token], Int] {
       
   245   def parse(ts: List[Token]) = ts match {
       
   246     case T_NUM(s)::ts => Set((s.toInt, ts)) 
       
   247     case _ => Set ()
       
   248   }
       
   249 }
       
   250 
       
   251 case object IdParser extends Parser[List[Token], String] {
       
   252   def parse(ts: List[Token]) = ts match {
       
   253     case T_ID(s)::ts => Set((s, ts)) 
       
   254     case _ => Set ()
       
   255   }
       
   256 }
       
   257 
       
   258 
       
   259 // arithmetic expressions
       
   260 lazy val AExp: Parser[List[Token], AExp] = 
       
   261   (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } ||
       
   262   (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T  
       
   263 lazy val T: Parser[List[Token], AExp] = 
       
   264   (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F
       
   265 lazy val F: Parser[List[Token], AExp] = 
       
   266   (T_LPAREN ~> AExp <~ T_RPAREN) || 
       
   267   IdParser ==> Var || 
       
   268   NumParser ==> Num
       
   269 
       
   270 // boolean expressions
       
   271 lazy val BExp: Parser[List[Token], BExp] = 
       
   272   (T_KWD("true") ==> ((_) => True: BExp)) || 
       
   273   (T_KWD("false") ==> ((_) => False: BExp)) ||
       
   274   (T_LPAREN ~> BExp <~ T_RPAREN) ||
       
   275   (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Relop("=", x, z): BExp } || 
       
   276   (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Relop("!=", x, z): BExp } || 
       
   277   (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Relop("<", x, z): BExp } || 
       
   278   (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Relop("<", z, x): BExp } 
       
   279 
       
   280 lazy val Stmt: Parser[List[Token], Stmt] =
       
   281   (T_KWD("skip") ==> ((_) => Skip: Stmt)) ||
       
   282   (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } ||
       
   283   (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==>
       
   284     { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } ||
       
   285   (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } || 
       
   286   (T_KWD("write") ~ IdParser) ==> { case (x, y) => Write(y) } 
       
   287 
       
   288 lazy val Stmts: Parser[List[Token], Block] =
       
   289   (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } ||
       
   290   (Stmt ==> ((s) => List(s) : Block))
       
   291 
       
   292 lazy val Block: Parser[List[Token], Block] =
       
   293   (T_BEGIN ~> Stmts <~ T_END) || 
       
   294   (Stmt ==> ((s) => List(s)))
       
   295 
       
   296 // compiler
       
   297 val beginning = """
       
   298 .class public XXX.XXX
       
   299 .super java/lang/Object
       
   300 
       
   301 .method public <init>()V
       
   302    aload_0
       
   303    invokenonvirtual java/lang/Object/<init>()V
       
   304    return
       
   305 .end method
       
   306 
       
   307 .method public static write(I)V 
       
   308     .limit locals 5 
       
   309     .limit stack 5 
       
   310     iload 0 
       
   311     getstatic java/lang/System/out Ljava/io/PrintStream; 
       
   312     swap 
       
   313     invokevirtual java/io/PrintStream/println(I)V 
       
   314     return 
       
   315 .end method
       
   316 
       
   317 
       
   318 .method public static main([Ljava/lang/String;)V
       
   319    .limit locals 200
       
   320    .limit stack 200
       
   321 
       
   322 """
       
   323 
       
   324 val ending = """
       
   325 
       
   326    return
       
   327 
       
   328 .end method
       
   329 """
       
   330 
       
   331 // for generating new labels
       
   332 var counter = -1
       
   333 
       
   334 def Fresh(x: String) = {
       
   335   counter += 1
       
   336   x ++ "_" ++ counter.toString()
       
   337 }
       
   338 
       
   339 type Env = Map[String, String]
       
   340 type Instrs = List[String]
       
   341 
       
   342 def compile_aexp(a: AExp, env : Env) : Instrs = a match {
       
   343   case Num(i) => List("ldc " + i.toString + "\n")
       
   344   case Var(s) => List("iload " + env(s) + "\n")
       
   345   case Aop("+", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("iadd\n")
       
   346   case Aop("-", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("isub\n")
       
   347   case Aop("*", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("imul\n")
       
   348 }
       
   349 
       
   350 def compile_bexp(b: BExp, env : Env, jmp: String) : Instrs = b match {
       
   351   case True => Nil
       
   352   case False => List("goto " + jmp + "\n")
       
   353   case Relop("=", a1, a2) => 
       
   354     compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpne " + jmp + "\n")
       
   355   case Relop("!=", a1, a2) => 
       
   356     compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpeq " + jmp + "\n")
       
   357   case Relop("<", a1, a2) => 
       
   358     compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpge " + jmp + "\n")
       
   359 }
       
   360 
       
   361 
       
   362 def compile_stmt(s: Stmt, env: Env) : (Instrs, Env) = s match {
       
   363   case Skip => (Nil, env)
       
   364   case Assign(x, a) => {
       
   365     val index = if (env.isDefinedAt(x)) env(x) else env.keys.size.toString
       
   366     (compile_aexp(a, env) ++ 
       
   367      List("istore " + index + "\n"), env + (x -> index))
       
   368   } 
       
   369   case If(b, bl1, bl2) => {
       
   370     val if_else = Fresh("If_else")
       
   371     val if_end = Fresh("If_end")
       
   372     val (instrs1, env1) = compile_bl(bl1, env)
       
   373     val (instrs2, env2) = compile_bl(bl2, env1)
       
   374     (compile_bexp(b, env, if_else) ++
       
   375      instrs1 ++
       
   376      List("goto " + if_end + "\n") ++
       
   377      List("\n" + if_else + ":\n\n") ++
       
   378      instrs2 ++
       
   379      List("\n" + if_end + ":\n\n"), env2)
       
   380   }
       
   381   case While(b, bl) => {
       
   382     val loop_begin = Fresh("Loop_begin")
       
   383     val loop_end = Fresh("Loop_end")
       
   384     val (instrs1, env1) = compile_bl(bl, env)
       
   385     (List("\n" + loop_begin + ":\n\n") ++
       
   386      compile_bexp(b, env, loop_end) ++
       
   387      instrs1 ++
       
   388      List("goto " + loop_begin + "\n") ++
       
   389      List("\n" + loop_end + ":\n\n"), env1)
       
   390   }
       
   391   case Write(x) => 
       
   392     (List("iload " + env(x) + "\n" + "invokestatic XXX/XXX/write(I)V\n"), env)
       
   393 }
       
   394 
       
   395 def compile_bl(bl: Block, env: Env) : (Instrs, Env) = bl match {
       
   396   case Nil => (Nil, env)
       
   397   case s::bl => {
       
   398     val (instrs1, env1) = compile_stmt(s, env)
       
   399     val (instrs2, env2) = compile_bl(bl, env1)
       
   400     (instrs1 ++ instrs2, env2)
       
   401   }
       
   402 }
       
   403 
       
   404 def compile(input: String) : String = {
       
   405   val class_name = input.split('.')(0)
       
   406   val tks = Tok.fromFile(input)
       
   407   val ast = Stmts.parse_single(tks)
       
   408   val instructions = compile_bl(ast, Map.empty)._1
       
   409   (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name)
       
   410 }
       
   411 
       
   412 
       
   413 def compile_to(input: String, output: String) = {
       
   414   val fw = new java.io.FileWriter(output) 
       
   415   fw.write(compile(input)) 
       
   416   fw.close()
       
   417 }
       
   418 
       
   419 //
       
   420 val tks = Tok.fromString("x := x + 1")
       
   421 val ast = Stmt.parse_single(tks)
       
   422 println(compile_stmt(ast, Map("x" -> "n"))._1.mkString)
       
   423 
       
   424 
       
   425 
       
   426 //examples
       
   427 
       
   428 compile_to("loops.while", "loops.j")
       
   429 //compile_to("fib.while", "fib.j")
       
   430 
       
   431 
       
   432 // testing cases for time measurements
       
   433 /*
       
   434 def time_needed[T](i: Int, code: => T) = {
       
   435   val start = System.nanoTime()
       
   436   for (j <- 1 to i) code
       
   437   val end = System.nanoTime()
       
   438   (end - start)/(i * 1.0e9)
       
   439 }
       
   440 
       
   441 // for testing
       
   442 import scala.sys.process._
       
   443 
       
   444 val test_prog = """
       
   445 start := XXX;
       
   446 x := start;
       
   447 y := start;
       
   448 z := start;
       
   449 while 0 < x do {
       
   450  while 0 < y do {
       
   451   while 0 < z do {
       
   452     z := z - 1
       
   453   };
       
   454   z := start;
       
   455   y := y - 1
       
   456  };     
       
   457  y := start;
       
   458  x := x - 1
       
   459 };
       
   460 write x;
       
   461 write y;
       
   462 write z
       
   463 """
       
   464 
       
   465 
       
   466 def compile_test(n: Int) : Unit = {
       
   467   val class_name = "LOOP"
       
   468   val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString))
       
   469   val ast = Stmts.parse_single(tks)
       
   470   val instructions = compile_bl(ast, Map.empty)._1
       
   471   val assembly = (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name)
       
   472   val fw = new java.io.FileWriter(class_name + ".j") 
       
   473   fw.write(assembly) 
       
   474   fw.close()
       
   475   val test = ("java -jar jvm/jasmin-2.4/jasmin.jar " + class_name + ".j").!!
       
   476   println(n + " " + time_needed(2, ("java " + class_name + "/" + class_name).!!))
       
   477 }
       
   478 
       
   479 List(1, 5000, 10000, 50000, 100000, 250000, 500000, 750000, 1000000).map(compile_test(_))
       
   480 
       
   481 
       
   482 
       
   483 // javabyte code assmbler
       
   484 //
       
   485 // java -jar jvm/jasmin-2.4/jasmin.jar loops.j
       
   486 
       
   487 */
       
   488 
       
   489 
       
   490 
       
   491 
       
   492