| 472 |      1 | // Part 2 about a "Compiler" for the Brainf*** language
 | 
|  |      2 | //======================================================
 | 
| 235 |      3 | 
 | 
| 404 |      4 | object M5b {
 | 
| 384 |      5 | 
 | 
| 235 |      6 | // !!! Copy any function you need from file bf.scala !!!
 | 
|  |      7 | //
 | 
|  |      8 | // If you need any auxiliary function, feel free to 
 | 
|  |      9 | // implement it, but do not make any changes to the
 | 
|  |     10 | // templates below.
 | 
|  |     11 | 
 | 
|  |     12 | 
 | 
|  |     13 | def time_needed[T](n: Int, code: => T) = {
 | 
|  |     14 |   val start = System.nanoTime()
 | 
|  |     15 |   for (i <- 0 until n) code
 | 
|  |     16 |   val end = System.nanoTime()
 | 
|  |     17 |   (end - start)/(n * 1.0e9)
 | 
|  |     18 | }
 | 
|  |     19 | 
 | 
| 472 |     20 | type Mem = Map[Int, Int]
 | 
| 460 |     21 | 
 | 
| 235 |     22 | 
 | 
| 404 |     23 | import io.Source
 | 
|  |     24 | import scala.util._
 | 
|  |     25 | 
 | 
| 472 |     26 | def load_bff(name: String) : String = 
 | 
|  |     27 |   Try(Source.fromFile(name)("ISO-8859-1").mkString).getOrElse("")
 | 
| 404 |     28 | 
 | 
| 472 |     29 | def sread(mem: Mem, mp: Int) : Int = 
 | 
|  |     30 |   mem.getOrElse(mp, 0)
 | 
|  |     31 | 
 | 
|  |     32 | def write(mem: Mem, mp: Int, v: Int) : Mem =
 | 
|  |     33 |   mem.updated(mp, v)
 | 
|  |     34 | 
 | 
| 404 |     35 | def jumpRight(prog: String, pc: Int, level: Int) : Int = {
 | 
| 472 |     36 |   if (prog.length <= pc) pc 
 | 
|  |     37 |   else (prog(pc), level) match {
 | 
|  |     38 |     case (']', 0) => pc + 1
 | 
|  |     39 |     case (']', l) => jumpRight(prog, pc + 1, l - 1)
 | 
|  |     40 |     case ('[', l) => jumpRight(prog, pc + 1, l + 1)
 | 
|  |     41 |     case (_, l) => jumpRight(prog, pc + 1, l)
 | 
|  |     42 |   }
 | 
| 404 |     43 | }
 | 
|  |     44 | 
 | 
| 472 |     45 | def jumpLeft(prog: String, pc: Int, level: Int) : Int = {
 | 
|  |     46 |   if (pc < 0) pc 
 | 
|  |     47 |   else (prog(pc), level) match {
 | 
|  |     48 |     case ('[', 0) => pc + 1
 | 
|  |     49 |     case ('[', l) => jumpLeft(prog, pc - 1, l - 1)
 | 
|  |     50 |     case (']', l) => jumpLeft(prog, pc - 1, l + 1)
 | 
|  |     51 |     case (_, l) => jumpLeft(prog, pc - 1, l)
 | 
| 404 |     52 |   }
 | 
|  |     53 | }
 | 
|  |     54 | 
 | 
| 472 |     55 | def compute(prog: String, pc: Int, mp: Int, mem: Mem) : Mem = {
 | 
|  |     56 |   if (0 <= pc && pc < prog.length) { 
 | 
|  |     57 |     val (new_pc, new_mp, new_mem) = prog(pc) match {
 | 
|  |     58 |       case '>' => (pc + 1, mp + 1, mem)
 | 
|  |     59 |       case '<' => (pc + 1, mp - 1, mem)
 | 
|  |     60 |       case '+' => (pc + 1, mp, write(mem, mp, sread(mem, mp) + 1))
 | 
|  |     61 |       case '-' => (pc + 1, mp, write(mem, mp, sread(mem, mp) - 1))
 | 
|  |     62 |       case '.' => { print(sread(mem, mp).toChar); (pc + 1, mp, mem) }
 | 
|  |     63 |       case '['  => if (sread(mem, mp) == 0) (jumpRight(prog, pc + 1, 0), mp, mem) else (pc + 1, mp, mem) 
 | 
|  |     64 |       case ']'  => if (sread(mem, mp) != 0) (jumpLeft(prog, pc - 1, 0), mp, mem) else (pc + 1, mp, mem) 
 | 
|  |     65 |       case _ => (pc + 1, mp, mem)
 | 
|  |     66 |     }		     
 | 
|  |     67 |     compute(prog, new_pc, new_mp, new_mem)	
 | 
|  |     68 |   }
 | 
|  |     69 |   else mem
 | 
|  |     70 | }
 | 
| 404 |     71 | 
 | 
| 472 |     72 | def run(prog: String, m: Mem = Map()) = compute(prog, 0, 0, m)
 | 
|  |     73 | 
 | 
|  |     74 | 
 | 
|  |     75 | // The baseline to what we can compare our "compiler"
 | 
|  |     76 | // implemented below. It should require something like 
 | 
|  |     77 | // 60 seconds for the calculation on my laptop
 | 
|  |     78 | //
 | 
|  |     79 | //time_needed(1, run(load_bff("benchmark.bf")))
 | 
|  |     80 | 
 | 
|  |     81 | 
 | 
|  |     82 | 
 | 
|  |     83 | // DEBUGGING INFORMATION!!!
 | 
|  |     84 | //
 | 
|  |     85 | // Compiler, even real ones, are fiedishly difficult to get
 | 
|  |     86 | // to prduce correct code. The point is that for example for
 | 
|  |     87 | // the sierpinski program, they need to still generate code
 | 
|  |     88 | // that displays such a triangle. If yes, then one usually
 | 
|  |     89 | // can take comfort that all is well. If not, then something
 | 
|  |     90 | // went wrong during the optimisations.
 | 
| 404 |     91 | 
 | 
|  |     92 | 
 | 
| 235 |     93 | 
 | 
| 472 |     94 | // (5) Write a function jtable that precomputes the "jump
 | 
|  |     95 | //     table" for a bf-program. This function takes a bf-program 
 | 
|  |     96 | //     as an argument and Returns a Map[Int, Int]. The 
 | 
|  |     97 | //     purpose of this map is to record the information
 | 
|  |     98 | //     that given on the position pc is a '[' or a ']',
 | 
|  |     99 | //     then to which pc-position do we need to jump next?
 | 
|  |    100 | // 
 | 
|  |    101 | //     For example for the program
 | 
|  |    102 | //    
 | 
|  |    103 | //       "+++++[->++++++++++<]>--<+++[->>++++++++++<<]>>++<<----------[+>.>.<+<]"
 | 
|  |    104 | //
 | 
|  |    105 | //     we obtain the map
 | 
|  |    106 | //
 | 
|  |    107 | //       Map(69 -> 61, 5 -> 20, 60 -> 70, 27 -> 44, 43 -> 28, 19 -> 6)
 | 
|  |    108 | //  
 | 
|  |    109 | //     This states that for the '[' on position 5, we need to
 | 
|  |    110 | //     jump to position 20, which is just after the corresponding ']'.
 | 
|  |    111 | //     Similarly, for the ']' on position 19, we need to jump to
 | 
|  |    112 | //     position 6, which is just after the '[' on position 5, and so
 | 
|  |    113 | //     on. The idea is to not calculate this information each time
 | 
|  |    114 | //     we hit a bracket, but just look up this information in the 
 | 
|  |    115 | //     jtable. You can use the jumpLeft and jumpRight functions
 | 
|  |    116 | //     from Part 1 for calculating the jtable.
 | 
|  |    117 | //
 | 
|  |    118 | //     Then adapt the compute and run functions from Part 1 in order 
 | 
|  |    119 | //     to take advantage of the information stored in the jtable. 
 | 
|  |    120 | //     This means whenever jumpLeft and jumpRight was called previously,
 | 
|  |    121 | //     you should look up the jump address in the jtable.
 | 
|  |    122 |  
 | 
|  |    123 | 
 | 
|  |    124 | def jtable(pg: String) : Map[Int, Int] = 
 | 
|  |    125 |     (0 until pg.length).collect { pc => pg(pc) match {
 | 
|  |    126 |       case '[' => (pc -> jumpRight(pg, pc + 1, 0))
 | 
|  |    127 |       case ']' => (pc -> jumpLeft(pg, pc - 1, 0))
 | 
|  |    128 |     }}.toMap
 | 
|  |    129 | 
 | 
|  |    130 | 
 | 
|  |    131 | // testcase
 | 
|  |    132 | // jtable("""+++++[->++++++++++<]>--<+++[->>++++++++++<<]>>++<<----------[+>.>.<+<]""")
 | 
|  |    133 | // =>  Map(69 -> 61, 5 -> 20, 60 -> 70, 27 -> 44, 43 -> 28, 19 -> 6)
 | 
|  |    134 | 
 | 
| 460 |    135 | 
 | 
| 472 |    136 | def compute2(pg: String, tb: Map[Int, Int], pc: Int, mp: Int, mem: Mem) : Mem = {
 | 
|  |    137 |   if (0 <= pc && pc < pg.length) { 
 | 
|  |    138 |     val (new_pc, new_mp, new_mem) = pg(pc) match {
 | 
|  |    139 |       case '>' => (pc + 1, mp + 1, mem)
 | 
|  |    140 |       case '<' => (pc + 1, mp - 1, mem)
 | 
|  |    141 |       case '+' => (pc + 1, mp, write(mem, mp, sread(mem, mp) + 1))
 | 
|  |    142 |       case '-' => (pc + 1, mp, write(mem, mp, sread(mem, mp) - 1))
 | 
|  |    143 |       case '.' => { print(sread(mem, mp).toChar); (pc + 1, mp, mem) }
 | 
|  |    144 |       case '['  => if (sread(mem, mp) == 0) (tb(pc), mp, mem) else (pc + 1, mp, mem) 
 | 
|  |    145 |       case ']'  => if (sread(mem, mp) != 0) (tb(pc), mp, mem) else (pc + 1, mp, mem) 
 | 
|  |    146 |       case _ => (pc + 1, mp, mem)
 | 
|  |    147 |     }		     
 | 
|  |    148 |     compute2(pg, tb, new_pc, new_mp, new_mem)	
 | 
|  |    149 |   }
 | 
|  |    150 |   else mem
 | 
|  |    151 | }
 | 
|  |    152 | 
 | 
|  |    153 | 
 | 
|  |    154 | def run2(pg: String, m: Mem = Map()) = 
 | 
|  |    155 |   compute2(pg, jtable(pg), 0, 0, m)
 | 
|  |    156 | 
 | 
|  |    157 | //time_needed(1, run2(load_bff("benchmark.bf")))
 | 
|  |    158 | 
 | 
|  |    159 | 
 | 
|  |    160 | 
 | 
|  |    161 | // (6) Write a function optimise which deletes "dead code" (everything
 | 
|  |    162 | // that is not a bf-command) and also replaces substrings of the form
 | 
|  |    163 | // [-] by a new command 0. The idea is that the loop [-] just resets the
 | 
|  |    164 | // memory at the current location to 0. In the compute3 and run3 functions
 | 
|  |    165 | // below you implement this command by writing the number 0 to mem(mp), 
 | 
|  |    166 | // that is write(mem, mp, 0). 
 | 
|  |    167 | //
 | 
|  |    168 | // The easiest way to modify a string in this way is to use the regular
 | 
|  |    169 | // expression """[^<>+-.\[\]""", which recognises everything that is 
 | 
|  |    170 | // not a bf-command and replace it by the empty string. Similarly the
 | 
|  |    171 | // regular expression """\[-\]""" finds all occurences of [-] and 
 | 
|  |    172 | // by using the Scala method .replaceAll you can repplace it with the 
 | 
|  |    173 | // string "0" standing for the new bf-command.
 | 
|  |    174 | 
 | 
|  |    175 | def optimise(s: String) : String = {
 | 
|  |    176 |   s.replaceAll("""[^<>+-.\[\]]""","")
 | 
|  |    177 |    .replaceAll("""\[-\]""", "0")
 | 
|  |    178 | }
 | 
|  |    179 | 
 | 
| 235 |    180 | 
 | 
| 460 |    181 | def compute3(pg: String, tb: Map[Int, Int], pc: Int, mp: Int, mem: Mem) : Mem = {
 | 
| 472 |    182 |   if (0 <= pc && pc < pg.length) { 
 | 
|  |    183 |     val (new_pc, new_mp, new_mem) = pg(pc) match {
 | 
|  |    184 |       case '0' => (pc + 1, mp, write(mem, mp, 0))
 | 
|  |    185 |       case '>' => (pc + 1, mp + 1, mem)
 | 
|  |    186 |       case '<' => (pc + 1, mp - 1, mem)
 | 
|  |    187 |       case '+' => (pc + 1, mp, write(mem, mp, sread(mem, mp) + 1))
 | 
|  |    188 |       case '-' => (pc + 1, mp, write(mem, mp, sread(mem, mp) - 1))
 | 
|  |    189 |       case '.' => { print(sread(mem, mp).toChar); (pc + 1, mp, mem) }
 | 
|  |    190 |       case '['  => if (sread(mem, mp) == 0) (tb(pc), mp, mem) else (pc + 1, mp, mem) 
 | 
|  |    191 |       case ']'  => if (sread(mem, mp) != 0) (tb(pc), mp, mem) else (pc + 1, mp, mem) 
 | 
|  |    192 |       case _ => (pc + 1, mp, mem)
 | 
|  |    193 |     }		     
 | 
|  |    194 |     compute3(pg, tb, new_pc, new_mp, new_mem)	
 | 
|  |    195 |   }
 | 
|  |    196 |   else mem
 | 
| 235 |    197 | }
 | 
|  |    198 | 
 | 
| 472 |    199 | def run3(pg: String, m: Mem = Map()) = { 
 | 
|  |    200 |   val pg_opt = optimise(pg)
 | 
|  |    201 |   compute3(pg_opt, jtable(pg_opt), 0, 0, m)
 | 
| 235 |    202 | }
 | 
|  |    203 | 
 | 
|  |    204 | 
 | 
|  |    205 | // testcases
 | 
| 472 |    206 | 
 | 
|  |    207 | //println(optimise(load_bff("collatz.bf")))
 | 
|  |    208 | //optimise(load_bff("benchmark.bf"))          // should have inserted 0's
 | 
|  |    209 | //optimise(load_bff("mandelbrot.bf")).length  // => 11203
 | 
|  |    210 |  
 | 
|  |    211 | //time_needed(1, run3(load_bff("benchmark.bf")))
 | 
|  |    212 | 
 | 
| 235 |    213 | 
 | 
|  |    214 | 
 | 
| 472 |    215 | // (7)  Write a function combine which replaces sequences
 | 
|  |    216 | // of repated increment and decrement commands by appropriate
 | 
|  |    217 | // two-character commands. For example for sequences of +
 | 
|  |    218 | //
 | 
|  |    219 | //              orig bf-cmds  | replacement
 | 
|  |    220 | //            ------------------------------
 | 
|  |    221 | //              +             | +A 
 | 
|  |    222 | //              ++            | +B
 | 
|  |    223 | //              +++           | +C
 | 
|  |    224 | //                            |
 | 
|  |    225 | //              ...           |
 | 
|  |    226 | //                            | 
 | 
|  |    227 | //              +++....+++    | +Z
 | 
|  |    228 | //                (where length = 26)
 | 
|  |    229 | //
 | 
|  |    230 | //  Similar for the bf-command -, > and <. All other commands should
 | 
|  |    231 | //  be unaffected by this change.
 | 
|  |    232 | //
 | 
|  |    233 | //  Adapt the compute4 and run4 functions such that they can deal
 | 
|  |    234 | //  appropriately with such two-character commands.
 | 
| 460 |    235 | 
 | 
| 472 |    236 | def splice(cs: List[Char], acc: List[(Char, Int)]) : List[(Char, Int)] = (cs, acc) match {
 | 
|  |    237 |   case (Nil, acc) => acc  
 | 
|  |    238 |   case ('[' :: cs, acc) => splice(cs, ('[', 1) :: acc)
 | 
|  |    239 |   case (']' :: cs, acc) => splice(cs, (']', 1) :: acc)
 | 
|  |    240 |   case ('.' :: cs, acc) => splice(cs, ('.', 1) :: acc)
 | 
|  |    241 |   case ('0' :: cs, acc) => splice(cs, ('0', 1) :: acc)
 | 
|  |    242 |   case (c :: cs, Nil) => splice(cs, List((c, 1)))
 | 
|  |    243 |   case (c :: cs, (d, n) :: acc) => 
 | 
|  |    244 |     if (c == d && n < 26) splice(cs, (c, n + 1) :: acc)
 | 
|  |    245 |     else splice(cs, (c, 1) :: (d, n) :: acc)
 | 
|  |    246 | }
 | 
|  |    247 | 
 | 
|  |    248 | def spl(s: String) = splice(s.toList, Nil).reverse
 | 
|  |    249 | 
 | 
|  |    250 | //spl(load_bff("benchmark.bf"))
 | 
|  |    251 | 
 | 
|  |    252 | def combine(s: String) : String = {
 | 
|  |    253 |   (for ((c, n) <- spl(s)) yield c match {
 | 
|  |    254 |     case '>' => List('>', (n + '@').toChar)
 | 
|  |    255 |     case '<' => List('<', (n + '@').toChar)
 | 
|  |    256 |     case '+' => List('+', (n + '@').toChar)
 | 
|  |    257 |     case '-' => List('-', (n + '@').toChar)
 | 
|  |    258 |     case _ => List(c)
 | 
|  |    259 |   }).flatten.mkString
 | 
|  |    260 | }
 | 
|  |    261 | 
 | 
|  |    262 | 
 | 
|  |    263 | //combine(load_bff("benchmark.bf"))
 | 
| 235 |    264 | 
 | 
| 460 |    265 | def compute4(pg: String, tb: Map[Int, Int], pc: Int, mp: Int, mem: Mem) : Mem = {
 | 
| 472 |    266 |   if (0 <= pc && pc < pg.length) { 
 | 
|  |    267 |     val (new_pc, new_mp, new_mem) = pg(pc) match {
 | 
|  |    268 |       case '0' => (pc + 1, mp, write(mem, mp, 0))
 | 
|  |    269 |       case '>' => (pc + 2, mp + (pg(pc + 1) - '@'), mem)
 | 
|  |    270 |       case '<' => (pc + 2, mp - (pg(pc + 1) - '@'), mem)
 | 
|  |    271 |       case '+' => (pc + 2, mp, write(mem, mp, sread(mem, mp) + (pg(pc + 1) - '@')))
 | 
|  |    272 |       case '-' => (pc + 2, mp, write(mem, mp, sread(mem, mp) - (pg(pc + 1) - '@')))
 | 
|  |    273 |       case '.' => { print(sread(mem, mp).toChar); (pc + 1, mp, mem) }
 | 
|  |    274 |       case '['  => if (sread(mem, mp) == 0) (tb(pc), mp, mem) else (pc + 1, mp, mem) 
 | 
|  |    275 |       case ']'  => if (sread(mem, mp) != 0) (tb(pc), mp, mem) else (pc + 1, mp, mem) 
 | 
|  |    276 |       case _ => (pc + 1, mp, mem)
 | 
|  |    277 |     }		     
 | 
|  |    278 |     compute4(pg, tb, new_pc, new_mp, new_mem)	
 | 
|  |    279 |   }
 | 
|  |    280 |   else mem
 | 
| 384 |    281 | }
 | 
|  |    282 | 
 | 
| 472 |    283 | def run4(pg: String, m: Mem = Map()) = { 
 | 
|  |    284 |   val pg_opt = combine(optimise(pg))
 | 
|  |    285 |   compute4(pg_opt, jtable(pg_opt), 0, 0, m)
 | 
| 404 |    286 | }
 | 
|  |    287 | 
 | 
|  |    288 | // testcases
 | 
| 472 |    289 | //println(combine(optimise(load_bff("mandelbrot.bf").drop(123))))
 | 
|  |    290 | 
 | 
|  |    291 | //combine(optimise(load_bff("benchmark.bf"))) // => """>A+B[<A+M>A-A]<A[[....."""
 | 
| 404 |    292 | 
 | 
| 472 |    293 | //time_needed(1, run4(load_bff("benchmark.bf")))
 | 
| 404 |    294 | 
 | 
| 472 |    295 | //time_needed(1, run(load_bff("sierpinski.bf"))) 
 | 
|  |    296 | //time_needed(1, run4(load_bff("sierpinski.bf"))) 
 | 
| 404 |    297 | 
 | 
| 472 |    298 | //println(time_needed(1, run4(load_bff("mandelbrot.bf"))))
 | 
| 404 |    299 | 
 | 
| 235 |    300 | 
 | 
|  |    301 | 
 | 
|  |    302 | 
 | 
|  |    303 | 
 | 
| 472 |    304 | }
 | 
|  |    305 | 
 | 
|  |    306 | /*
 | 
|  |    307 | import CW10b._
 | 
|  |    308 | println(time_needed(1, run(load_bff("collatz.bf"))))
 | 
|  |    309 | println(time_needed(1, run2(load_bff("collatz.bf"))))
 | 
|  |    310 | println(time_needed(1, run3(load_bff("collatz.bf"))))
 | 
|  |    311 | println(time_needed(1, run4(load_bff("collatz.bf"))))
 | 
|  |    312 | */
 |