datatype bit = Z | S | C of chartype bits = bit listdatatype rexp = ZERO | ONE | CHAR of char | ALTS of rexp list | SEQ of rexp * rexp | STAR of rexp | RECD of string * rexpfun alt r1 r2 = ALTS [r1, r2]datatype arexp = AZERO | AONE of bits | ACHAR of bits * char | AALTS of bits * (arexp list) | ASEQ of bits * arexp * arexp | ASTAR of bits * arexp datatype value = Empty | Chr of char | Sequ of value * value | Left of value | Right of value | Stars of value list | Rec of string * value(* some helper functions for strings *) fun string_repeat s n = String.concat (List.tabulate (n, fn _ => s))(* some helper functions for rexps *)fun seq s = case s of [] => ONE | [c] => CHAR(c) | c::cs => SEQ(CHAR(c), seq cs)fun chr c = CHAR(c)fun str s = seq(explode s)fun plus r = SEQ(r, STAR(r))infix 9 ++infix 9 --infix 9 $fun op ++ (r1, r2) = ALTS [r1, r2]fun op -- (r1, r2) = SEQ(r1, r2)fun op $ (x, r) = RECD(x, r)fun alts rs = case rs of [] => ZERO | [r] => r | r::rs => ALTS([r, alts rs])fun sum (nil) = 0 | sum (head::tail) = head + sum(tail);(* size of a regular expressions - for testing purposes *)fun size r = case r of ZERO => 1 | ONE => 1 | CHAR(_) => 1 | ALTS(rs) => 1 + sum (map size rs) | SEQ(r1, r2) => 1 + (size r1) + (size r2) | STAR(r) => 1 + (size r) | RECD(_, r) => 1 + (size r)fun erase r = case r of AZERO => ZERO | AONE(_) => ONE | ACHAR(_, c) => CHAR(c) | AALTS(_, rs) => ALTS(map erase rs) | ASEQ(_, r1, r2) => SEQ(erase r1, erase r2) | ASTAR(_, r)=> STAR(erase r)fun fuse bs r = case r of AZERO => AZERO | AONE(cs) => AONE(bs @ cs) | ACHAR(cs, c) => ACHAR(bs @ cs, c) | AALTS(cs, rs) => AALTS(bs @ cs, rs) | ASEQ(cs, r1, r2) => ASEQ(bs @ cs, r1, r2) | ASTAR(cs, r) => ASTAR(bs @ cs, r)fun internalise r = case r of ZERO => AZERO| ONE => AONE([])| CHAR(c) => ACHAR([], c)| ALTS([r1, r2]) => AALTS([], [fuse [Z] (internalise r1), fuse [S] (internalise r2)])| SEQ(r1, r2) => ASEQ([], internalise r1, internalise r2)| STAR(r) => ASTAR([], internalise r)| RECD(x, r) => internalise rfun decode_aux r bs = case (r, bs) of (ONE, bs) => (Empty, bs)| (CHAR(c), bs) => (Chr(c), bs)| (ALTS([r1]), bs) => decode_aux r1 bs| (ALTS(rs), Z::bs1) => let val (v, bs2) = decode_aux (hd rs) bs1 in (Left(v), bs2) end| (ALTS(rs), S::bs1) => let val (v, bs2) = decode_aux (ALTS (tl rs)) bs1 in (Right(v), bs2) end| (SEQ(r1, r2), bs) => let val (v1, bs1) = decode_aux r1 bs val (v2, bs2) = decode_aux r2 bs1 in (Sequ(v1, v2), bs2) end| (STAR(r1), Z::bs) => let val (v, bs1) = decode_aux r1 bs val (Stars(vs), bs2) = decode_aux (STAR r1) bs1 in (Stars(v::vs), bs2) end| (STAR(_), S::bs) => (Stars [], bs)| (RECD(x, r1), bs) => let val (v, bs1) = decode_aux r1 bs in (Rec(x, v), bs1) endexception DecodeErrorfun decode r bs = case (decode_aux r bs) of (v, []) => v| _ => raise DecodeErrorfun bnullable r = case r of AZERO => false| AONE(_) => true| ACHAR(_, _) => false| AALTS(_, rs) => List.exists bnullable rs| ASEQ(_, r1, r2) => bnullable(r1) andalso bnullable(r2)| ASTAR(_, _) => truefun bmkeps r = case r of AONE(bs) => bs| AALTS(bs, rs) => let val SOME(r) = List.find bnullable rs in bs @ bmkeps(r) end| ASEQ(bs, r1, r2) => bs @ bmkeps(r1) @ bmkeps(r2)| ASTAR(bs, r) => bs @ [S]fun bder c r = case r of AZERO => AZERO| AONE(_) => AZERO| ACHAR(bs, d) => if c = d then AONE(bs) else AZERO| AALTS(bs, rs) => AALTS(bs, map (bder c) rs)| ASEQ(bs, r1, r2) => if (bnullable r1) then AALTS(bs, [ASEQ([], bder c r1, r2), fuse (bmkeps r1) (bder c r2)]) else ASEQ(bs, bder c r1, r2)| ASTAR(bs, r) => ASEQ(bs, fuse [Z] (bder c r), ASTAR([], r))fun bders s r = case s of [] => r| c::s => bders s (bder c r)exception LexErrorfun blex r s = case s of [] => if (bnullable r) then bmkeps r else raise LexError | c::cs => blex (bder c r) csfun blexing r s = decode r (blex (internalise r) (explode s))(* Simplification *)fun distinctBy xs f acc = case xs of [] => [] | x::xs => let val res = f x in if (List.exists (fn x => x = res) acc) then distinctBy xs f acc else x::distinctBy xs f (res::acc) endfun flats rs = case rs of [] => [] | AZERO::rs1 => flats rs1 | AALTS(bs, rs1)::rs2 => map (fuse bs) rs1 @ flats rs2 | r1::rs2 => r1::flats rs2fun stack r1 r2 = case r1 of AONE(bs2) => fuse bs2 r2 | _ => ASEQ([], r1, r2)fun bsimp r = case r of ASEQ(bs1, r1, r2) => (case (bsimp r1, bsimp r2) of (AZERO, _) => AZERO | (_, AZERO) => AZERO | (AONE(bs2), r2s) => fuse (bs1 @ bs2) r2s | (AALTS(bs2, rs), r2s) => AALTS(bs1 @ bs2, map (fn r => stack r r2s) rs) | (r1s, r2s) => ASEQ(bs1, r1s, r2s)) | AALTS(bs1, rs) => (case distinctBy (flats (map bsimp rs)) erase [] of [] => AZERO | [r] => fuse bs1 r | rs2 => AALTS(bs1, rs2)) | r => rfun bders_simp r s = case s of [] => r| c::s => bders_simp (bsimp (bder c r)) sfun blex_simp r s = case s of [] => if (bnullable r) then bmkeps r else raise LexError | c::cs => blex_simp (bsimp (bder c r)) csfun blexing_simp r s = decode r (blex_simp (internalise r) (explode s))(* Lexing rules for a small WHILE language *)val sym = alts (List.map chr (explode "abcdefghijklmnopqrstuvwxyz"))val digit = alts (List.map chr (explode "0123456789"))val idents = sym -- STAR(sym ++ digit)val nums = plus(digit)val keywords = alts (List.map str ["skip", "while", "do", "if", "then", "else", "read", "write", "true", "false"])val semicolon = str ";"val ops = alts (List.map str [":=", "==", "-", "+", "*", "!=", "<", ">", "<=", ">=", "%", "/"])val whitespace = plus(str " " ++ str "\n" ++ str "\t")val rparen = str ")"val lparen = str "("val begin_paren = str "{"val end_paren = str "}"val while_regs = STAR(("k" $ keywords) ++ ("i" $ idents) ++ ("o" $ ops) ++ ("n" $ nums) ++ ("s" $ semicolon) ++ ("p" $ (lparen ++ rparen)) ++ ("b" $ (begin_paren ++ end_paren)) ++ ("w" $ whitespace))(* Some Tests ============ *)fun time f x = let val t_start = Timer.startCPUTimer() val f_x = (f x; f x; f x; f x; f x) val t_end = Time.toReal(#usr(Timer.checkCPUTimer(t_start))) / 5.0in (print ((Real.toString t_end) ^ "\n"); f_x)endval prog2 = String.concatWith "\n" ["i := 2;", "max := 100;", "while i < max do {", " isprime := 1;", " j := 2;", " while (j * j) <= i + 1 do {", " if i % j == 0 then isprime := 0 else skip;", " j := j + 1", " };", " if isprime == 1 then write i else skip;", " i := i + 1", "}"];(* loops in ML *)datatype for = to of int * intinfix to val for = fn lo to up => (fn f => let fun loop lo = if lo > up then () else (f lo; loop (lo + 1)) in loop lo end)fun forby n = fn lo to up => (fn f => let fun loop lo = if lo > up then () else (f lo; loop (lo + n)) in loop lo end)fun step_simp i = (print ((Int.toString i) ^ ": ") ; time (blexing_simp while_regs) (string_repeat prog2 i)); (*val main1 = forby 1000 (1000 to 5000) step_simp;print "\n";val main2 = forby 1000 (1000 to 5000) step_simp2;print "\n";val main3 = forby 1000 (1000 to 5000) step_acc;print "\n";val main4 = forby 1000 (1000 to 5000) step_acc2; *)print "\n";val main5 = forby 10 (10 to 50) step_simp; print("Size after 50: " ^ PolyML.makestring(size (erase (bders_simp (internalise while_regs) (explode (string_repeat prog2 50))))) ^ "\n");print("Size after 100: " ^ PolyML.makestring(size (erase (bders_simp (internalise while_regs) (explode (string_repeat prog2 100))))) ^ "\n");