+ −
datatype bit =+ −
Z | S | C of char+ −
+ −
type bits = bit list+ −
+ −
datatype rexp =+ −
ZERO+ −
| ONE + −
| CHAR of char + −
| ALTS of rexp list+ −
| SEQ of rexp * rexp + −
| STAR of rexp + −
| RECD of string * rexp+ −
+ −
fun alt r1 r2 = ALTS [r1, r2]+ −
+ −
datatype arexp =+ −
AZERO+ −
| AONE of bits+ −
| ACHAR of bits * char+ −
| AALTS of bits * (arexp list)+ −
| ASEQ of bits * arexp * arexp + −
| ASTAR of bits * arexp + −
+ −
datatype value =+ −
Empty+ −
| Chr of char+ −
| Sequ of value * value+ −
| Left of value+ −
| Right of value+ −
| Stars of value list+ −
| Rec of string * value+ −
+ −
(* some helper functions for strings *) + −
fun string_repeat s n = String.concat (List.tabulate (n, fn _ => s))+ −
+ −
(* some helper functions for rexps *)+ −
fun seq s = case s of+ −
[] => ONE+ −
| [c] => CHAR(c)+ −
| c::cs => SEQ(CHAR(c), seq cs)+ −
+ −
fun chr c = CHAR(c)+ −
+ −
fun str s = seq(explode s)+ −
+ −
fun plus r = SEQ(r, STAR(r))+ −
+ −
infix 9 +++ −
infix 9 --+ −
infix 9 $+ −
+ −
fun op ++ (r1, r2) = ALTS [r1, r2]+ −
+ −
fun op -- (r1, r2) = SEQ(r1, r2)+ −
+ −
fun op $ (x, r) = RECD(x, r)+ −
+ −
fun alts rs = case rs of + −
[] => ZERO+ −
| [r] => r+ −
| r::rs => ALTS([r, alts rs])+ −
+ −
+ −
fun sum (nil) = 0 + −
| sum (head::tail) = head + sum(tail);+ −
+ −
(* size of a regular expressions - for testing purposes *)+ −
fun size r = case r of+ −
ZERO => 1+ −
| ONE => 1+ −
| CHAR(_) => 1+ −
| ALTS(rs) => 1 + sum (map size rs)+ −
| SEQ(r1, r2) => 1 + (size r1) + (size r2)+ −
| STAR(r) => 1 + (size r)+ −
| RECD(_, r) => 1 + (size r)+ −
+ −
+ −
fun erase r = case r of+ −
AZERO => ZERO+ −
| AONE(_) => ONE+ −
| ACHAR(_, c) => CHAR(c)+ −
| AALTS(_, rs) => ALTS(map erase rs)+ −
| ASEQ(_, r1, r2) => SEQ(erase r1, erase r2)+ −
| ASTAR(_, r)=> STAR(erase r)+ −
+ −
+ −
fun fuse bs r = case r of+ −
AZERO => AZERO+ −
| AONE(cs) => AONE(bs @ cs)+ −
| ACHAR(cs, c) => ACHAR(bs @ cs, c)+ −
| AALTS(cs, rs) => AALTS(bs @ cs, rs)+ −
| ASEQ(cs, r1, r2) => ASEQ(bs @ cs, r1, r2)+ −
| ASTAR(cs, r) => ASTAR(bs @ cs, r)+ −
+ −
fun internalise r = case r of+ −
ZERO => AZERO+ −
| ONE => AONE([])+ −
| CHAR(c) => ACHAR([], c)+ −
| ALTS([r1, r2]) => AALTS([], [fuse [Z] (internalise r1), fuse [S] (internalise r2)])+ −
| SEQ(r1, r2) => ASEQ([], internalise r1, internalise r2)+ −
| STAR(r) => ASTAR([], internalise r)+ −
| RECD(x, r) => internalise r+ −
+ −
fun decode_aux r bs = case (r, bs) of+ −
(ONE, bs) => (Empty, bs)+ −
| (CHAR(c), bs) => (Chr(c), bs)+ −
| (ALTS([r1]), bs) => decode_aux r1 bs+ −
| (ALTS(rs), Z::bs1) => + −
let val (v, bs2) = decode_aux (hd rs) bs1+ −
in (Left(v), bs2) end+ −
| (ALTS(rs), S::bs1) => + −
let val (v, bs2) = decode_aux (ALTS (tl rs)) bs1+ −
in (Right(v), bs2) end+ −
| (SEQ(r1, r2), bs) => + −
let val (v1, bs1) = decode_aux r1 bs + −
val (v2, bs2) = decode_aux r2 bs1 + −
in (Sequ(v1, v2), bs2) end+ −
| (STAR(r1), Z::bs) => + −
let val (v, bs1) = decode_aux r1 bs + −
val (Stars(vs), bs2) = decode_aux (STAR r1) bs1+ −
in (Stars(v::vs), bs2) end+ −
| (STAR(_), S::bs) => (Stars [], bs)+ −
| (RECD(x, r1), bs) => + −
let val (v, bs1) = decode_aux r1 bs+ −
in (Rec(x, v), bs1) end+ −
+ −
exception DecodeError+ −
+ −
fun decode r bs = case (decode_aux r bs) of+ −
(v, []) => v+ −
| _ => raise DecodeError+ −
+ −
fun bnullable r = case r of+ −
AZERO => false+ −
| AONE(_) => true+ −
| ACHAR(_, _) => false+ −
| AALTS(_, rs) => List.exists bnullable rs+ −
| ASEQ(_, r1, r2) => bnullable(r1) andalso bnullable(r2)+ −
| ASTAR(_, _) => true+ −
+ −
fun bmkeps r = case r of+ −
AONE(bs) => bs+ −
| AALTS(bs, rs) => + −
let + −
val SOME(r) = List.find bnullable rs+ −
in bs @ bmkeps(r) end+ −
| ASEQ(bs, r1, r2) => bs @ bmkeps(r1) @ bmkeps(r2)+ −
| ASTAR(bs, r) => bs @ [S]+ −
+ −
fun bder c r = case r of+ −
AZERO => AZERO+ −
| AONE(_) => AZERO+ −
| ACHAR(bs, d) => if c = d then AONE(bs) else AZERO+ −
| AALTS(bs, rs) => AALTS(bs, map (bder c) rs)+ −
| ASEQ(bs, r1, r2) => + −
if (bnullable r1) + −
then AALTS(bs, [ASEQ([], bder c r1, r2), fuse (bmkeps r1) (bder c r2)])+ −
else ASEQ(bs, bder c r1, r2)+ −
| ASTAR(bs, r) => ASEQ(bs, fuse [Z] (bder c r), ASTAR([], r))+ −
+ −
fun bders s r = case s of + −
[] => r+ −
| c::s => bders s (bder c r)+ −
+ −
+ −
exception LexError+ −
+ −
fun blex r s = case s of + −
[] => if (bnullable r) then bmkeps r else raise LexError+ −
| c::cs => blex (bder c r) cs+ −
+ −
fun blexing r s = decode r (blex (internalise r) (explode s))+ −
+ −
(* Simplification *)+ −
+ −
fun distinctBy xs f acc = case xs of+ −
[] => []+ −
| x::xs =>+ −
let + −
val res = f x+ −
in if (List.exists (fn x => x = res) acc)+ −
then distinctBy xs f acc + −
else x::distinctBy xs f (res::acc)+ −
end+ −
+ −
fun flats rs = case rs of+ −
[] => []+ −
| AZERO::rs1 => flats rs1+ −
| AALTS(bs, rs1)::rs2 => map (fuse bs) rs1 @ flats rs2+ −
| r1::rs2 => r1::flats rs2+ −
+ −
+ −
fun stack r1 r2 = case r1 of+ −
AONE(bs2) => fuse bs2 r2+ −
| _ => ASEQ([], r1, r2)+ −
+ −
+ −
fun bsimp r = case r of+ −
ASEQ(bs1, r1, r2) => (case (bsimp r1, bsimp r2) of+ −
(AZERO, _) => AZERO+ −
| (_, AZERO) => AZERO+ −
| (AONE(bs2), r2s) => fuse (bs1 @ bs2) r2s+ −
| (AALTS(bs2, rs), r2s) => + −
AALTS(bs1 @ bs2, map (fn r => stack r r2s) rs)+ −
| (r1s, r2s) => ASEQ(bs1, r1s, r2s)) + −
| AALTS(bs1, rs) => (case distinctBy (flats (map bsimp rs)) erase [] of+ −
[] => AZERO+ −
| [r] => fuse bs1 r+ −
| rs2 => AALTS(bs1, rs2)) + −
| r => r+ −
+ −
fun bders_simp r s = case s of + −
[] => r+ −
| c::s => bders_simp (bsimp (bder c r)) s+ −
+ −
fun blex_simp r s = case s of + −
[] => if (bnullable r) then bmkeps r else raise LexError+ −
| c::cs => blex_simp (bsimp (bder c r)) cs+ −
+ −
fun blexing_simp r s = + −
decode r (blex_simp (internalise r) (explode s))+ −
+ −
+ −
(* Lexing rules for a small WHILE language *)+ −
val sym = alts (List.map chr (explode "abcdefghijklmnopqrstuvwxyz"))+ −
val digit = alts (List.map chr (explode "0123456789"))+ −
val idents = sym -- STAR(sym ++ digit)+ −
val nums = plus(digit)+ −
val keywords = alts (List.map str ["skip", "while", "do", "if", "then", "else", "read", "write", "true", "false"])+ −
val semicolon = str ";"+ −
val ops = alts (List.map str [":=", "==", "-", "+", "*", "!=", "<", ">", "<=", ">=", "%", "/"])+ −
val whitespace = plus(str " " ++ str "\n" ++ str "\t")+ −
val rparen = str ")"+ −
val lparen = str "("+ −
val begin_paren = str "{"+ −
val end_paren = str "}"+ −
+ −
+ −
val while_regs = STAR(("k" $ keywords) +++ −
("i" $ idents) +++ −
("o" $ ops) ++ + −
("n" $ nums) ++ + −
("s" $ semicolon) ++ + −
("p" $ (lparen ++ rparen)) ++ + −
("b" $ (begin_paren ++ end_paren)) ++ + −
("w" $ whitespace))+ −
+ −
+ −
+ −
(* Some Tests+ −
============ *)+ −
+ −
fun time f x =+ −
let+ −
val t_start = Timer.startCPUTimer()+ −
val f_x = (f x; f x; f x; f x; f x)+ −
val t_end = Time.toReal(#usr(Timer.checkCPUTimer(t_start))) / 5.0+ −
in+ −
(print ((Real.toString t_end) ^ "\n"); f_x)+ −
end+ −
+ −
+ −
val prog2 = String.concatWith "\n" + −
["i := 2;",+ −
"max := 100;",+ −
"while i < max do {",+ −
" isprime := 1;",+ −
" j := 2;",+ −
" while (j * j) <= i + 1 do {",+ −
" if i % j == 0 then isprime := 0 else skip;",+ −
" j := j + 1",+ −
" };",+ −
" if isprime == 1 then write i else skip;",+ −
" i := i + 1",+ −
"}"];+ −
+ −
+ −
(* loops in ML *)+ −
datatype for = to of int * int+ −
infix to + −
+ −
val for =+ −
fn lo to up =>+ −
(fn f => + −
let fun loop lo = + −
if lo > up then () else (f lo; loop (lo + 1))+ −
in loop lo end)+ −
+ −
fun forby n =+ −
fn lo to up =>+ −
(fn f => + −
let fun loop lo = + −
if lo > up then () else (f lo; loop (lo + n))+ −
in loop lo end)+ −
+ −
+ −
fun step_simp i = + −
(print ((Int.toString i) ^ ": ") ;+ −
time (blexing_simp while_regs) (string_repeat prog2 i)); + −
+ −
(*+ −
val main1 = forby 1000 (1000 to 5000) step_simp;+ −
print "\n";+ −
val main2 = forby 1000 (1000 to 5000) step_simp2;+ −
print "\n";+ −
val main3 = forby 1000 (1000 to 5000) step_acc;+ −
print "\n";+ −
val main4 = forby 1000 (1000 to 5000) step_acc2; + −
*)+ −
+ −
print "\n";+ −
val main5 = forby 10 (10 to 50) step_simp; + −
+ −
print("Size after 50: " ^ + −
PolyML.makestring(size (erase (bders_simp (internalise while_regs) (explode (string_repeat prog2 50))))) ^ "\n");+ −
+ −
print("Size after 100: " ^ + −
PolyML.makestring(size (erase (bders_simp (internalise while_regs) (explode (string_repeat prog2 100))))) ^ "\n");+ −
+ −