diff -r 0eaa1851a5b6 -r db0ff630bbb7 progs/sml/re-bit.ML --- a/progs/sml/re-bit.ML Sat Mar 16 11:15:22 2019 +0000 +++ b/progs/sml/re-bit.ML Thu Apr 11 17:37:00 2019 +0100 @@ -1,20 +1,27 @@ + +datatype bit = + Z | S | C of char + +type bits = bit list datatype rexp = ZERO | ONE - | CHAR of char - | ALT of rexp * rexp + | CHAR of char + | ALTS of rexp list | SEQ of rexp * rexp | STAR of rexp | RECD of string * rexp +fun alt r1 r2 = ALTS [r1, r2] + datatype arexp = AZERO - | AONE of (bool list) - | ACHAR of (bool list) * char - | AALT of (bool list) * arexp * arexp - | ASEQ of (bool list) * arexp * arexp - | ASTAR of (bool list) * arexp + | AONE of bits + | ACHAR of bits * char + | AALTS of bits * (arexp list) + | ASEQ of bits * arexp * arexp + | ASTAR of bits * arexp datatype value = Empty @@ -44,269 +51,54 @@ infix 9 -- infix 9 $ -fun op ++ (r1, r2) = ALT(r1, r2) +fun op ++ (r1, r2) = ALTS [r1, r2] fun op -- (r1, r2) = SEQ(r1, r2) fun op $ (x, r) = RECD(x, r) -fun alts rs = case rs of +fun alts rs = case rs of [] => ZERO | [r] => r - | r::rs => List.foldl (op ++) r rs + | r::rs => ALTS([r, alts rs]) + + +fun sum (nil) = 0 + | sum (head::tail) = head + sum(tail); (* size of a regular expressions - for testing purposes *) fun size r = case r of ZERO => 1 | ONE => 1 | CHAR(_) => 1 - | ALT(r1, r2) => 1 + (size r1) + (size r2) + | ALTS(rs) => 1 + sum (map size rs) | SEQ(r1, r2) => 1 + (size r1) + (size r2) | STAR(r) => 1 + (size r) | RECD(_, r) => 1 + (size r) -(* nullable function: tests whether the regular - expression can recognise the empty string *) -fun nullable r = case r of - ZERO => false - | ONE => true - | CHAR(_) => false - | ALT(r1, r2) => nullable(r1) orelse nullable(r2) - | SEQ(r1, r2) => nullable(r1) andalso nullable(r2) - | STAR(_) => true - | RECD(_, r) => nullable(r) -(* derivative of a regular expression r w.r.t. a character c *) -fun der c r = case r of - ZERO => ZERO - | ONE => ZERO - | CHAR(d) => if c = d then ONE else ZERO - | ALT(r1, r2) => ALT(der c r1, der c r2) - | SEQ(r1, r2) => - if nullable r1 then ALT(SEQ(der c r1, r2), der c r2) - else SEQ(der c r1, r2) - | STAR(r) => SEQ(der c r, STAR(r)) - | RECD(_, r) => der c r - -(* derivative w.r.t. a list of chars (iterates der) *) -fun ders s r = case s of - [] => r - | c::s => ders s (der c r) - -(* extracts a string from value *) -fun flatten v = case v of - Empty => "" - | Chr(c) => Char.toString c - | Left(v) => flatten v - | Right(v) => flatten v - | Sequ(v1, v2) => flatten v1 ^ flatten v2 - | Stars(vs) => String.concat (List.map flatten vs) - | Rec(_, v) => flatten v - - -(* extracts an environment from a value *) -fun env v = case v of - Empty => [] - | Chr(c) => [] - | Left(v) => env v - | Right(v) => env v - | Sequ(v1, v2) => env v1 @ env v2 - | Stars(vs) => List.foldr (op @) [] (List.map env vs) - | Rec(x, v) => (x, flatten v) :: env v - -fun string_of_pair (x, s) = "(" ^ x ^ "," ^ s ^ ")" -fun string_of_env xs = String.concatWith "," (List.map string_of_pair xs) +fun erase r = case r of + AZERO => ZERO + | AONE(_) => ONE + | ACHAR(_, c) => CHAR(c) + | AALTS(_, rs) => ALTS(map erase rs) + | ASEQ(_, r1, r2) => SEQ(erase r1, erase r2) + | ASTAR(_, r)=> STAR(erase r) -(* the value for a nullable rexp *) -fun mkeps r = case r of - ONE => Empty - | ALT(r1, r2) => - if nullable r1 then Left(mkeps r1) else Right(mkeps r2) - | SEQ(r1, r2) => Sequ(mkeps r1, mkeps r2) - | STAR(r) => Stars([]) - | RECD(x, r) => Rec(x, mkeps r) - -exception Error - -(* injection of a char into a value *) -fun inj r c v = case (r, v) of - (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj r c v1 :: vs) - | (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj r1 c v1, v2) - | (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj r1 c v1, v2) - | (SEQ(r1, r2), Right(v2)) => Sequ(mkeps r1, inj r2 c v2) - | (ALT(r1, r2), Left(v1)) => Left(inj r1 c v1) - | (ALT(r1, r2), Right(v2)) => Right(inj r2 c v2) - | (CHAR(d), Empty) => Chr(d) - | (RECD(x, r1), _) => Rec(x, inj r1 c v) - | _ => (print ("\nr: " ^ PolyML.makestring r ^ "\n"); - print ("v: " ^ PolyML.makestring v ^ "\n"); - raise Error) - -(* some "rectification" functions for simplification *) -fun f_id v = v -fun f_right f = fn v => Right(f v) -fun f_left f = fn v => Left(f v) -fun f_alt f1 f2 = fn v => case v of - Right(v) => Right(f2 v) - | Left(v) => Left(f1 v) -fun f_seq f1 f2 = fn v => case v of - Sequ(v1, v2) => Sequ(f1 v1, f2 v2) -fun f_seq_Empty1 f1 f2 = fn v => Sequ(f1 Empty, f2 v) -fun f_seq_Empty2 f1 f2 = fn v => Sequ(f1 v, f2 Empty) -fun f_rec f = fn v => case v of - Rec(x, v) => Rec(x, f v) - -exception ShouldNotHappen - -fun f_error v = raise ShouldNotHappen - -(* simplification of regular expressions returning also an - rectification function; no simplification under STARs *) -fun simp r = case r of - ALT(r1, r2) => - let val (r1s, f1s) = simp r1 - val (r2s, f2s) = simp r2 in - (case (r1s, r2s) of - (ZERO, _) => (r2s, f_right f2s) - | (_, ZERO) => (r1s, f_left f1s) - | (_, _) => if r1s = r2s then (r1s, f_left f1s) - else (ALT (r1s, r2s), f_alt f1s f2s)) - end - | SEQ(r1, r2) => - let val (r1s, f1s) = simp r1 - val (r2s, f2s) = simp r2 in - (case (r1s, r2s) of - (ZERO, _) => (ZERO, f_error) - | (_, ZERO) => (ZERO, f_error) - | (ONE, _) => (r2s, f_seq_Empty1 f1s f2s) - | (_, ONE) => (r1s, f_seq_Empty2 f1s f2s) - | (_, _) => (SEQ(r1s, r2s), f_seq f1s f2s)) - end - | RECD(x, r1) => - let val (r1s, f1s) = simp r1 in - (RECD(x, r1s), f_rec f1s) - end - | r => (r, f_id) - -fun der_simp c r = case r of - ZERO => (ZERO, f_id) - | ONE => (ZERO, f_id) - | CHAR(d) => ((if c = d then ONE else ZERO), f_id) - | ALT(r1, r2) => - let - val (r1d, f1d) = der_simp c r1 - val (r2d, f2d) = der_simp c r2 - in - case (r1d, r2d) of - (ZERO, _) => (r2d, f_right f2d) - | (_, ZERO) => (r1d, f_left f1d) - | (_, _) => if r1d = r2d then (r1d, f_left f1d) - else (ALT (r1d, r2d), f_alt f1d f2d) - end - | SEQ(r1, r2) => - if nullable r1 - then - let - val (r1d, f1d) = der_simp c r1 - val (r2d, f2d) = der_simp c r2 - val (r2s, f2s) = simp r2 - in - case (r1d, r2s, r2d) of - (ZERO, _, _) => (r2d, f_right f2d) - | (_, ZERO, _) => (r2d, f_right f2d) - | (_, _, ZERO) => (SEQ(r1d, r2s), f_left (f_seq f1d f2s)) - | (ONE, _, _) => (ALT(r2s, r2d), f_alt (f_seq_Empty1 f1d f2s) f2d) - | (_, ONE, _) => (ALT(r1d, r2d), f_alt (f_seq_Empty2 f1d f2s) f2d) - | (_, _, _) => (ALT(SEQ(r1d, r2s), r2d), f_alt (f_seq f1d f2s) f2d) - end - else - let - val (r1d, f1d) = der_simp c r1 - val (r2s, f2s) = simp r2 - in - case (r1d, r2s) of - (ZERO, _) => (ZERO, f_error) - | (_, ZERO) => (ZERO, f_error) - | (ONE, _) => (r2s, f_seq_Empty1 f1d f2s) - | (_, ONE) => (r1d, f_seq_Empty2 f1d f2s) - | (_, _) => (SEQ(r1d, r2s), f_seq f1d f2s) - end - | STAR(r1) => - let - val (r1d, f1d) = der_simp c r1 - in - case r1d of - ZERO => (ZERO, f_error) - | ONE => (STAR r1, f_seq_Empty1 f1d f_id) - | _ => (SEQ(r1d, STAR(r1)), f_seq f1d f_id) - end - | RECD(x, r1) => der_simp c r1 - -(* matcher function *) -fun matcher r s = nullable(ders (explode s) r) - -(* lexing function (produces a value) *) -exception LexError - -fun lex r s = case s of - [] => if (nullable r) then mkeps r else raise LexError - | c::cs => inj r c (lex (der c r) cs) - -fun lexing r s = lex r (explode s) - -(* lexing with simplification *) -fun lex_simp r s = case s of - [] => if (nullable r) then mkeps r else raise LexError - | c::cs => - let val (r_simp, f_simp) = simp (der c r) in - inj r c (f_simp (lex_simp r_simp cs)) - end - -fun lexing_simp r s = lex_simp r (explode s) - -fun lex_simp2 r s = case s of - [] => if (nullable r) then mkeps r else raise LexError - | c::cs => - let val (r_simp, f_simp) = der_simp c r in - inj r c (f_simp (lex_simp2 r_simp cs)) - end - -fun lexing_simp2 r s = lex_simp2 r (explode s) - -fun lex_acc r s f = case s of - [] => if (nullable r) then f (mkeps r) else raise LexError - | c::cs => - let val (r_simp, f_simp) = simp (der c r) in - lex_acc r_simp cs (fn v => f (inj r c (f_simp v))) - end - -fun lexing_acc r s = lex_acc r (explode s) (f_id) - -fun lex_acc2 r s f = case s of - [] => if (nullable r) then f (mkeps r) else raise LexError - | c::cs => - let val (r_simp, f_simp) = der_simp c r in - lex_acc2 r_simp cs (fn v => f (inj r c (f_simp v))) - end - -fun lexing_acc2 r s = lex_acc2 r (explode s) (f_id) - -(* bit-coded version *) - fun fuse bs r = case r of - AZERO => AZERO -| AONE(cs) => AONE(bs @ cs) -| ACHAR(cs, c) => ACHAR(bs @ cs, c) -| AALT(cs, r1, r2) => AALT(bs @ cs, r1, r2) -| ASEQ(cs, r1, r2) => ASEQ(bs @ cs, r1, r2) -| ASTAR(cs, r) => ASTAR(bs @ cs, r) + AZERO => AZERO + | AONE(cs) => AONE(bs @ cs) + | ACHAR(cs, c) => ACHAR(bs @ cs, c) + | AALTS(cs, rs) => AALTS(bs @ cs, rs) + | ASEQ(cs, r1, r2) => ASEQ(bs @ cs, r1, r2) + | ASTAR(cs, r) => ASTAR(bs @ cs, r) fun internalise r = case r of ZERO => AZERO | ONE => AONE([]) | CHAR(c) => ACHAR([], c) -| ALT(r1, r2) => AALT([], fuse [false] (internalise r1), fuse [true] (internalise r2)) +| ALTS([r1, r2]) => AALTS([], [fuse [Z] (internalise r1), fuse [S] (internalise r2)]) | SEQ(r1, r2) => ASEQ([], internalise r1, internalise r2) | STAR(r) => ASTAR([], internalise r) | RECD(x, r) => internalise r @@ -314,21 +106,22 @@ fun decode_aux r bs = case (r, bs) of (ONE, bs) => (Empty, bs) | (CHAR(c), bs) => (Chr(c), bs) -| (ALT(r1, r2), false::bs) => - let val (v, bs1) = decode_aux r1 bs - in (Left(v), bs1) end -| (ALT(r1, r2), true::bs) => - let val (v, bs1) = decode_aux r2 bs - in (Right(v), bs1) end +| (ALTS([r1]), bs) => decode_aux r1 bs +| (ALTS(rs), Z::bs1) => + let val (v, bs2) = decode_aux (hd rs) bs1 + in (Left(v), bs2) end +| (ALTS(rs), S::bs1) => + let val (v, bs2) = decode_aux (ALTS (tl rs)) bs1 + in (Right(v), bs2) end | (SEQ(r1, r2), bs) => let val (v1, bs1) = decode_aux r1 bs val (v2, bs2) = decode_aux r2 bs1 in (Sequ(v1, v2), bs2) end -| (STAR(r1), false::bs) => +| (STAR(r1), Z::bs) => let val (v, bs1) = decode_aux r1 bs val (Stars(vs), bs2) = decode_aux (STAR r1) bs1 in (Stars(v::vs), bs2) end -| (STAR(_), true::bs) => (Stars [], bs) +| (STAR(_), S::bs) => (Stars [], bs) | (RECD(x, r1), bs) => let val (v, bs1) = decode_aux r1 bs in (Rec(x, v), bs1) end @@ -339,61 +132,95 @@ (v, []) => v | _ => raise DecodeError -fun anullable r = case r of +fun bnullable r = case r of AZERO => false | AONE(_) => true -| ACHAR(_,_) => false -| AALT(_, r1, r2) => anullable(r1) orelse anullable(r2) -| ASEQ(_, r1, r2) => anullable(r1) andalso anullable(r2) +| ACHAR(_, _) => false +| AALTS(_, rs) => List.exists bnullable rs +| ASEQ(_, r1, r2) => bnullable(r1) andalso bnullable(r2) | ASTAR(_, _) => true -fun mkepsBC r = case r of +fun bmkeps r = case r of AONE(bs) => bs -| AALT(bs, r1, r2) => - if anullable(r1) then bs @ mkepsBC(r1) else bs @ mkepsBC(r2) -| ASEQ(bs, r1, r2) => bs @ mkepsBC(r1) @ mkepsBC(r2) -| ASTAR(bs, r) => bs @ [true] +| AALTS(bs, rs) => + let + val SOME(r) = List.find bnullable rs + in bs @ bmkeps(r) end +| ASEQ(bs, r1, r2) => bs @ bmkeps(r1) @ bmkeps(r2) +| ASTAR(bs, r) => bs @ [S] -fun ader c r = case r of +fun bder c r = case r of AZERO => AZERO | AONE(_) => AZERO | ACHAR(bs, d) => if c = d then AONE(bs) else AZERO -| AALT(bs, r1, r2) => AALT(bs, ader c r1, ader c r2) +| AALTS(bs, rs) => AALTS(bs, map (bder c) rs) | ASEQ(bs, r1, r2) => - if (anullable r1) then AALT(bs, ASEQ([], ader c r1, r2), fuse (mkepsBC r1) (ader c r2)) - else ASEQ(bs, ader c r1, r2) -| ASTAR(bs, r) => ASEQ(bs, fuse [false] (ader c r), ASTAR([], r)) + if (bnullable r1) + then AALTS(bs, [ASEQ([], bder c r1, r2), fuse (bmkeps r1) (bder c r2)]) + else ASEQ(bs, bder c r1, r2) +| ASTAR(bs, r) => ASEQ(bs, fuse [Z] (bder c r), ASTAR([], r)) -fun aders s r = case s of +fun bders s r = case s of [] => r -| c::s => aders s (ader c r) +| c::s => bders s (bder c r) + + +exception LexError + +fun blex r s = case s of + [] => if (bnullable r) then bmkeps r else raise LexError + | c::cs => blex (bder c r) cs + +fun blexing r s = decode r (blex (internalise r) (explode s)) -fun alex r s = case s of - [] => if (anullable r) then mkepsBC r else raise LexError - | c::cs => alex (ader c r) cs +(* Simplification *) -fun alexing r s = decode r (alex (internalise r) (explode s)) +fun distinctBy xs f acc = case xs of + [] => [] + | x::xs => + let + val res = f x + in if (List.exists (fn x => x = res) acc) + then distinctBy xs f acc + else x::distinctBy xs f (res::acc) + end -fun asimp r = case r of - ASEQ(bs1, r1, r2) => (case (asimp r1, asimp r2) of - (AZERO, _) => AZERO - | (_, AZERO) => AZERO - | (AONE(bs2), r2s) => fuse (bs1 @ bs2) r2s - | (r1s, r2s) => ASEQ(bs1, r1s, r2s) - ) -| AALT(bs1, r1, r2) => (case (asimp r1, asimp r2) of - (AZERO, r2s) => fuse bs1 r2s - | (r1s, AZERO) => fuse bs1 r1s - | (r1s, r2s) => AALT(bs1, r1s, r2s) - ) -| r => r +fun flats rs = case rs of + [] => [] + | AZERO::rs1 => flats rs1 + | AALTS(bs, rs1)::rs2 => map (fuse bs) rs1 @ flats rs2 + | r1::rs2 => r1::flats rs2 + + +fun stack r1 r2 = case r1 of + AONE(bs2) => fuse bs2 r2 + | _ => ASEQ([], r1, r2) + -fun alex_simp r s = case s of - [] => if (anullable r) then mkepsBC r else raise LexError - | c::cs => alex_simp (asimp (ader c r)) cs +fun bsimp r = case r of + ASEQ(bs1, r1, r2) => (case (bsimp r1, bsimp r2) of + (AZERO, _) => AZERO + | (_, AZERO) => AZERO + | (AONE(bs2), r2s) => fuse (bs1 @ bs2) r2s + | (AALTS(bs2, rs), r2s) => + AALTS(bs1 @ bs2, map (fn r => stack r r2s) rs) + | (r1s, r2s) => ASEQ(bs1, r1s, r2s)) + | AALTS(bs1, rs) => (case distinctBy (flats (map bsimp rs)) erase [] of + [] => AZERO + | [r] => fuse bs1 r + | rs2 => AALTS(bs1, rs2)) + | r => r -fun alexing_simp r s = decode r (alex_simp (internalise r) (explode s)) +fun bders_simp r s = case s of + [] => r +| c::s => bders_simp (bsimp (bder c r)) s +fun blex_simp r s = case s of + [] => if (bnullable r) then bmkeps r else raise LexError + | c::cs => blex_simp (bsimp (bder c r)) cs + +fun blexing_simp r s = + decode r (blex_simp (internalise r) (explode s)) (* Lexing rules for a small WHILE language *) @@ -428,45 +255,12 @@ fun time f x = let val t_start = Timer.startCPUTimer() - val f_x = (f x; f x; f x; f x; f x; f x; f x; f x; f x; f x) - val t_end = Time.toReal(#usr(Timer.checkCPUTimer(t_start))) / 10.0 + val f_x = (f x; f x; f x; f x; f x) + val t_end = Time.toReal(#usr(Timer.checkCPUTimer(t_start))) / 5.0 in (print ((Real.toString t_end) ^ "\n"); f_x) end -val prog = "ab"; -val reg = ("x" $ ((str "a") -- (str "b"))); -print("Simp: " ^ PolyML.makestring (lexing_simp reg prog) ^ "\n"); -print("Acc: " ^ PolyML.makestring (lexing_acc reg prog) ^ "\n"); -print("Env " ^ string_of_env (env (lexing_acc reg prog)) ^ "\n"); - -fun fst (x, y) = x; -fun snd (x, y) = y; - -val derS = [reg, - der #"a" reg, - fst (simp (der #"a" reg)), - fst (der_simp #"a" reg)]; - -val vS = [(snd (simp (der #"a" reg))) (Chr(#"b")), - (snd (der_simp #"a" reg)) (Chr(#"b")) - ]; - -print("Ders: \n" ^ - String.concatWith "\n" (List.map PolyML.makestring derS) - ^ "\n\n"); -print("Vs: \n" ^ - String.concatWith "\n" (List.map PolyML.makestring vS) - ^ "\n\n"); - - -val prog0 = "read n"; -print("Env0 is: \n" ^ string_of_env (env (lexing_acc while_regs prog0)) ^ "\n"); - -val prog1 = "read n; write (n)"; -print("Env1 is: \n" ^ string_of_env (env (lexing_acc while_regs prog1)) ^ "\n"); -print("Env1 is: \n" ^ string_of_env (env (alexing while_regs prog1)) ^ "\n"); - val prog2 = String.concatWith "\n" ["i := 2;", @@ -483,12 +277,6 @@ "}"]; -let - val tst = (lexing_simp while_regs prog2 = lexing_acc while_regs prog2) -in - print("Sanity test: >>" ^ (PolyML.makestring tst) ^ "<<\n") -end; - (* loops in ML *) datatype for = to of int * int infix to @@ -510,28 +298,7 @@ fun step_simp i = (print ((Int.toString i) ^ ": ") ; - time (lexing_simp while_regs) (string_repeat prog2 i)); - -fun step_simp2 i = - (print ((Int.toString i) ^ ": ") ; - time (lexing_simp2 while_regs) (string_repeat prog2 i)); - -fun step_acc i = - (print ((Int.toString i) ^ ": ") ; - time (lexing_acc while_regs) (string_repeat prog2 i)); - -fun step_acc2 i = - (print ((Int.toString i) ^ ": ") ; - time (lexing_acc2 while_regs) (string_repeat prog2 i)); - -fun astep_basic i = - (print ((Int.toString i) ^ ": ") ; - time (alexing while_regs) (string_repeat prog2 i)); - -fun astep_simp i = - (print ((Int.toString i) ^ ": ") ; - time (alexing_simp while_regs) (string_repeat prog2 i)); - + time (blexing_simp while_regs) (string_repeat prog2 i)); (* val main1 = forby 1000 (1000 to 5000) step_simp; @@ -544,4 +311,11 @@ *) print "\n"; -val main5 = forby 1 (1 to 5) astep_simp; \ No newline at end of file +val main5 = forby 10 (10 to 50) step_simp; + +print("Size after 50: " ^ + PolyML.makestring(size (erase (bders_simp (internalise while_regs) (explode (string_repeat prog2 50))))) ^ "\n"); + +print("Size after 100: " ^ + PolyML.makestring(size (erase (bders_simp (internalise while_regs) (explode (string_repeat prog2 100))))) ^ "\n"); +