--- a/progs/sml/re-bit.ML Sat Mar 16 11:15:22 2019 +0000
+++ b/progs/sml/re-bit.ML Thu Apr 11 17:37:00 2019 +0100
@@ -1,20 +1,27 @@
+
+datatype bit =
+ Z | S | C of char
+
+type bits = bit list
datatype rexp =
ZERO
| ONE
- | CHAR of char
- | ALT of rexp * rexp
+ | CHAR of char
+ | ALTS of rexp list
| SEQ of rexp * rexp
| STAR of rexp
| RECD of string * rexp
+fun alt r1 r2 = ALTS [r1, r2]
+
datatype arexp =
AZERO
- | AONE of (bool list)
- | ACHAR of (bool list) * char
- | AALT of (bool list) * arexp * arexp
- | ASEQ of (bool list) * arexp * arexp
- | ASTAR of (bool list) * arexp
+ | AONE of bits
+ | ACHAR of bits * char
+ | AALTS of bits * (arexp list)
+ | ASEQ of bits * arexp * arexp
+ | ASTAR of bits * arexp
datatype value =
Empty
@@ -44,269 +51,54 @@
infix 9 --
infix 9 $
-fun op ++ (r1, r2) = ALT(r1, r2)
+fun op ++ (r1, r2) = ALTS [r1, r2]
fun op -- (r1, r2) = SEQ(r1, r2)
fun op $ (x, r) = RECD(x, r)
-fun alts rs = case rs of
+fun alts rs = case rs of
[] => ZERO
| [r] => r
- | r::rs => List.foldl (op ++) r rs
+ | r::rs => ALTS([r, alts rs])
+
+
+fun sum (nil) = 0
+ | sum (head::tail) = head + sum(tail);
(* size of a regular expressions - for testing purposes *)
fun size r = case r of
ZERO => 1
| ONE => 1
| CHAR(_) => 1
- | ALT(r1, r2) => 1 + (size r1) + (size r2)
+ | ALTS(rs) => 1 + sum (map size rs)
| SEQ(r1, r2) => 1 + (size r1) + (size r2)
| STAR(r) => 1 + (size r)
| RECD(_, r) => 1 + (size r)
-(* nullable function: tests whether the regular
- expression can recognise the empty string *)
-fun nullable r = case r of
- ZERO => false
- | ONE => true
- | CHAR(_) => false
- | ALT(r1, r2) => nullable(r1) orelse nullable(r2)
- | SEQ(r1, r2) => nullable(r1) andalso nullable(r2)
- | STAR(_) => true
- | RECD(_, r) => nullable(r)
-(* derivative of a regular expression r w.r.t. a character c *)
-fun der c r = case r of
- ZERO => ZERO
- | ONE => ZERO
- | CHAR(d) => if c = d then ONE else ZERO
- | ALT(r1, r2) => ALT(der c r1, der c r2)
- | SEQ(r1, r2) =>
- if nullable r1 then ALT(SEQ(der c r1, r2), der c r2)
- else SEQ(der c r1, r2)
- | STAR(r) => SEQ(der c r, STAR(r))
- | RECD(_, r) => der c r
-
-(* derivative w.r.t. a list of chars (iterates der) *)
-fun ders s r = case s of
- [] => r
- | c::s => ders s (der c r)
-
-(* extracts a string from value *)
-fun flatten v = case v of
- Empty => ""
- | Chr(c) => Char.toString c
- | Left(v) => flatten v
- | Right(v) => flatten v
- | Sequ(v1, v2) => flatten v1 ^ flatten v2
- | Stars(vs) => String.concat (List.map flatten vs)
- | Rec(_, v) => flatten v
-
-
-(* extracts an environment from a value *)
-fun env v = case v of
- Empty => []
- | Chr(c) => []
- | Left(v) => env v
- | Right(v) => env v
- | Sequ(v1, v2) => env v1 @ env v2
- | Stars(vs) => List.foldr (op @) [] (List.map env vs)
- | Rec(x, v) => (x, flatten v) :: env v
-
-fun string_of_pair (x, s) = "(" ^ x ^ "," ^ s ^ ")"
-fun string_of_env xs = String.concatWith "," (List.map string_of_pair xs)
+fun erase r = case r of
+ AZERO => ZERO
+ | AONE(_) => ONE
+ | ACHAR(_, c) => CHAR(c)
+ | AALTS(_, rs) => ALTS(map erase rs)
+ | ASEQ(_, r1, r2) => SEQ(erase r1, erase r2)
+ | ASTAR(_, r)=> STAR(erase r)
-(* the value for a nullable rexp *)
-fun mkeps r = case r of
- ONE => Empty
- | ALT(r1, r2) =>
- if nullable r1 then Left(mkeps r1) else Right(mkeps r2)
- | SEQ(r1, r2) => Sequ(mkeps r1, mkeps r2)
- | STAR(r) => Stars([])
- | RECD(x, r) => Rec(x, mkeps r)
-
-exception Error
-
-(* injection of a char into a value *)
-fun inj r c v = case (r, v) of
- (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj r c v1 :: vs)
- | (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj r1 c v1, v2)
- | (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj r1 c v1, v2)
- | (SEQ(r1, r2), Right(v2)) => Sequ(mkeps r1, inj r2 c v2)
- | (ALT(r1, r2), Left(v1)) => Left(inj r1 c v1)
- | (ALT(r1, r2), Right(v2)) => Right(inj r2 c v2)
- | (CHAR(d), Empty) => Chr(d)
- | (RECD(x, r1), _) => Rec(x, inj r1 c v)
- | _ => (print ("\nr: " ^ PolyML.makestring r ^ "\n");
- print ("v: " ^ PolyML.makestring v ^ "\n");
- raise Error)
-
-(* some "rectification" functions for simplification *)
-fun f_id v = v
-fun f_right f = fn v => Right(f v)
-fun f_left f = fn v => Left(f v)
-fun f_alt f1 f2 = fn v => case v of
- Right(v) => Right(f2 v)
- | Left(v) => Left(f1 v)
-fun f_seq f1 f2 = fn v => case v of
- Sequ(v1, v2) => Sequ(f1 v1, f2 v2)
-fun f_seq_Empty1 f1 f2 = fn v => Sequ(f1 Empty, f2 v)
-fun f_seq_Empty2 f1 f2 = fn v => Sequ(f1 v, f2 Empty)
-fun f_rec f = fn v => case v of
- Rec(x, v) => Rec(x, f v)
-
-exception ShouldNotHappen
-
-fun f_error v = raise ShouldNotHappen
-
-(* simplification of regular expressions returning also an
- rectification function; no simplification under STARs *)
-fun simp r = case r of
- ALT(r1, r2) =>
- let val (r1s, f1s) = simp r1
- val (r2s, f2s) = simp r2 in
- (case (r1s, r2s) of
- (ZERO, _) => (r2s, f_right f2s)
- | (_, ZERO) => (r1s, f_left f1s)
- | (_, _) => if r1s = r2s then (r1s, f_left f1s)
- else (ALT (r1s, r2s), f_alt f1s f2s))
- end
- | SEQ(r1, r2) =>
- let val (r1s, f1s) = simp r1
- val (r2s, f2s) = simp r2 in
- (case (r1s, r2s) of
- (ZERO, _) => (ZERO, f_error)
- | (_, ZERO) => (ZERO, f_error)
- | (ONE, _) => (r2s, f_seq_Empty1 f1s f2s)
- | (_, ONE) => (r1s, f_seq_Empty2 f1s f2s)
- | (_, _) => (SEQ(r1s, r2s), f_seq f1s f2s))
- end
- | RECD(x, r1) =>
- let val (r1s, f1s) = simp r1 in
- (RECD(x, r1s), f_rec f1s)
- end
- | r => (r, f_id)
-
-fun der_simp c r = case r of
- ZERO => (ZERO, f_id)
- | ONE => (ZERO, f_id)
- | CHAR(d) => ((if c = d then ONE else ZERO), f_id)
- | ALT(r1, r2) =>
- let
- val (r1d, f1d) = der_simp c r1
- val (r2d, f2d) = der_simp c r2
- in
- case (r1d, r2d) of
- (ZERO, _) => (r2d, f_right f2d)
- | (_, ZERO) => (r1d, f_left f1d)
- | (_, _) => if r1d = r2d then (r1d, f_left f1d)
- else (ALT (r1d, r2d), f_alt f1d f2d)
- end
- | SEQ(r1, r2) =>
- if nullable r1
- then
- let
- val (r1d, f1d) = der_simp c r1
- val (r2d, f2d) = der_simp c r2
- val (r2s, f2s) = simp r2
- in
- case (r1d, r2s, r2d) of
- (ZERO, _, _) => (r2d, f_right f2d)
- | (_, ZERO, _) => (r2d, f_right f2d)
- | (_, _, ZERO) => (SEQ(r1d, r2s), f_left (f_seq f1d f2s))
- | (ONE, _, _) => (ALT(r2s, r2d), f_alt (f_seq_Empty1 f1d f2s) f2d)
- | (_, ONE, _) => (ALT(r1d, r2d), f_alt (f_seq_Empty2 f1d f2s) f2d)
- | (_, _, _) => (ALT(SEQ(r1d, r2s), r2d), f_alt (f_seq f1d f2s) f2d)
- end
- else
- let
- val (r1d, f1d) = der_simp c r1
- val (r2s, f2s) = simp r2
- in
- case (r1d, r2s) of
- (ZERO, _) => (ZERO, f_error)
- | (_, ZERO) => (ZERO, f_error)
- | (ONE, _) => (r2s, f_seq_Empty1 f1d f2s)
- | (_, ONE) => (r1d, f_seq_Empty2 f1d f2s)
- | (_, _) => (SEQ(r1d, r2s), f_seq f1d f2s)
- end
- | STAR(r1) =>
- let
- val (r1d, f1d) = der_simp c r1
- in
- case r1d of
- ZERO => (ZERO, f_error)
- | ONE => (STAR r1, f_seq_Empty1 f1d f_id)
- | _ => (SEQ(r1d, STAR(r1)), f_seq f1d f_id)
- end
- | RECD(x, r1) => der_simp c r1
-
-(* matcher function *)
-fun matcher r s = nullable(ders (explode s) r)
-
-(* lexing function (produces a value) *)
-exception LexError
-
-fun lex r s = case s of
- [] => if (nullable r) then mkeps r else raise LexError
- | c::cs => inj r c (lex (der c r) cs)
-
-fun lexing r s = lex r (explode s)
-
-(* lexing with simplification *)
-fun lex_simp r s = case s of
- [] => if (nullable r) then mkeps r else raise LexError
- | c::cs =>
- let val (r_simp, f_simp) = simp (der c r) in
- inj r c (f_simp (lex_simp r_simp cs))
- end
-
-fun lexing_simp r s = lex_simp r (explode s)
-
-fun lex_simp2 r s = case s of
- [] => if (nullable r) then mkeps r else raise LexError
- | c::cs =>
- let val (r_simp, f_simp) = der_simp c r in
- inj r c (f_simp (lex_simp2 r_simp cs))
- end
-
-fun lexing_simp2 r s = lex_simp2 r (explode s)
-
-fun lex_acc r s f = case s of
- [] => if (nullable r) then f (mkeps r) else raise LexError
- | c::cs =>
- let val (r_simp, f_simp) = simp (der c r) in
- lex_acc r_simp cs (fn v => f (inj r c (f_simp v)))
- end
-
-fun lexing_acc r s = lex_acc r (explode s) (f_id)
-
-fun lex_acc2 r s f = case s of
- [] => if (nullable r) then f (mkeps r) else raise LexError
- | c::cs =>
- let val (r_simp, f_simp) = der_simp c r in
- lex_acc2 r_simp cs (fn v => f (inj r c (f_simp v)))
- end
-
-fun lexing_acc2 r s = lex_acc2 r (explode s) (f_id)
-
-(* bit-coded version *)
-
fun fuse bs r = case r of
- AZERO => AZERO
-| AONE(cs) => AONE(bs @ cs)
-| ACHAR(cs, c) => ACHAR(bs @ cs, c)
-| AALT(cs, r1, r2) => AALT(bs @ cs, r1, r2)
-| ASEQ(cs, r1, r2) => ASEQ(bs @ cs, r1, r2)
-| ASTAR(cs, r) => ASTAR(bs @ cs, r)
+ AZERO => AZERO
+ | AONE(cs) => AONE(bs @ cs)
+ | ACHAR(cs, c) => ACHAR(bs @ cs, c)
+ | AALTS(cs, rs) => AALTS(bs @ cs, rs)
+ | ASEQ(cs, r1, r2) => ASEQ(bs @ cs, r1, r2)
+ | ASTAR(cs, r) => ASTAR(bs @ cs, r)
fun internalise r = case r of
ZERO => AZERO
| ONE => AONE([])
| CHAR(c) => ACHAR([], c)
-| ALT(r1, r2) => AALT([], fuse [false] (internalise r1), fuse [true] (internalise r2))
+| ALTS([r1, r2]) => AALTS([], [fuse [Z] (internalise r1), fuse [S] (internalise r2)])
| SEQ(r1, r2) => ASEQ([], internalise r1, internalise r2)
| STAR(r) => ASTAR([], internalise r)
| RECD(x, r) => internalise r
@@ -314,21 +106,22 @@
fun decode_aux r bs = case (r, bs) of
(ONE, bs) => (Empty, bs)
| (CHAR(c), bs) => (Chr(c), bs)
-| (ALT(r1, r2), false::bs) =>
- let val (v, bs1) = decode_aux r1 bs
- in (Left(v), bs1) end
-| (ALT(r1, r2), true::bs) =>
- let val (v, bs1) = decode_aux r2 bs
- in (Right(v), bs1) end
+| (ALTS([r1]), bs) => decode_aux r1 bs
+| (ALTS(rs), Z::bs1) =>
+ let val (v, bs2) = decode_aux (hd rs) bs1
+ in (Left(v), bs2) end
+| (ALTS(rs), S::bs1) =>
+ let val (v, bs2) = decode_aux (ALTS (tl rs)) bs1
+ in (Right(v), bs2) end
| (SEQ(r1, r2), bs) =>
let val (v1, bs1) = decode_aux r1 bs
val (v2, bs2) = decode_aux r2 bs1
in (Sequ(v1, v2), bs2) end
-| (STAR(r1), false::bs) =>
+| (STAR(r1), Z::bs) =>
let val (v, bs1) = decode_aux r1 bs
val (Stars(vs), bs2) = decode_aux (STAR r1) bs1
in (Stars(v::vs), bs2) end
-| (STAR(_), true::bs) => (Stars [], bs)
+| (STAR(_), S::bs) => (Stars [], bs)
| (RECD(x, r1), bs) =>
let val (v, bs1) = decode_aux r1 bs
in (Rec(x, v), bs1) end
@@ -339,61 +132,95 @@
(v, []) => v
| _ => raise DecodeError
-fun anullable r = case r of
+fun bnullable r = case r of
AZERO => false
| AONE(_) => true
-| ACHAR(_,_) => false
-| AALT(_, r1, r2) => anullable(r1) orelse anullable(r2)
-| ASEQ(_, r1, r2) => anullable(r1) andalso anullable(r2)
+| ACHAR(_, _) => false
+| AALTS(_, rs) => List.exists bnullable rs
+| ASEQ(_, r1, r2) => bnullable(r1) andalso bnullable(r2)
| ASTAR(_, _) => true
-fun mkepsBC r = case r of
+fun bmkeps r = case r of
AONE(bs) => bs
-| AALT(bs, r1, r2) =>
- if anullable(r1) then bs @ mkepsBC(r1) else bs @ mkepsBC(r2)
-| ASEQ(bs, r1, r2) => bs @ mkepsBC(r1) @ mkepsBC(r2)
-| ASTAR(bs, r) => bs @ [true]
+| AALTS(bs, rs) =>
+ let
+ val SOME(r) = List.find bnullable rs
+ in bs @ bmkeps(r) end
+| ASEQ(bs, r1, r2) => bs @ bmkeps(r1) @ bmkeps(r2)
+| ASTAR(bs, r) => bs @ [S]
-fun ader c r = case r of
+fun bder c r = case r of
AZERO => AZERO
| AONE(_) => AZERO
| ACHAR(bs, d) => if c = d then AONE(bs) else AZERO
-| AALT(bs, r1, r2) => AALT(bs, ader c r1, ader c r2)
+| AALTS(bs, rs) => AALTS(bs, map (bder c) rs)
| ASEQ(bs, r1, r2) =>
- if (anullable r1) then AALT(bs, ASEQ([], ader c r1, r2), fuse (mkepsBC r1) (ader c r2))
- else ASEQ(bs, ader c r1, r2)
-| ASTAR(bs, r) => ASEQ(bs, fuse [false] (ader c r), ASTAR([], r))
+ if (bnullable r1)
+ then AALTS(bs, [ASEQ([], bder c r1, r2), fuse (bmkeps r1) (bder c r2)])
+ else ASEQ(bs, bder c r1, r2)
+| ASTAR(bs, r) => ASEQ(bs, fuse [Z] (bder c r), ASTAR([], r))
-fun aders s r = case s of
+fun bders s r = case s of
[] => r
-| c::s => aders s (ader c r)
+| c::s => bders s (bder c r)
+
+
+exception LexError
+
+fun blex r s = case s of
+ [] => if (bnullable r) then bmkeps r else raise LexError
+ | c::cs => blex (bder c r) cs
+
+fun blexing r s = decode r (blex (internalise r) (explode s))
-fun alex r s = case s of
- [] => if (anullable r) then mkepsBC r else raise LexError
- | c::cs => alex (ader c r) cs
+(* Simplification *)
-fun alexing r s = decode r (alex (internalise r) (explode s))
+fun distinctBy xs f acc = case xs of
+ [] => []
+ | x::xs =>
+ let
+ val res = f x
+ in if (List.exists (fn x => x = res) acc)
+ then distinctBy xs f acc
+ else x::distinctBy xs f (res::acc)
+ end
-fun asimp r = case r of
- ASEQ(bs1, r1, r2) => (case (asimp r1, asimp r2) of
- (AZERO, _) => AZERO
- | (_, AZERO) => AZERO
- | (AONE(bs2), r2s) => fuse (bs1 @ bs2) r2s
- | (r1s, r2s) => ASEQ(bs1, r1s, r2s)
- )
-| AALT(bs1, r1, r2) => (case (asimp r1, asimp r2) of
- (AZERO, r2s) => fuse bs1 r2s
- | (r1s, AZERO) => fuse bs1 r1s
- | (r1s, r2s) => AALT(bs1, r1s, r2s)
- )
-| r => r
+fun flats rs = case rs of
+ [] => []
+ | AZERO::rs1 => flats rs1
+ | AALTS(bs, rs1)::rs2 => map (fuse bs) rs1 @ flats rs2
+ | r1::rs2 => r1::flats rs2
+
+
+fun stack r1 r2 = case r1 of
+ AONE(bs2) => fuse bs2 r2
+ | _ => ASEQ([], r1, r2)
+
-fun alex_simp r s = case s of
- [] => if (anullable r) then mkepsBC r else raise LexError
- | c::cs => alex_simp (asimp (ader c r)) cs
+fun bsimp r = case r of
+ ASEQ(bs1, r1, r2) => (case (bsimp r1, bsimp r2) of
+ (AZERO, _) => AZERO
+ | (_, AZERO) => AZERO
+ | (AONE(bs2), r2s) => fuse (bs1 @ bs2) r2s
+ | (AALTS(bs2, rs), r2s) =>
+ AALTS(bs1 @ bs2, map (fn r => stack r r2s) rs)
+ | (r1s, r2s) => ASEQ(bs1, r1s, r2s))
+ | AALTS(bs1, rs) => (case distinctBy (flats (map bsimp rs)) erase [] of
+ [] => AZERO
+ | [r] => fuse bs1 r
+ | rs2 => AALTS(bs1, rs2))
+ | r => r
-fun alexing_simp r s = decode r (alex_simp (internalise r) (explode s))
+fun bders_simp r s = case s of
+ [] => r
+| c::s => bders_simp (bsimp (bder c r)) s
+fun blex_simp r s = case s of
+ [] => if (bnullable r) then bmkeps r else raise LexError
+ | c::cs => blex_simp (bsimp (bder c r)) cs
+
+fun blexing_simp r s =
+ decode r (blex_simp (internalise r) (explode s))
(* Lexing rules for a small WHILE language *)
@@ -428,45 +255,12 @@
fun time f x =
let
val t_start = Timer.startCPUTimer()
- val f_x = (f x; f x; f x; f x; f x; f x; f x; f x; f x; f x)
- val t_end = Time.toReal(#usr(Timer.checkCPUTimer(t_start))) / 10.0
+ val f_x = (f x; f x; f x; f x; f x)
+ val t_end = Time.toReal(#usr(Timer.checkCPUTimer(t_start))) / 5.0
in
(print ((Real.toString t_end) ^ "\n"); f_x)
end
-val prog = "ab";
-val reg = ("x" $ ((str "a") -- (str "b")));
-print("Simp: " ^ PolyML.makestring (lexing_simp reg prog) ^ "\n");
-print("Acc: " ^ PolyML.makestring (lexing_acc reg prog) ^ "\n");
-print("Env " ^ string_of_env (env (lexing_acc reg prog)) ^ "\n");
-
-fun fst (x, y) = x;
-fun snd (x, y) = y;
-
-val derS = [reg,
- der #"a" reg,
- fst (simp (der #"a" reg)),
- fst (der_simp #"a" reg)];
-
-val vS = [(snd (simp (der #"a" reg))) (Chr(#"b")),
- (snd (der_simp #"a" reg)) (Chr(#"b"))
- ];
-
-print("Ders: \n" ^
- String.concatWith "\n" (List.map PolyML.makestring derS)
- ^ "\n\n");
-print("Vs: \n" ^
- String.concatWith "\n" (List.map PolyML.makestring vS)
- ^ "\n\n");
-
-
-val prog0 = "read n";
-print("Env0 is: \n" ^ string_of_env (env (lexing_acc while_regs prog0)) ^ "\n");
-
-val prog1 = "read n; write (n)";
-print("Env1 is: \n" ^ string_of_env (env (lexing_acc while_regs prog1)) ^ "\n");
-print("Env1 is: \n" ^ string_of_env (env (alexing while_regs prog1)) ^ "\n");
-
val prog2 = String.concatWith "\n"
["i := 2;",
@@ -483,12 +277,6 @@
"}"];
-let
- val tst = (lexing_simp while_regs prog2 = lexing_acc while_regs prog2)
-in
- print("Sanity test: >>" ^ (PolyML.makestring tst) ^ "<<\n")
-end;
-
(* loops in ML *)
datatype for = to of int * int
infix to
@@ -510,28 +298,7 @@
fun step_simp i =
(print ((Int.toString i) ^ ": ") ;
- time (lexing_simp while_regs) (string_repeat prog2 i));
-
-fun step_simp2 i =
- (print ((Int.toString i) ^ ": ") ;
- time (lexing_simp2 while_regs) (string_repeat prog2 i));
-
-fun step_acc i =
- (print ((Int.toString i) ^ ": ") ;
- time (lexing_acc while_regs) (string_repeat prog2 i));
-
-fun step_acc2 i =
- (print ((Int.toString i) ^ ": ") ;
- time (lexing_acc2 while_regs) (string_repeat prog2 i));
-
-fun astep_basic i =
- (print ((Int.toString i) ^ ": ") ;
- time (alexing while_regs) (string_repeat prog2 i));
-
-fun astep_simp i =
- (print ((Int.toString i) ^ ": ") ;
- time (alexing_simp while_regs) (string_repeat prog2 i));
-
+ time (blexing_simp while_regs) (string_repeat prog2 i));
(*
val main1 = forby 1000 (1000 to 5000) step_simp;
@@ -544,4 +311,11 @@
*)
print "\n";
-val main5 = forby 1 (1 to 5) astep_simp;
\ No newline at end of file
+val main5 = forby 10 (10 to 50) step_simp;
+
+print("Size after 50: " ^
+ PolyML.makestring(size (erase (bders_simp (internalise while_regs) (explode (string_repeat prog2 50))))) ^ "\n");
+
+print("Size after 100: " ^
+ PolyML.makestring(size (erase (bders_simp (internalise while_regs) (explode (string_repeat prog2 100))))) ^ "\n");
+