progs/sml/re-bit.ML
changeset 317 db0ff630bbb7
parent 159 940530087f30
equal deleted inserted replaced
316:0eaa1851a5b6 317:db0ff630bbb7
       
     1 
       
     2 datatype bit =
       
     3   Z | S | C of char
       
     4 
       
     5 type bits = bit list
     1 
     6 
     2 datatype rexp =
     7 datatype rexp =
     3    ZERO
     8    ZERO
     4  | ONE 
     9  | ONE 
     5  | CHAR of char
    10  | CHAR of char 
     6  | ALT  of rexp * rexp
    11  | ALTS of rexp list
     7  | SEQ  of rexp * rexp 
    12  | SEQ  of rexp * rexp 
     8  | STAR of rexp 
    13  | STAR of rexp 
     9  | RECD of string * rexp
    14  | RECD of string * rexp
    10 
    15 
       
    16 fun alt r1 r2 = ALTS [r1, r2]
       
    17 
    11 datatype arexp =
    18 datatype arexp =
    12    AZERO
    19    AZERO
    13  | AONE  of (bool list)
    20  | AONE  of bits
    14  | ACHAR of (bool list) * char
    21  | ACHAR of bits * char
    15  | AALT  of (bool list) * arexp * arexp
    22  | AALTS of bits * (arexp list)
    16  | ASEQ  of (bool list) * arexp * arexp 
    23  | ASEQ  of bits * arexp * arexp 
    17  | ASTAR of (bool list) * arexp 
    24  | ASTAR of bits * arexp 
    18 
    25 
    19 datatype value =
    26 datatype value =
    20    Empty
    27    Empty
    21  | Chr of char
    28  | Chr of char
    22  | Sequ of value * value
    29  | Sequ of value * value
    42 
    49 
    43 infix 9 ++
    50 infix 9 ++
    44 infix 9 --
    51 infix 9 --
    45 infix 9 $
    52 infix 9 $
    46 
    53 
    47 fun op ++ (r1, r2) = ALT(r1, r2)
    54 fun op ++ (r1, r2) = ALTS [r1, r2]
    48 
    55 
    49 fun op -- (r1, r2) = SEQ(r1, r2)
    56 fun op -- (r1, r2) = SEQ(r1, r2)
    50 
    57 
    51 fun op $ (x, r) = RECD(x, r)
    58 fun op $ (x, r) = RECD(x, r)
    52 
    59 
    53 fun alts rs = case rs of
    60 fun alts rs = case rs of 
    54     [] => ZERO
    61     [] => ZERO
    55   | [r] => r
    62   | [r] => r
    56   | r::rs => List.foldl (op ++) r rs
    63   | r::rs => ALTS([r, alts rs])
       
    64 
       
    65 
       
    66 fun sum (nil) = 0 
       
    67   | sum (head::tail) = head + sum(tail);
    57 
    68 
    58 (* size of a regular expressions - for testing purposes *)
    69 (* size of a regular expressions - for testing purposes *)
    59 fun size r = case r of
    70 fun size r = case r of
    60     ZERO => 1
    71     ZERO => 1
    61   | ONE => 1
    72   | ONE => 1
    62   | CHAR(_) => 1
    73   | CHAR(_) => 1
    63   | ALT(r1, r2) => 1 + (size r1) + (size r2)
    74   | ALTS(rs) => 1 + sum (map size rs)
    64   | SEQ(r1, r2) => 1 + (size r1) + (size r2)
    75   | SEQ(r1, r2) => 1 + (size r1) + (size r2)
    65   | STAR(r) => 1 + (size r)
    76   | STAR(r) => 1 + (size r)
    66   | RECD(_, r) => 1 + (size r)
    77   | RECD(_, r) => 1 + (size r)
    67 
    78 
    68 (* nullable function: tests whether the regular 
    79 
    69    expression can recognise the empty string *)
    80 fun erase r = case r of
    70 fun nullable r = case r of
    81     AZERO => ZERO
    71     ZERO => false
    82   | AONE(_) => ONE
    72   | ONE => true
    83   | ACHAR(_, c) => CHAR(c)
    73   | CHAR(_) => false
    84   | AALTS(_, rs) => ALTS(map erase rs)
    74   | ALT(r1, r2) => nullable(r1) orelse nullable(r2)
    85   | ASEQ(_, r1, r2) => SEQ(erase r1, erase r2)
    75   | SEQ(r1, r2) => nullable(r1) andalso nullable(r2)
    86   | ASTAR(_, r)=> STAR(erase r)
    76   | STAR(_) => true
    87 
    77   | RECD(_, r) => nullable(r)
       
    78 
       
    79 (* derivative of a regular expression r w.r.t. a character c *)
       
    80 fun der c r = case r of
       
    81     ZERO => ZERO
       
    82   | ONE => ZERO
       
    83   | CHAR(d) => if c = d then ONE else ZERO
       
    84   | ALT(r1, r2) => ALT(der c r1, der c r2)
       
    85   | SEQ(r1, r2) => 
       
    86       if nullable r1 then ALT(SEQ(der c r1, r2), der c r2)
       
    87       else SEQ(der c r1, r2)
       
    88   | STAR(r) => SEQ(der c r, STAR(r))
       
    89   | RECD(_, r) => der c r
       
    90 
       
    91 (* derivative w.r.t. a list of chars (iterates der) *)
       
    92 fun ders s r = case s of 
       
    93     [] => r
       
    94   | c::s => ders s (der c r)
       
    95 
       
    96 (* extracts a string from value *)
       
    97 fun flatten v = case v of 
       
    98     Empty => ""
       
    99   | Chr(c) => Char.toString c
       
   100   | Left(v) => flatten v
       
   101   | Right(v) => flatten v
       
   102   | Sequ(v1, v2) => flatten v1 ^ flatten v2
       
   103   | Stars(vs) => String.concat (List.map flatten vs)
       
   104   | Rec(_, v) => flatten v
       
   105 
       
   106 
       
   107 (* extracts an environment from a value *)
       
   108 fun env v = case v of 
       
   109     Empty => []
       
   110   | Chr(c) => []
       
   111   | Left(v) => env v
       
   112   | Right(v) => env v
       
   113   | Sequ(v1, v2) => env v1 @ env v2
       
   114   | Stars(vs) => List.foldr (op @) [] (List.map env vs)
       
   115   | Rec(x, v) => (x, flatten v) :: env v
       
   116 
       
   117 fun string_of_pair (x, s) = "(" ^ x ^ "," ^ s ^ ")"
       
   118 fun string_of_env xs = String.concatWith "," (List.map string_of_pair xs)
       
   119 
       
   120 
       
   121 (* the value for a nullable rexp *)
       
   122 fun mkeps r = case r of 
       
   123     ONE => Empty
       
   124   | ALT(r1, r2) => 
       
   125       if nullable r1 then Left(mkeps r1) else Right(mkeps r2)
       
   126   | SEQ(r1, r2) => Sequ(mkeps r1, mkeps r2)
       
   127   | STAR(r) => Stars([])
       
   128   | RECD(x, r) => Rec(x, mkeps r)
       
   129 
       
   130 exception Error
       
   131 
       
   132 (* injection of a char into a value *)
       
   133 fun inj r c v = case (r, v) of
       
   134     (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj r c v1 :: vs)
       
   135   | (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj r1 c v1, v2)
       
   136   | (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj r1 c v1, v2)
       
   137   | (SEQ(r1, r2), Right(v2)) => Sequ(mkeps r1, inj r2 c v2)
       
   138   | (ALT(r1, r2), Left(v1)) => Left(inj r1 c v1)
       
   139   | (ALT(r1, r2), Right(v2)) => Right(inj r2 c v2)
       
   140   | (CHAR(d), Empty) => Chr(d) 
       
   141   | (RECD(x, r1), _) => Rec(x, inj r1 c v)
       
   142   | _ => (print ("\nr: " ^ PolyML.makestring r ^ "\n");
       
   143           print ("v: " ^ PolyML.makestring v ^ "\n");
       
   144           raise Error)
       
   145 
       
   146 (* some "rectification" functions for simplification *)
       
   147 fun f_id v = v
       
   148 fun f_right f = fn v => Right(f v)
       
   149 fun f_left f = fn v => Left(f v)
       
   150 fun f_alt f1 f2 = fn v => case v of
       
   151     Right(v) => Right(f2 v)
       
   152   | Left(v) => Left(f1 v)
       
   153 fun f_seq f1 f2 = fn v => case v of 
       
   154   Sequ(v1, v2) => Sequ(f1 v1, f2 v2)
       
   155 fun f_seq_Empty1 f1 f2 = fn v => Sequ(f1 Empty, f2 v)
       
   156 fun f_seq_Empty2 f1 f2 = fn v => Sequ(f1 v, f2 Empty)
       
   157 fun f_rec f = fn v => case v of
       
   158     Rec(x, v) => Rec(x, f v)
       
   159 
       
   160 exception ShouldNotHappen
       
   161 
       
   162 fun f_error v = raise ShouldNotHappen
       
   163 
       
   164 (* simplification of regular expressions returning also an 
       
   165    rectification function; no simplification under STARs *)
       
   166 fun simp r = case r of
       
   167     ALT(r1, r2) => 
       
   168       let val (r1s, f1s) = simp r1  
       
   169           val (r2s, f2s) = simp r2 in
       
   170         (case (r1s, r2s) of
       
   171             (ZERO, _) => (r2s, f_right f2s)
       
   172           | (_, ZERO) => (r1s, f_left f1s)
       
   173           | (_, _)    => if r1s = r2s then (r1s, f_left f1s)
       
   174                          else (ALT (r1s, r2s), f_alt f1s f2s))
       
   175       end 
       
   176   | SEQ(r1, r2) => 
       
   177       let val (r1s, f1s) = simp r1 
       
   178           val (r2s, f2s) = simp r2 in
       
   179         (case (r1s, r2s) of
       
   180           (ZERO, _)  => (ZERO, f_error)
       
   181         | (_, ZERO)  => (ZERO, f_error)
       
   182         | (ONE, _) => (r2s, f_seq_Empty1 f1s f2s)
       
   183         | (_, ONE) => (r1s, f_seq_Empty2 f1s f2s)
       
   184         | (_, _)     => (SEQ(r1s, r2s), f_seq f1s f2s))
       
   185       end  
       
   186   | RECD(x, r1) => 
       
   187       let val (r1s, f1s) = simp r1 in
       
   188         (RECD(x, r1s), f_rec f1s)
       
   189       end
       
   190   | r => (r, f_id)
       
   191 
       
   192 fun der_simp c r = case r of
       
   193     ZERO => (ZERO, f_id)
       
   194   | ONE => (ZERO, f_id)
       
   195   | CHAR(d) => ((if c = d then ONE else ZERO), f_id)
       
   196   | ALT(r1, r2) => 
       
   197       let 
       
   198         val (r1d, f1d) = der_simp c r1 
       
   199         val (r2d, f2d) = der_simp c r2 
       
   200       in
       
   201         case (r1d, r2d) of
       
   202           (ZERO, _) => (r2d, f_right f2d)
       
   203         | (_, ZERO) => (r1d, f_left f1d)
       
   204         | (_, _)    => if r1d = r2d then (r1d, f_left f1d)
       
   205                        else (ALT (r1d, r2d), f_alt f1d f2d)
       
   206       end
       
   207   | SEQ(r1, r2) => 
       
   208       if nullable r1 
       
   209       then 
       
   210         let 
       
   211           val (r1d, f1d) = der_simp c r1 
       
   212           val (r2d, f2d) = der_simp c r2
       
   213           val (r2s, f2s) = simp r2 
       
   214         in
       
   215           case (r1d, r2s, r2d) of
       
   216             (ZERO, _, _)  => (r2d, f_right f2d)
       
   217           | (_, ZERO, _)  => (r2d, f_right f2d)
       
   218           | (_, _, ZERO)  => (SEQ(r1d, r2s), f_left (f_seq f1d f2s))
       
   219           | (ONE, _, _) => (ALT(r2s, r2d), f_alt (f_seq_Empty1 f1d f2s) f2d)
       
   220           | (_, ONE, _) => (ALT(r1d, r2d), f_alt (f_seq_Empty2 f1d f2s) f2d)
       
   221           | (_, _, _)     => (ALT(SEQ(r1d, r2s), r2d), f_alt (f_seq f1d f2s) f2d)
       
   222         end
       
   223       else 
       
   224         let 
       
   225           val (r1d, f1d) = der_simp c r1 
       
   226           val (r2s, f2s) = simp r2
       
   227         in
       
   228           case (r1d, r2s) of
       
   229             (ZERO, _) => (ZERO, f_error)
       
   230           | (_, ZERO) => (ZERO, f_error)
       
   231           | (ONE, _) => (r2s, f_seq_Empty1 f1d f2s)
       
   232           | (_, ONE) => (r1d, f_seq_Empty2 f1d f2s)
       
   233           | (_, _) => (SEQ(r1d, r2s), f_seq f1d f2s)
       
   234   	end	  
       
   235   | STAR(r1) => 
       
   236       let 
       
   237         val (r1d, f1d) = der_simp c r1 
       
   238       in
       
   239         case r1d of
       
   240           ZERO => (ZERO, f_error)
       
   241         | ONE => (STAR r1, f_seq_Empty1 f1d f_id)
       
   242         | _ => (SEQ(r1d, STAR(r1)), f_seq f1d f_id)
       
   243       end
       
   244   | RECD(x, r1) => der_simp c r1 
       
   245 
       
   246 (* matcher function *)
       
   247 fun matcher r s = nullable(ders (explode s) r)
       
   248 
       
   249 (* lexing function (produces a value) *)
       
   250 exception LexError
       
   251 
       
   252 fun lex r s = case s of 
       
   253     [] => if (nullable r) then mkeps r else raise LexError
       
   254   | c::cs => inj r c (lex (der c r) cs)
       
   255 
       
   256 fun lexing r s = lex r (explode s)
       
   257 
       
   258 (* lexing with simplification *)
       
   259 fun lex_simp r s = case s of 
       
   260     [] => if (nullable r) then mkeps r else raise LexError
       
   261   | c::cs => 
       
   262     let val (r_simp, f_simp) = simp (der c r) in
       
   263       inj r c (f_simp (lex_simp r_simp cs))
       
   264     end
       
   265 
       
   266 fun lexing_simp r s = lex_simp r (explode s)
       
   267 
       
   268 fun lex_simp2 r s = case s of 
       
   269     [] => if (nullable r) then mkeps r else raise LexError
       
   270   | c::cs => 
       
   271     let val (r_simp, f_simp) = der_simp c r in
       
   272       inj r c (f_simp (lex_simp2 r_simp cs))
       
   273     end
       
   274 
       
   275 fun lexing_simp2 r s = lex_simp2 r (explode s)
       
   276 
       
   277 fun lex_acc r s f = case s of 
       
   278     [] => if (nullable r) then f (mkeps r) else raise LexError
       
   279   | c::cs => 
       
   280     let val (r_simp, f_simp) = simp (der c r) in
       
   281       lex_acc r_simp cs (fn v => f (inj r c (f_simp v)))
       
   282     end
       
   283 
       
   284 fun lexing_acc r s  = lex_acc r (explode s) (f_id)
       
   285 
       
   286 fun lex_acc2 r s f = case s of 
       
   287     [] => if (nullable r) then f (mkeps r) else raise LexError
       
   288   | c::cs => 
       
   289     let val (r_simp, f_simp) = der_simp c r in
       
   290       lex_acc2 r_simp cs (fn v => f (inj r c (f_simp v)))
       
   291     end
       
   292 
       
   293 fun lexing_acc2 r s  = lex_acc2 r (explode s) (f_id)
       
   294 
       
   295 (* bit-coded version *)
       
   296 
    88 
   297 fun fuse bs r = case r of
    89 fun fuse bs r = case r of
   298   AZERO => AZERO
    90     AZERO => AZERO
   299 | AONE(cs) => AONE(bs @ cs)
    91   | AONE(cs) => AONE(bs @ cs)
   300 | ACHAR(cs, c) => ACHAR(bs @ cs, c)
    92   | ACHAR(cs, c) => ACHAR(bs @ cs, c)
   301 | AALT(cs, r1, r2) => AALT(bs @ cs, r1, r2)
    93   | AALTS(cs, rs) => AALTS(bs @ cs, rs)
   302 | ASEQ(cs, r1, r2) => ASEQ(bs @ cs, r1, r2)
    94   | ASEQ(cs, r1, r2) => ASEQ(bs @ cs, r1, r2)
   303 | ASTAR(cs, r) => ASTAR(bs @ cs, r)
    95   | ASTAR(cs, r) => ASTAR(bs @ cs, r)
   304 
    96 
   305 fun internalise r = case r of
    97 fun internalise r = case r of
   306   ZERO => AZERO
    98   ZERO => AZERO
   307 | ONE => AONE([])
    99 | ONE => AONE([])
   308 | CHAR(c) => ACHAR([], c)
   100 | CHAR(c) => ACHAR([], c)
   309 | ALT(r1, r2) => AALT([], fuse [false] (internalise r1), fuse [true] (internalise r2))
   101 | ALTS([r1, r2]) => AALTS([], [fuse [Z] (internalise r1), fuse [S] (internalise r2)])
   310 | SEQ(r1, r2) => ASEQ([], internalise r1, internalise r2)
   102 | SEQ(r1, r2) => ASEQ([], internalise r1, internalise r2)
   311 | STAR(r) => ASTAR([], internalise r)
   103 | STAR(r) => ASTAR([], internalise r)
   312 | RECD(x, r) => internalise r
   104 | RECD(x, r) => internalise r
   313 
   105 
   314 fun decode_aux r bs = case (r, bs) of
   106 fun decode_aux r bs = case (r, bs) of
   315   (ONE, bs) => (Empty, bs)
   107   (ONE, bs) => (Empty, bs)
   316 | (CHAR(c), bs) => (Chr(c), bs)
   108 | (CHAR(c), bs) => (Chr(c), bs)
   317 | (ALT(r1, r2), false::bs) => 
   109 | (ALTS([r1]), bs) => decode_aux r1 bs
   318      let val (v, bs1) = decode_aux r1 bs
   110 | (ALTS(rs), Z::bs1) => 
   319      in (Left(v), bs1) end
   111      let val (v, bs2) = decode_aux (hd rs) bs1
   320 | (ALT(r1, r2), true::bs) => 
   112      in (Left(v), bs2) end
   321      let val (v, bs1) = decode_aux r2 bs
   113 | (ALTS(rs), S::bs1) => 
   322      in (Right(v), bs1) end
   114      let val (v, bs2) = decode_aux (ALTS (tl rs)) bs1
       
   115      in (Right(v), bs2) end
   323 | (SEQ(r1, r2), bs) => 
   116 | (SEQ(r1, r2), bs) => 
   324     let val (v1, bs1) = decode_aux r1 bs 
   117     let val (v1, bs1) = decode_aux r1 bs 
   325         val (v2, bs2) = decode_aux r2 bs1 
   118         val (v2, bs2) = decode_aux r2 bs1 
   326     in (Sequ(v1, v2), bs2) end
   119     in (Sequ(v1, v2), bs2) end
   327 | (STAR(r1), false::bs) => 
   120 | (STAR(r1), Z::bs) => 
   328     let val (v, bs1) = decode_aux r1 bs 
   121     let val (v, bs1) = decode_aux r1 bs 
   329         val (Stars(vs), bs2) = decode_aux (STAR r1) bs1
   122         val (Stars(vs), bs2) = decode_aux (STAR r1) bs1
   330     in (Stars(v::vs), bs2) end
   123     in (Stars(v::vs), bs2) end
   331 | (STAR(_), true::bs) => (Stars [], bs)
   124 | (STAR(_), S::bs) => (Stars [], bs)
   332 | (RECD(x, r1), bs) => 
   125 | (RECD(x, r1), bs) => 
   333     let val (v, bs1) = decode_aux r1 bs
   126     let val (v, bs1) = decode_aux r1 bs
   334     in (Rec(x, v), bs1) end
   127     in (Rec(x, v), bs1) end
   335 
   128 
   336 exception DecodeError
   129 exception DecodeError
   337 
   130 
   338 fun decode r bs = case (decode_aux r bs) of
   131 fun decode r bs = case (decode_aux r bs) of
   339   (v, []) => v
   132   (v, []) => v
   340 | _ => raise DecodeError
   133 | _ => raise DecodeError
   341 
   134 
   342 fun anullable r = case r of
   135 fun bnullable r = case r of
   343   AZERO => false
   136   AZERO => false
   344 | AONE(_) => true
   137 | AONE(_) => true
   345 | ACHAR(_,_) => false
   138 | ACHAR(_, _) => false
   346 | AALT(_, r1, r2) => anullable(r1) orelse anullable(r2)
   139 | AALTS(_, rs) => List.exists bnullable rs
   347 | ASEQ(_, r1, r2) => anullable(r1) andalso anullable(r2)
   140 | ASEQ(_, r1, r2) => bnullable(r1) andalso bnullable(r2)
   348 | ASTAR(_, _) => true
   141 | ASTAR(_, _) => true
   349 
   142 
   350 fun mkepsBC r = case r of
   143 fun bmkeps r = case r of
   351   AONE(bs) => bs
   144   AONE(bs) => bs
   352 | AALT(bs, r1, r2) => 
   145 | AALTS(bs, rs) => 
   353     if anullable(r1) then bs @ mkepsBC(r1) else bs @ mkepsBC(r2)
   146     let 
   354 | ASEQ(bs, r1, r2) => bs @ mkepsBC(r1) @ mkepsBC(r2)
   147       val SOME(r) = List.find bnullable rs
   355 | ASTAR(bs, r) => bs @ [true]
   148     in bs @ bmkeps(r) end
   356 
   149 | ASEQ(bs, r1, r2) => bs @ bmkeps(r1) @ bmkeps(r2)
   357 fun ader c r = case r of
   150 | ASTAR(bs, r) => bs @ [S]
       
   151 
       
   152 fun bder c r = case r of
   358   AZERO => AZERO
   153   AZERO => AZERO
   359 | AONE(_) => AZERO
   154 | AONE(_) => AZERO
   360 | ACHAR(bs, d) => if c = d then AONE(bs) else AZERO
   155 | ACHAR(bs, d) => if c = d then AONE(bs) else AZERO
   361 | AALT(bs, r1, r2) => AALT(bs, ader c r1, ader c r2)
   156 | AALTS(bs, rs) => AALTS(bs, map (bder c) rs)
   362 | ASEQ(bs, r1, r2) => 
   157 | ASEQ(bs, r1, r2) => 
   363     if (anullable r1) then AALT(bs, ASEQ([], ader c r1, r2), fuse (mkepsBC r1) (ader c r2))
   158     if (bnullable r1) 
   364     else ASEQ(bs, ader c r1, r2)
   159     then AALTS(bs, [ASEQ([], bder c r1, r2), fuse (bmkeps r1) (bder c r2)])
   365 | ASTAR(bs, r) => ASEQ(bs, fuse [false] (ader c r), ASTAR([], r))
   160     else ASEQ(bs, bder c r1, r2)
   366 
   161 | ASTAR(bs, r) => ASEQ(bs, fuse [Z] (bder c r), ASTAR([], r))
   367 fun aders s r = case s of 
   162 
       
   163 fun bders s r = case s of 
   368   [] => r
   164   [] => r
   369 | c::s => aders s (ader c r)
   165 | c::s => bders s (bder c r)
   370 
   166 
   371 fun alex r s = case s of 
   167 
   372     [] => if (anullable r) then mkepsBC r else raise LexError
   168 exception LexError
   373   | c::cs => alex (ader c r) cs
   169 
   374 
   170 fun blex r s = case s of 
   375 fun alexing r s = decode r (alex (internalise r) (explode s))
   171     [] => if (bnullable r) then bmkeps r else raise LexError
   376 
   172   | c::cs => blex (bder c r) cs
   377 fun asimp r = case r of
   173 
   378   ASEQ(bs1, r1, r2) => (case (asimp r1, asimp r2) of
   174 fun blexing r s = decode r (blex (internalise r) (explode s))
   379       (AZERO, _) => AZERO
   175 
   380     | (_, AZERO) => AZERO
   176 (* Simplification *)
   381     | (AONE(bs2), r2s) => fuse (bs1 @ bs2) r2s
   177 
   382     | (r1s, r2s) => ASEQ(bs1, r1s, r2s)
   178 fun distinctBy xs f acc = case xs of
   383   )
   179    [] => []
   384 | AALT(bs1, r1, r2) => (case (asimp r1, asimp r2) of
   180  | x::xs =>
   385       (AZERO, r2s) => fuse bs1 r2s
   181     let   
   386     | (r1s, AZERO) => fuse bs1 r1s
   182       val res = f x
   387     | (r1s, r2s) => AALT(bs1, r1s, r2s)
   183     in  if (List.exists (fn x => x = res) acc)
   388   )
   184         then distinctBy xs f acc  
   389 | r => r
   185         else x::distinctBy xs f (res::acc)
   390 
   186     end
   391 fun alex_simp r s = case s of 
   187 
   392     [] => if (anullable r) then mkepsBC r else raise LexError
   188 fun flats rs = case rs of
   393   | c::cs => alex_simp (asimp (ader c r)) cs
   189     [] => []
   394 
   190   | AZERO::rs1 => flats rs1
   395 fun alexing_simp r s = decode r (alex_simp (internalise r) (explode s))
   191   | AALTS(bs, rs1)::rs2 => map (fuse bs) rs1 @ flats rs2
   396 
   192   | r1::rs2 => r1::flats rs2
       
   193 
       
   194 
       
   195 fun stack r1 r2 = case r1 of
       
   196     AONE(bs2) => fuse bs2 r2
       
   197   | _ => ASEQ([], r1, r2)
       
   198 
       
   199 
       
   200 fun bsimp r = case r of
       
   201     ASEQ(bs1, r1, r2) => (case (bsimp r1, bsimp r2) of
       
   202         (AZERO, _) => AZERO
       
   203       | (_, AZERO) => AZERO
       
   204       | (AONE(bs2), r2s) => fuse (bs1 @ bs2) r2s
       
   205       | (AALTS(bs2, rs), r2s) =>  
       
   206            AALTS(bs1 @ bs2, map (fn r => stack r r2s) rs)
       
   207       | (r1s, r2s) => ASEQ(bs1, r1s, r2s)) 
       
   208   | AALTS(bs1, rs) => (case distinctBy (flats (map bsimp rs)) erase [] of
       
   209         [] => AZERO
       
   210       | [r] => fuse bs1 r
       
   211       | rs2 => AALTS(bs1, rs2))  
       
   212   | r => r
       
   213 
       
   214 fun bders_simp r s = case s of 
       
   215   [] => r
       
   216 | c::s => bders_simp (bsimp (bder c r)) s
       
   217 
       
   218 fun blex_simp r s = case s of 
       
   219     [] => if (bnullable r) then bmkeps r else raise LexError
       
   220   | c::cs => blex_simp (bsimp (bder c r)) cs
       
   221 
       
   222 fun blexing_simp r s = 
       
   223     decode r (blex_simp (internalise r) (explode s))
   397 
   224 
   398 
   225 
   399 (* Lexing rules for a small WHILE language *)
   226 (* Lexing rules for a small WHILE language *)
   400 val sym = alts (List.map chr (explode "abcdefghijklmnopqrstuvwxyz"))
   227 val sym = alts (List.map chr (explode "abcdefghijklmnopqrstuvwxyz"))
   401 val digit = alts (List.map chr (explode "0123456789"))
   228 val digit = alts (List.map chr (explode "0123456789"))
   426   ============ *)
   253   ============ *)
   427 
   254 
   428 fun time f x =
   255 fun time f x =
   429   let
   256   let
   430   val t_start = Timer.startCPUTimer()
   257   val t_start = Timer.startCPUTimer()
   431   val f_x = (f x; f x; f x; f x; f x; f x; f x; f x; f x; f x)
   258   val f_x = (f x; f x; f x; f x; f x)
   432   val t_end = Time.toReal(#usr(Timer.checkCPUTimer(t_start))) / 10.0
   259   val t_end = Time.toReal(#usr(Timer.checkCPUTimer(t_start))) / 5.0
   433 in
   260 in
   434   (print ((Real.toString t_end) ^ "\n"); f_x)
   261   (print ((Real.toString t_end) ^ "\n"); f_x)
   435 end
   262 end
   436 
       
   437 val prog = "ab";
       
   438 val reg = ("x" $ ((str "a") -- (str "b")));
       
   439 print("Simp: " ^ PolyML.makestring (lexing_simp reg prog) ^ "\n");
       
   440 print("Acc:  " ^ PolyML.makestring (lexing_acc  reg prog) ^ "\n");
       
   441 print("Env   " ^ string_of_env (env (lexing_acc reg prog)) ^ "\n");
       
   442 
       
   443 fun fst (x, y) = x;
       
   444 fun snd (x, y) = y;
       
   445 
       
   446 val derS = [reg,
       
   447             der #"a" reg,
       
   448             fst (simp (der #"a" reg)),
       
   449             fst (der_simp #"a" reg)];
       
   450 
       
   451 val vS = [(snd (simp (der #"a" reg))) (Chr(#"b")),
       
   452           (snd (der_simp #"a" reg)) (Chr(#"b"))
       
   453          ];
       
   454 
       
   455 print("Ders: \n" ^ 
       
   456        String.concatWith "\n" (List.map PolyML.makestring derS)
       
   457        ^ "\n\n");
       
   458 print("Vs: \n" ^ 
       
   459        String.concatWith "\n" (List.map PolyML.makestring vS)
       
   460        ^ "\n\n");
       
   461 
       
   462 
       
   463 val prog0 = "read n";
       
   464 print("Env0 is: \n" ^  string_of_env (env (lexing_acc while_regs prog0)) ^ "\n");
       
   465 
       
   466 val prog1 = "read  n; write (n)";
       
   467 print("Env1 is: \n" ^ string_of_env (env (lexing_acc while_regs prog1)) ^ "\n");
       
   468 print("Env1 is: \n" ^ string_of_env (env (alexing while_regs prog1)) ^ "\n");
       
   469 
   263 
   470 
   264 
   471 val prog2 = String.concatWith "\n" 
   265 val prog2 = String.concatWith "\n" 
   472   ["i := 2;",
   266   ["i := 2;",
   473    "max := 100;",
   267    "max := 100;",
   481    " if isprime == 1 then write i else skip;",
   275    " if isprime == 1 then write i else skip;",
   482    " i := i + 1",
   276    " i := i + 1",
   483    "}"];
   277    "}"];
   484 
   278 
   485 
   279 
   486 let 
       
   487   val tst = (lexing_simp while_regs prog2 = lexing_acc while_regs prog2)
       
   488 in
       
   489   print("Sanity test: >>" ^ (PolyML.makestring tst) ^ "<<\n")
       
   490 end;
       
   491 
       
   492 (* loops in ML *)
   280 (* loops in ML *)
   493 datatype for = to of int * int
   281 datatype for = to of int * int
   494 infix to 
   282 infix to 
   495 
   283 
   496 val for =
   284 val for =
   508        in loop lo end)
   296        in loop lo end)
   509 
   297 
   510 
   298 
   511 fun step_simp i = 
   299 fun step_simp i = 
   512   (print ((Int.toString i) ^ ": ") ;
   300   (print ((Int.toString i) ^ ": ") ;
   513    time (lexing_simp while_regs) (string_repeat prog2 i)); 
   301    time (blexing_simp while_regs) (string_repeat prog2 i)); 
   514 
       
   515 fun step_simp2 i = 
       
   516   (print ((Int.toString i) ^ ": ") ;
       
   517    time (lexing_simp2 while_regs) (string_repeat prog2 i));
       
   518 
       
   519 fun step_acc i = 
       
   520   (print ((Int.toString i) ^ ": ") ;
       
   521    time (lexing_acc while_regs) (string_repeat prog2 i));
       
   522 
       
   523 fun step_acc2 i = 
       
   524   (print ((Int.toString i) ^ ": ") ;
       
   525    time (lexing_acc2 while_regs) (string_repeat prog2 i));
       
   526 
       
   527 fun astep_basic i = 
       
   528   (print ((Int.toString i) ^ ": ") ;
       
   529    time (alexing while_regs) (string_repeat prog2 i)); 
       
   530 
       
   531 fun astep_simp i = 
       
   532   (print ((Int.toString i) ^ ": ") ;
       
   533    time (alexing_simp while_regs) (string_repeat prog2 i)); 
       
   534 
       
   535 
   302 
   536 (*
   303 (*
   537 val main1 = forby 1000 (1000 to 5000) step_simp;
   304 val main1 = forby 1000 (1000 to 5000) step_simp;
   538 print "\n";
   305 print "\n";
   539 val main2 = forby 1000 (1000 to 5000) step_simp2;
   306 val main2 = forby 1000 (1000 to 5000) step_simp2;
   542 print "\n";
   309 print "\n";
   543 val main4 = forby 1000 (1000 to 5000) step_acc2; 
   310 val main4 = forby 1000 (1000 to 5000) step_acc2; 
   544 *)
   311 *)
   545 
   312 
   546 print "\n";
   313 print "\n";
   547 val main5 = forby 1 (1 to 5) astep_simp; 
   314 val main5 = forby 10 (10 to 50) step_simp; 
       
   315 
       
   316 print("Size after 50: " ^ 
       
   317   PolyML.makestring(size (erase (bders_simp (internalise while_regs) (explode (string_repeat prog2 50))))) ^ "\n");
       
   318 
       
   319 print("Size after 100: " ^ 
       
   320   PolyML.makestring(size (erase (bders_simp (internalise while_regs) (explode (string_repeat prog2 100))))) ^ "\n");
       
   321