progs/sml/re.ML
changeset 156 6a43ea9305ba
parent 3 94824659f6d7
child 317 db0ff630bbb7
equal deleted inserted replaced
155:c9027db225cc 156:6a43ea9305ba
     1 
     1 
     2 datatype rexp =
     2 datatype rexp =
     3    NULL 
     3    ZERO
     4  | EMPTY 
     4  | ONE 
     5  | CHAR of char
     5  | CHAR of char
     6  | ALT of rexp * rexp
     6  | ALT of rexp * rexp
     7  | SEQ of rexp * rexp 
     7  | SEQ of rexp * rexp 
     8  | STAR of rexp 
     8  | STAR of rexp 
     9  | RECD of string * rexp
     9  | RECD of string * rexp
    10 
    10 
    11 datatype value =
    11 datatype value =
    12    Void
    12    Empty
    13  | Chr of char
    13  | Chr of char
    14  | Sequ of value * value
    14  | Sequ of value * value
    15  | Left of value
    15  | Left of value
    16  | Right of value
    16  | Right of value
    17  | Stars of value list
    17  | Stars of value list
    20 (* some helper functions for strings *)   
    20 (* some helper functions for strings *)   
    21 fun string_repeat s n = String.concat (List.tabulate (n, fn _ => s))
    21 fun string_repeat s n = String.concat (List.tabulate (n, fn _ => s))
    22 
    22 
    23 (* some helper functions for rexps *)
    23 (* some helper functions for rexps *)
    24 fun seq s = case s of
    24 fun seq s = case s of
    25     [] => EMPTY
    25     [] => ONE
    26   | [c] => CHAR(c)
    26   | [c] => CHAR(c)
    27   | c::cs => SEQ(CHAR(c), seq cs)
    27   | c::cs => SEQ(CHAR(c), seq cs)
    28 
    28 
    29 fun chr c = CHAR(c)
    29 fun chr c = CHAR(c)
    30 
    30 
    41 fun op -- (r1, r2) = SEQ(r1, r2)
    41 fun op -- (r1, r2) = SEQ(r1, r2)
    42 
    42 
    43 fun op $ (x, r) = RECD(x, r)
    43 fun op $ (x, r) = RECD(x, r)
    44 
    44 
    45 fun alts rs = case rs of
    45 fun alts rs = case rs of
    46     [] => NULL
    46     [] => ZERO
    47   | [r] => r
    47   | [r] => r
    48   | r::rs => List.foldl (op ++) r rs
    48   | r::rs => List.foldl (op ++) r rs
    49 
    49 
    50 
    50 
    51 (* size of a regular expressions - for testing purposes *)
    51 (* size of a regular expressions - for testing purposes *)
    52 fun size r = case r of
    52 fun size r = case r of
    53     NULL => 1
    53     ZERO => 1
    54   | EMPTY => 1
    54   | ONE => 1
    55   | CHAR(_) => 1
    55   | CHAR(_) => 1
    56   | ALT(r1, r2) => 1 + (size r1) + (size r2)
    56   | ALT(r1, r2) => 1 + (size r1) + (size r2)
    57   | SEQ(r1, r2) => 1 + (size r1) + (size r2)
    57   | SEQ(r1, r2) => 1 + (size r1) + (size r2)
    58   | STAR(r) => 1 + (size r)
    58   | STAR(r) => 1 + (size r)
    59   | RECD(_, r) => 1 + (size r)
    59   | RECD(_, r) => 1 + (size r)
    60 
    60 
    61 (* nullable function: tests whether the regular 
    61 (* nullable function: tests whether the regular 
    62    expression can recognise the empty string *)
    62    expression can recognise the empty string *)
    63 fun nullable r = case r of
    63 fun nullable r = case r of
    64     NULL => false
    64     ZERO => false
    65   | EMPTY => true
    65   | ONE => true
    66   | CHAR(_) => false
    66   | CHAR(_) => false
    67   | ALT(r1, r2) => nullable(r1) orelse nullable(r2)
    67   | ALT(r1, r2) => nullable(r1) orelse nullable(r2)
    68   | SEQ(r1, r2) => nullable(r1) andalso nullable(r2)
    68   | SEQ(r1, r2) => nullable(r1) andalso nullable(r2)
    69   | STAR(_) => true
    69   | STAR(_) => true
    70   | RECD(_, r) => nullable(r)
    70   | RECD(_, r) => nullable(r)
    71 
    71 
    72 (* derivative of a regular expression r w.r.t. a character c *)
    72 (* derivative of a regular expression r w.r.t. a character c *)
    73 fun der c r = case r of
    73 fun der c r = case r of
    74     NULL => NULL
    74     ZERO => ZERO
    75   | EMPTY => NULL
    75   | ONE => ZERO
    76   | CHAR(d) => if c = d then EMPTY else NULL
    76   | CHAR(d) => if c = d then ONE else ZERO
    77   | ALT(r1, r2) => ALT(der c r1, der c r2)
    77   | ALT(r1, r2) => ALT(der c r1, der c r2)
    78   | SEQ(r1, r2) => 
    78   | SEQ(r1, r2) => 
    79       if nullable r1 then ALT(SEQ(der c r1, r2), der c r2)
    79       if nullable r1 then ALT(SEQ(der c r1, r2), der c r2)
    80       else SEQ(der c r1, r2)
    80       else SEQ(der c r1, r2)
    81   | STAR(r) => SEQ(der c r, STAR(r))
    81   | STAR(r) => SEQ(der c r, STAR(r))
    86     [] => r
    86     [] => r
    87   | c::s => ders s (der c r)
    87   | c::s => ders s (der c r)
    88 
    88 
    89 (* extracts a string from value *)
    89 (* extracts a string from value *)
    90 fun flatten v = case v of 
    90 fun flatten v = case v of 
    91     Void => ""
    91     Empty => ""
    92   | Chr(c) => Char.toString c
    92   | Chr(c) => Char.toString c
    93   | Left(v) => flatten v
    93   | Left(v) => flatten v
    94   | Right(v) => flatten v
    94   | Right(v) => flatten v
    95   | Sequ(v1, v2) => flatten v1 ^ flatten v2
    95   | Sequ(v1, v2) => flatten v1 ^ flatten v2
    96   | Stars(vs) => String.concat (List.map flatten vs)
    96   | Stars(vs) => String.concat (List.map flatten vs)
    97   | Rec(_, v) => flatten v
    97   | Rec(_, v) => flatten v
    98 
    98 
    99 
    99 
   100 (* extracts an environment from a value *)
   100 (* extracts an environment from a value *)
   101 fun env v = case v of 
   101 fun env v = case v of 
   102     Void => []
   102     Empty => []
   103   | Chr(c) => []
   103   | Chr(c) => []
   104   | Left(v) => env v
   104   | Left(v) => env v
   105   | Right(v) => env v
   105   | Right(v) => env v
   106   | Sequ(v1, v2) => env v1 @ env v2
   106   | Sequ(v1, v2) => env v1 @ env v2
   107   | Stars(vs) => List.foldr (op @) [] (List.map env vs)
   107   | Stars(vs) => List.foldr (op @) [] (List.map env vs)
   111 fun string_of_env xs = String.concatWith "," (List.map string_of_pair xs)
   111 fun string_of_env xs = String.concatWith "," (List.map string_of_pair xs)
   112 
   112 
   113 
   113 
   114 (* the value for a nullable rexp *)
   114 (* the value for a nullable rexp *)
   115 fun mkeps r = case r of 
   115 fun mkeps r = case r of 
   116     EMPTY => Void
   116     ONE => Empty
   117   | ALT(r1, r2) => 
   117   | ALT(r1, r2) => 
   118       if nullable r1 then Left(mkeps r1) else Right(mkeps r2)
   118       if nullable r1 then Left(mkeps r1) else Right(mkeps r2)
   119   | SEQ(r1, r2) => Sequ(mkeps r1, mkeps r2)
   119   | SEQ(r1, r2) => Sequ(mkeps r1, mkeps r2)
   120   | STAR(r) => Stars([])
   120   | STAR(r) => Stars([])
   121   | RECD(x, r) => Rec(x, mkeps r)
   121   | RECD(x, r) => Rec(x, mkeps r)
   128   | (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj r1 c v1, v2)
   128   | (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj r1 c v1, v2)
   129   | (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj r1 c v1, v2)
   129   | (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj r1 c v1, v2)
   130   | (SEQ(r1, r2), Right(v2)) => Sequ(mkeps r1, inj r2 c v2)
   130   | (SEQ(r1, r2), Right(v2)) => Sequ(mkeps r1, inj r2 c v2)
   131   | (ALT(r1, r2), Left(v1)) => Left(inj r1 c v1)
   131   | (ALT(r1, r2), Left(v1)) => Left(inj r1 c v1)
   132   | (ALT(r1, r2), Right(v2)) => Right(inj r2 c v2)
   132   | (ALT(r1, r2), Right(v2)) => Right(inj r2 c v2)
   133   | (CHAR(d), Void) => Chr(d) 
   133   | (CHAR(d), Empty) => Chr(d) 
   134   | (RECD(x, r1), _) => Rec(x, inj r1 c v)
   134   | (RECD(x, r1), _) => Rec(x, inj r1 c v)
   135   | _ => (print ("\nr: " ^ PolyML.makestring r ^ "\n");
   135   | _ => (print ("\nr: " ^ PolyML.makestring r ^ "\n");
   136           print ("v: " ^ PolyML.makestring v ^ "\n");
   136           print ("v: " ^ PolyML.makestring v ^ "\n");
   137           raise Error)
   137           raise Error)
   138 
   138 
   143 fun f_alt f1 f2 = fn v => case v of
   143 fun f_alt f1 f2 = fn v => case v of
   144     Right(v) => Right(f2 v)
   144     Right(v) => Right(f2 v)
   145   | Left(v) => Left(f1 v)
   145   | Left(v) => Left(f1 v)
   146 fun f_seq f1 f2 = fn v => case v of 
   146 fun f_seq f1 f2 = fn v => case v of 
   147   Sequ(v1, v2) => Sequ(f1 v1, f2 v2)
   147   Sequ(v1, v2) => Sequ(f1 v1, f2 v2)
   148 fun f_seq_Void1 f1 f2 = fn v => Sequ(f1 Void, f2 v)
   148 fun f_seq_Empty1 f1 f2 = fn v => Sequ(f1 Empty, f2 v)
   149 fun f_seq_Void2 f1 f2 = fn v => Sequ(f1 v, f2 Void)
   149 fun f_seq_Empty2 f1 f2 = fn v => Sequ(f1 v, f2 Empty)
   150 fun f_rec f = fn v => case v of
   150 fun f_rec f = fn v => case v of
   151     Rec(x, v) => Rec(x, f v)
   151     Rec(x, v) => Rec(x, f v)
   152 
   152 
   153 exception ShouldNotHappen
   153 exception ShouldNotHappen
   154 
   154 
   159 fun simp r = case r of
   159 fun simp r = case r of
   160     ALT(r1, r2) => 
   160     ALT(r1, r2) => 
   161       let val (r1s, f1s) = simp r1  
   161       let val (r1s, f1s) = simp r1  
   162           val (r2s, f2s) = simp r2 in
   162           val (r2s, f2s) = simp r2 in
   163         (case (r1s, r2s) of
   163         (case (r1s, r2s) of
   164             (NULL, _) => (r2s, f_right f2s)
   164             (ZERO, _) => (r2s, f_right f2s)
   165           | (_, NULL) => (r1s, f_left f1s)
   165           | (_, ZERO) => (r1s, f_left f1s)
   166           | (_, _)    => if r1s = r2s then (r1s, f_left f1s)
   166           | (_, _)    => if r1s = r2s then (r1s, f_left f1s)
   167                          else (ALT (r1s, r2s), f_alt f1s f2s))
   167                          else (ALT (r1s, r2s), f_alt f1s f2s))
   168       end 
   168       end 
   169   | SEQ(r1, r2) => 
   169   | SEQ(r1, r2) => 
   170       let val (r1s, f1s) = simp r1 
   170       let val (r1s, f1s) = simp r1 
   171           val (r2s, f2s) = simp r2 in
   171           val (r2s, f2s) = simp r2 in
   172         (case (r1s, r2s) of
   172         (case (r1s, r2s) of
   173           (NULL, _)  => (NULL, f_error)
   173           (ZERO, _)  => (ZERO, f_error)
   174         | (_, NULL)  => (NULL, f_error)
   174         | (_, ZERO)  => (ZERO, f_error)
   175         | (EMPTY, _) => (r2s, f_seq_Void1 f1s f2s)
   175         | (ONE, _) => (r2s, f_seq_Empty1 f1s f2s)
   176         | (_, EMPTY) => (r1s, f_seq_Void2 f1s f2s)
   176         | (_, ONE) => (r1s, f_seq_Empty2 f1s f2s)
   177         | (_, _)     => (SEQ(r1s, r2s), f_seq f1s f2s))
   177         | (_, _)     => (SEQ(r1s, r2s), f_seq f1s f2s))
   178       end  
   178       end  
   179   | RECD(x, r1) => 
   179   | RECD(x, r1) => 
   180       let val (r1s, f1s) = simp r1 in
   180       let val (r1s, f1s) = simp r1 in
   181         (RECD(x, r1s), f_rec f1s)
   181         (RECD(x, r1s), f_rec f1s)
   182       end
   182       end
   183   | r => (r, f_id)
   183   | r => (r, f_id)
   184 
   184 
   185 fun der_simp c r = case r of
   185 fun der_simp c r = case r of
   186     NULL => (NULL, f_id)
   186     ZERO => (ZERO, f_id)
   187   | EMPTY => (NULL, f_id)
   187   | ONE => (ZERO, f_id)
   188   | CHAR(d) => ((if c = d then EMPTY else NULL), f_id)
   188   | CHAR(d) => ((if c = d then ONE else ZERO), f_id)
   189   | ALT(r1, r2) => 
   189   | ALT(r1, r2) => 
   190       let 
   190       let 
   191         val (r1d, f1d) = der_simp c r1 
   191         val (r1d, f1d) = der_simp c r1 
   192         val (r2d, f2d) = der_simp c r2 
   192         val (r2d, f2d) = der_simp c r2 
   193       in
   193       in
   194         case (r1d, r2d) of
   194         case (r1d, r2d) of
   195           (NULL, _) => (r2d, f_right f2d)
   195           (ZERO, _) => (r2d, f_right f2d)
   196         | (_, NULL) => (r1d, f_left f1d)
   196         | (_, ZERO) => (r1d, f_left f1d)
   197         | (_, _)    => if r1d = r2d then (r1d, f_left f1d)
   197         | (_, _)    => if r1d = r2d then (r1d, f_left f1d)
   198                        else (ALT (r1d, r2d), f_alt f1d f2d)
   198                        else (ALT (r1d, r2d), f_alt f1d f2d)
   199       end
   199       end
   200   | SEQ(r1, r2) => 
   200   | SEQ(r1, r2) => 
   201       if nullable r1 
   201       if nullable r1 
   204           val (r1d, f1d) = der_simp c r1 
   204           val (r1d, f1d) = der_simp c r1 
   205           val (r2d, f2d) = der_simp c r2
   205           val (r2d, f2d) = der_simp c r2
   206           val (r2s, f2s) = simp r2 
   206           val (r2s, f2s) = simp r2 
   207         in
   207         in
   208           case (r1d, r2s, r2d) of
   208           case (r1d, r2s, r2d) of
   209             (NULL, _, _)  => (r2d, f_right f2d)
   209             (ZERO, _, _)  => (r2d, f_right f2d)
   210           | (_, NULL, _)  => (r2d, f_right f2d)
   210           | (_, ZERO, _)  => (r2d, f_right f2d)
   211           | (_, _, NULL)  => (SEQ(r1d, r2s), f_left (f_seq f1d f2s))
   211           | (_, _, ZERO)  => (SEQ(r1d, r2s), f_left (f_seq f1d f2s))
   212           | (EMPTY, _, _) => (ALT(r2s, r2d), f_alt (f_seq_Void1 f1d f2s) f2d)
   212           | (ONE, _, _) => (ALT(r2s, r2d), f_alt (f_seq_Empty1 f1d f2s) f2d)
   213           | (_, EMPTY, _) => (ALT(r1d, r2d), f_alt (f_seq_Void2 f1d f2s) f2d)
   213           | (_, ONE, _) => (ALT(r1d, r2d), f_alt (f_seq_Empty2 f1d f2s) f2d)
   214           | (_, _, _)     => (ALT(SEQ(r1d, r2s), r2d), f_alt (f_seq f1d f2s) f2d)
   214           | (_, _, _)     => (ALT(SEQ(r1d, r2s), r2d), f_alt (f_seq f1d f2s) f2d)
   215         end
   215         end
   216       else 
   216       else 
   217         let 
   217         let 
   218           val (r1d, f1d) = der_simp c r1 
   218           val (r1d, f1d) = der_simp c r1 
   219           val (r2s, f2s) = simp r2
   219           val (r2s, f2s) = simp r2
   220         in
   220         in
   221           case (r1d, r2s) of
   221           case (r1d, r2s) of
   222             (NULL, _) => (NULL, f_error)
   222             (ZERO, _) => (ZERO, f_error)
   223           | (_, NULL) => (NULL, f_error)
   223           | (_, ZERO) => (ZERO, f_error)
   224           | (EMPTY, _) => (r2s, f_seq_Void1 f1d f2s)
   224           | (ONE, _) => (r2s, f_seq_Empty1 f1d f2s)
   225           | (_, EMPTY) => (r1d, f_seq_Void2 f1d f2s)
   225           | (_, ONE) => (r1d, f_seq_Empty2 f1d f2s)
   226           | (_, _) => (SEQ(r1d, r2s), f_seq f1d f2s)
   226           | (_, _) => (SEQ(r1d, r2s), f_seq f1d f2s)
   227   	end	  
   227   	end	  
   228   | STAR(r1) => 
   228   | STAR(r1) => 
   229       let 
   229       let 
   230         val (r1d, f1d) = der_simp c r1 
   230         val (r1d, f1d) = der_simp c r1 
   231       in
   231       in
   232         case r1d of
   232         case r1d of
   233           NULL => (NULL, f_error)
   233           ZERO => (ZERO, f_error)
   234         | EMPTY => (STAR r1, f_seq_Void1 f1d f_id)
   234         | ONE => (STAR r1, f_seq_Empty1 f1d f_id)
   235         | _ => (SEQ(r1d, STAR(r1)), f_seq f1d f_id)
   235         | _ => (SEQ(r1d, STAR(r1)), f_seq f1d f_id)
   236       end
   236       end
   237   | RECD(x, r1) => der_simp c r1 
   237   | RECD(x, r1) => der_simp c r1 
   238 
   238 
   239 
   239