progs/ocaml/re.ml
changeset 156 6a43ea9305ba
parent 3 94824659f6d7
equal deleted inserted replaced
155:c9027db225cc 156:6a43ea9305ba
     1 
     1 
     2 type rexp =
     2 type rexp =
     3    NULL 
     3    ZERO 
     4  | EMPTY 
     4  | ONE 
     5  | CHAR of char
     5  | CHAR of char
     6  | ALT of rexp * rexp
     6  | ALT of rexp * rexp
     7  | SEQ of rexp * rexp 
     7  | SEQ of rexp * rexp 
     8  | STAR of rexp 
     8  | STAR of rexp 
     9  | RECD of string * rexp;;
     9  | RECD of string * rexp;;
    10 
    10 
    11 type value =
    11 type value =
    12    Void
    12    Empty
    13  | Chr of char
    13  | Chr of char
    14  | Sequ of value * value
    14  | Sequ of value * value
    15  | Left of value
    15  | Left of value
    16  | Right of value
    16  | Right of value
    17  | Stars of value list
    17  | Stars of value list
    18  | Rec of string * value;;
    18  | Rec of string * value;;
    19 
    19 
    20 let rec string_of_val v = match v with
    20 let rec string_of_val v = match v with
    21    Void -> "Void"
    21    Empty -> "Empty"
    22  | Chr(c) -> String.make 1 c
    22  | Chr(c) -> String.make 1 c
    23  | Sequ(v1, v2) -> "Seq(" ^ string_of_val v1 ^ "," ^ string_of_val v2 ^ ")"
    23  | Sequ(v1, v2) -> "Seq(" ^ string_of_val v1 ^ "," ^ string_of_val v2 ^ ")"
    24  | Left(v1) -> "Left(" ^ string_of_val v1 ^ ")"
    24  | Left(v1) -> "Left(" ^ string_of_val v1 ^ ")"
    25  | Right(v1) -> "Right(" ^ string_of_val v1 ^ ")"
    25  | Right(v1) -> "Right(" ^ string_of_val v1 ^ ")"
    26  | Stars(vs) -> "[" ^ String.concat "," (List.map string_of_val vs) ^ "]"
    26  | Stars(vs) -> "[" ^ String.concat "," (List.map string_of_val vs) ^ "]"
    36 let string_repeat s n =
    36 let string_repeat s n =
    37   Array.fold_left (^) "" (Array.make n s);;
    37   Array.fold_left (^) "" (Array.make n s);;
    38 
    38 
    39 (* some helper functions for rexps *)
    39 (* some helper functions for rexps *)
    40 let rec seq s = match s with
    40 let rec seq s = match s with
    41     [] -> EMPTY
    41     [] -> ONE
    42   | [c] -> CHAR(c)
    42   | [c] -> CHAR(c)
    43   | c::cs -> SEQ(CHAR(c), seq cs);;
    43   | c::cs -> SEQ(CHAR(c), seq cs);;
    44 
    44 
    45 let chr c = CHAR(c)
    45 let chr c = CHAR(c)
    46 
    46 
    53 let (--) r1 r2 = SEQ(r1, r2);;
    53 let (--) r1 r2 = SEQ(r1, r2);;
    54 
    54 
    55 let ($) x r = RECD(x, r);;
    55 let ($) x r = RECD(x, r);;
    56 
    56 
    57 let alts rs = match rs with 
    57 let alts rs = match rs with 
    58     [] -> NULL
    58     [] -> ZERO
    59   | [r] -> r
    59   | [r] -> r
    60   | r::rs -> List.fold_left (++) r rs;;
    60   | r::rs -> List.fold_left (++) r rs;;
    61 
    61 
    62 
    62 
    63 (* size of a regular expressions - for testing purposes *)
    63 (* size of a regular expressions - for testing purposes *)
    64 let rec size r = match r with
    64 let rec size r = match r with
    65     NULL -> 1
    65     ZERO -> 1
    66   | EMPTY -> 1
    66   | ONE -> 1
    67   | CHAR(_) -> 1
    67   | CHAR(_) -> 1
    68   | ALT(r1, r2) -> 1 + (size r1) + (size r2)
    68   | ALT(r1, r2) -> 1 + (size r1) + (size r2)
    69   | SEQ(r1, r2) -> 1 + (size r1) + (size r2)
    69   | SEQ(r1, r2) -> 1 + (size r1) + (size r2)
    70   | STAR(r) -> 1 + (size r)
    70   | STAR(r) -> 1 + (size r)
    71   | RECD(_, r) -> 1 + (size r);;
    71   | RECD(_, r) -> 1 + (size r);;
    72 
    72 
    73 (* nullable function: tests whether the regular 
    73 (* nullable function: tests whether the regular 
    74    expression can recognise the empty string *)
    74    expression can recognise the empty string *)
    75 let rec nullable r = match r with
    75 let rec nullable r = match r with
    76     NULL -> false
    76     ZERO -> false
    77   | EMPTY -> true
    77   | ONE -> true
    78   | CHAR(_) -> false
    78   | CHAR(_) -> false
    79   | ALT(r1, r2) -> nullable(r1) || nullable(r2)
    79   | ALT(r1, r2) -> nullable(r1) || nullable(r2)
    80   | SEQ(r1, r2) -> nullable(r1) && nullable(r2)
    80   | SEQ(r1, r2) -> nullable(r1) && nullable(r2)
    81   | STAR(_) -> true
    81   | STAR(_) -> true
    82   | RECD(_, r) -> nullable(r);;
    82   | RECD(_, r) -> nullable(r);;
    83 
    83 
    84 (* derivative of a regular expression r w.r.t. a character c *)
    84 (* derivative of a regular expression r w.r.t. a character c *)
    85 let rec der c r = match r with 
    85 let rec der c r = match r with 
    86     NULL -> NULL
    86     ZERO -> ZERO
    87   | EMPTY -> NULL
    87   | ONE -> ZERO
    88   | CHAR(d) -> if c = d then EMPTY else NULL
    88   | CHAR(d) -> if c = d then ONE else ZERO
    89   | ALT(r1, r2) -> ALT(der c r1, der c r2)
    89   | ALT(r1, r2) -> ALT(der c r1, der c r2)
    90   | SEQ(r1, r2) -> 
    90   | SEQ(r1, r2) -> 
    91       if nullable r1 then ALT(SEQ(der c r1, r2), der c r2)
    91       if nullable r1 then ALT(SEQ(der c r1, r2), der c r2)
    92       else SEQ(der c r1, r2)
    92       else SEQ(der c r1, r2)
    93   | STAR(r) -> SEQ(der c r, STAR(r))
    93   | STAR(r) -> SEQ(der c r, STAR(r))
    98     [] -> r
    98     [] -> r
    99   | c::s -> ders s (der c r);;
    99   | c::s -> ders s (der c r);;
   100 
   100 
   101 (* extracts a string from value *)
   101 (* extracts a string from value *)
   102 let rec flatten v = match v with 
   102 let rec flatten v = match v with 
   103     Void -> ""
   103     Empty -> ""
   104   | Chr(c) -> String.make 1 c
   104   | Chr(c) -> String.make 1 c
   105   | Left(v) -> flatten v
   105   | Left(v) -> flatten v
   106   | Right(v) -> flatten v
   106   | Right(v) -> flatten v
   107   | Sequ(v1, v2) -> flatten v1 ^ flatten v2
   107   | Sequ(v1, v2) -> flatten v1 ^ flatten v2
   108   | Stars(vs) -> String.concat "" (List.map flatten vs)
   108   | Stars(vs) -> String.concat "" (List.map flatten vs)
   109   | Rec(_, v) -> flatten v;;
   109   | Rec(_, v) -> flatten v;;
   110 
   110 
   111 
   111 
   112 (* extracts an environment from a value *)
   112 (* extracts an environment from a value *)
   113 let rec env v = match v with
   113 let rec env v = match v with
   114     Void -> []
   114     Empty -> []
   115   | Chr(c) -> []
   115   | Chr(c) -> []
   116   | Left(v) -> env v
   116   | Left(v) -> env v
   117   | Right(v) -> env v
   117   | Right(v) -> env v
   118   | Sequ(v1, v2) -> env v1 @ env v2
   118   | Sequ(v1, v2) -> env v1 @ env v2
   119   | Stars(vs) -> List.flatten (List.map env vs)
   119   | Stars(vs) -> List.flatten (List.map env vs)
   123 let string_of_env xs = String.concat "," (List.map string_of_pair xs);;
   123 let string_of_env xs = String.concat "," (List.map string_of_pair xs);;
   124 
   124 
   125 
   125 
   126 (* the value for a nullable rexp *)
   126 (* the value for a nullable rexp *)
   127 let rec mkeps r = match r with
   127 let rec mkeps r = match r with
   128     EMPTY -> Void
   128     ONE -> Empty
   129   | ALT(r1, r2) -> 
   129   | ALT(r1, r2) -> 
   130       if nullable r1 then Left(mkeps r1) else Right(mkeps r2)
   130       if nullable r1 then Left(mkeps r1) else Right(mkeps r2)
   131   | SEQ(r1, r2) -> Sequ(mkeps r1, mkeps r2)
   131   | SEQ(r1, r2) -> Sequ(mkeps r1, mkeps r2)
   132   | STAR(r) -> Stars([])
   132   | STAR(r) -> Stars([])
   133   | RECD(x, r) -> Rec(x, mkeps r);;
   133   | RECD(x, r) -> Rec(x, mkeps r);;
   139   | SEQ(r1, r2), Sequ(v1, v2) -> Sequ(inj r1 c v1, v2)
   139   | SEQ(r1, r2), Sequ(v1, v2) -> Sequ(inj r1 c v1, v2)
   140   | SEQ(r1, r2), Left(Sequ(v1, v2)) -> Sequ(inj r1 c v1, v2)
   140   | SEQ(r1, r2), Left(Sequ(v1, v2)) -> Sequ(inj r1 c v1, v2)
   141   | SEQ(r1, r2), Right(v2) -> Sequ(mkeps r1, inj r2 c v2)
   141   | SEQ(r1, r2), Right(v2) -> Sequ(mkeps r1, inj r2 c v2)
   142   | ALT(r1, r2), Left(v1) -> Left(inj r1 c v1)
   142   | ALT(r1, r2), Left(v1) -> Left(inj r1 c v1)
   143   | ALT(r1, r2), Right(v2) -> Right(inj r2 c v2)
   143   | ALT(r1, r2), Right(v2) -> Right(inj r2 c v2)
   144   | CHAR(d), Void -> Chr(d) 
   144   | CHAR(d), Empty -> Chr(d) 
   145   | RECD(x, r1), _ -> Rec(x, inj r1 c v);;
   145   | RECD(x, r1), _ -> Rec(x, inj r1 c v);;
   146 
   146 
   147 (* some "rectification" functions for simplification *)
   147 (* some "rectification" functions for simplification *)
   148 let f_id v = v;;
   148 let f_id v = v;;
   149 let f_right f = fun v -> Right(f v);;
   149 let f_right f = fun v -> Right(f v);;
   151 let f_alt f1 f2 = fun v -> match v with 
   151 let f_alt f1 f2 = fun v -> match v with 
   152     Right(v) -> Right(f2 v)
   152     Right(v) -> Right(f2 v)
   153   | Left(v) -> Left(f1 v);;
   153   | Left(v) -> Left(f1 v);;
   154 let f_seq f1 f2 = fun v -> match v with 
   154 let f_seq f1 f2 = fun v -> match v with 
   155   Sequ(v1, v2) -> Sequ(f1 v1, f2 v2);;
   155   Sequ(v1, v2) -> Sequ(f1 v1, f2 v2);;
   156 let f_seq_Void1 f1 f2 = fun v -> Sequ(f1 Void, f2 v);;
   156 let f_seq_Empty1 f1 f2 = fun v -> Sequ(f1 Empty, f2 v);;
   157 let f_seq_Void2 f1 f2 = fun v -> Sequ(f1 v, f2 Void);;
   157 let f_seq_Empty2 f1 f2 = fun v -> Sequ(f1 v, f2 Empty);;
   158 let f_rec f = fun v -> match v with
   158 let f_rec f = fun v -> match v with
   159     Rec(x, v) -> Rec(x, f v);;
   159     Rec(x, v) -> Rec(x, f v);;
   160 
   160 
   161 exception ShouldNotHappen
   161 exception ShouldNotHappen
   162 let f_error v = raise ShouldNotHappen
   162 let f_error v = raise ShouldNotHappen
   166 let rec simp r = match r with
   166 let rec simp r = match r with
   167     ALT(r1, r2) -> 
   167     ALT(r1, r2) -> 
   168       let (r1s, f1s) = simp r1 in 
   168       let (r1s, f1s) = simp r1 in 
   169       let (r2s, f2s) = simp r2 in
   169       let (r2s, f2s) = simp r2 in
   170       (match r1s, r2s with
   170       (match r1s, r2s with
   171           NULL, _ -> (r2s, f_right f2s)
   171           ZERO, _ -> (r2s, f_right f2s)
   172         | _, NULL -> (r1s, f_left f1s)
   172         | _, ZERO -> (r1s, f_left f1s)
   173         | _, _    -> if r1s = r2s then (r1s, f_left f1s)
   173         | _, _    -> if r1s = r2s then (r1s, f_left f1s)
   174                      else (ALT (r1s, r2s), f_alt f1s f2s)) 
   174                      else (ALT (r1s, r2s), f_alt f1s f2s)) 
   175   | SEQ(r1, r2) -> 
   175   | SEQ(r1, r2) -> 
   176       let (r1s, f1s) = simp r1 in
   176       let (r1s, f1s) = simp r1 in
   177       let (r2s, f2s) = simp r2 in
   177       let (r2s, f2s) = simp r2 in
   178       (match r1s, r2s with
   178       (match r1s, r2s with
   179           NULL, _  -> (NULL, f_error)
   179           ZERO, _  -> (ZERO, f_error)
   180         | _, NULL  -> (NULL, f_error)
   180         | _, ZERO  -> (ZERO, f_error)
   181         | EMPTY, _ -> (r2s, f_seq_Void1 f1s f2s)
   181         | ONE, _ -> (r2s, f_seq_Empty1 f1s f2s)
   182         | _, EMPTY -> (r1s, f_seq_Void2 f1s f2s)
   182         | _, ONE -> (r1s, f_seq_Empty2 f1s f2s)
   183         | _, _     -> (SEQ(r1s, r2s), f_seq f1s f2s))
   183         | _, _     -> (SEQ(r1s, r2s), f_seq f1s f2s))
   184   | RECD(x, r1) -> 
   184   | RECD(x, r1) -> 
   185       let (r1s, f1s) = simp r1 in
   185       let (r1s, f1s) = simp r1 in
   186       (RECD(x, r1s), f_rec f1s)
   186       (RECD(x, r1s), f_rec f1s)
   187   | r -> (r, f_id)
   187   | r -> (r, f_id)
   188 ;;
   188 ;;
   189 
   189 
   190 let rec der_simp c r = match r with
   190 let rec der_simp c r = match r with
   191     NULL -> (NULL, f_id)
   191     ZERO -> (ZERO, f_id)
   192   | EMPTY -> (NULL, f_id)
   192   | ONE -> (ZERO, f_id)
   193   | CHAR(d) -> ((if c = d then EMPTY else NULL), f_id)
   193   | CHAR(d) -> ((if c = d then ONE else ZERO), f_id)
   194   | ALT(r1, r2) -> 
   194   | ALT(r1, r2) -> 
   195       let (r1d, f1d) = der_simp c r1 in
   195       let (r1d, f1d) = der_simp c r1 in
   196       let (r2d, f2d) = der_simp c r2 in
   196       let (r2d, f2d) = der_simp c r2 in
   197       (match r1d, r2d with
   197       (match r1d, r2d with
   198           NULL, _ -> (r2d, f_right f2d)
   198           ZERO, _ -> (r2d, f_right f2d)
   199         | _, NULL -> (r1d, f_left f1d)
   199         | _, ZERO -> (r1d, f_left f1d)
   200         | _, _    -> if r1d = r2d then (r1d, f_left f1d)
   200         | _, _    -> if r1d = r2d then (r1d, f_left f1d)
   201                      else (ALT (r1d, r2d), f_alt f1d f2d))
   201                      else (ALT (r1d, r2d), f_alt f1d f2d))
   202   | SEQ(r1, r2) -> 
   202   | SEQ(r1, r2) -> 
   203       if nullable r1 
   203       if nullable r1 
   204       then 
   204       then 
   205         let (r1d, f1d) = der_simp c r1 in 
   205         let (r1d, f1d) = der_simp c r1 in 
   206         let (r2d, f2d) = der_simp c r2 in
   206         let (r2d, f2d) = der_simp c r2 in
   207         let (r2s, f2s) = simp r2 in
   207         let (r2s, f2s) = simp r2 in
   208         (match r1d, r2s, r2d with
   208         (match r1d, r2s, r2d with
   209             NULL, _, _  -> (r2d, f_right f2d)
   209             ZERO, _, _  -> (r2d, f_right f2d)
   210           | _, NULL, _  -> (r2d, f_right f2d)
   210           | _, ZERO, _  -> (r2d, f_right f2d)
   211           | _, _, NULL  -> (SEQ(r1d, r2s), f_left (f_seq f1d f2s))
   211           | _, _, ZERO  -> (SEQ(r1d, r2s), f_left (f_seq f1d f2s))
   212           | EMPTY, _, _ -> (ALT(r2s, r2d), f_alt (f_seq_Void1 f1d f2s) f2d)
   212           | ONE, _, _ -> (ALT(r2s, r2d), f_alt (f_seq_Empty1 f1d f2s) f2d)
   213           | _, EMPTY, _ -> (ALT(r1d, r2d), f_alt (f_seq_Void2 f1d f2s) f2d)
   213           | _, ONE, _ -> (ALT(r1d, r2d), f_alt (f_seq_Empty2 f1d f2s) f2d)
   214           | _, _, _     -> (ALT(SEQ(r1d, r2s), r2d), f_alt (f_seq f1d f2s) f2d))
   214           | _, _, _     -> (ALT(SEQ(r1d, r2s), r2d), f_alt (f_seq f1d f2s) f2d))
   215       else 
   215       else 
   216         let (r1d, f1d) = der_simp c r1 in
   216         let (r1d, f1d) = der_simp c r1 in
   217         let (r2s, f2s) = simp r2 in
   217         let (r2s, f2s) = simp r2 in
   218         (match r1d, r2s with
   218         (match r1d, r2s with
   219             NULL, _ -> (NULL, f_error)
   219             ZERO, _ -> (ZERO, f_error)
   220           | _, NULL -> (NULL, f_error)
   220           | _, ZERO -> (ZERO, f_error)
   221           | EMPTY, _ -> (r2s, f_seq_Void1 f1d f2s)
   221           | ONE, _ -> (r2s, f_seq_Empty1 f1d f2s)
   222           | _, EMPTY -> (r1d, f_seq_Void2 f1d f2s)
   222           | _, ONE -> (r1d, f_seq_Empty2 f1d f2s)
   223           | _, _ -> (SEQ(r1d, r2s), f_seq f1d f2s))	  
   223           | _, _ -> (SEQ(r1d, r2s), f_seq f1d f2s))	  
   224   | STAR(r1) -> 
   224   | STAR(r1) -> 
   225       let (r1d, f1d) = der_simp c r1 in
   225       let (r1d, f1d) = der_simp c r1 in
   226       (match r1d with
   226       (match r1d with
   227           NULL -> (NULL, f_error)
   227           ZERO -> (ZERO, f_error)
   228         | EMPTY -> (STAR r1, f_seq_Void1 f1d f_id)
   228         | ONE -> (STAR r1, f_seq_Empty1 f1d f_id)
   229         | _ -> (SEQ(r1d, STAR(r1)), f_seq f1d f_id))
   229         | _ -> (SEQ(r1d, STAR(r1)), f_seq f1d f_id))
   230   | RECD(x, r1) -> der_simp c r1 
   230   | RECD(x, r1) -> der_simp c r1 
   231 
   231 
   232 
   232 
   233 (* matcher function *)
   233 (* matcher function *)