progs/fsharp/re.ml
changeset 156 6a43ea9305ba
parent 3 94824659f6d7
equal deleted inserted replaced
155:c9027db225cc 156:6a43ea9305ba
     1 
     1 
     2 type rexp =
     2 type rexp =
     3    NULL 
     3    ZERO 
     4  | EMPTY 
     4  | ONE 
     5  | CHAR of char
     5  | CHAR of char
     6  | ALT of rexp * rexp
     6  | ALT of rexp * rexp
     7  | SEQ of rexp * rexp 
     7  | SEQ of rexp * rexp 
     8  | STAR of rexp 
     8  | STAR of rexp 
     9  | RECD of string * rexp;;
     9  | RECD of string * rexp;;
    10 
    10 
    11 type value =
    11 type value =
    12    Void
    12    Empty
    13  | Chr of char
    13  | Chr of char
    14  | Sequ of value * value
    14  | Sequ of value * value
    15  | Left of value
    15  | Left of value
    16  | Right of value
    16  | Right of value
    17  | Stars of value list
    17  | Stars of value list
    22 
    22 
    23 let string_repeat s n =  String.replicate n s;;
    23 let string_repeat s n =  String.replicate n s;;
    24 
    24 
    25 (* some helper functions for rexps *)
    25 (* some helper functions for rexps *)
    26 let rec seq s = match s with
    26 let rec seq s = match s with
    27   | [] -> EMPTY
    27   | [] -> ONE
    28   | [c] -> CHAR(c)
    28   | [c] -> CHAR(c)
    29   | c::cs -> SEQ(CHAR(c), seq cs);;
    29   | c::cs -> SEQ(CHAR(c), seq cs);;
    30 
    30 
    31 let chr c = CHAR(c)
    31 let chr c = CHAR(c)
    32 
    32 
    39 let (--) r1 r2 = SEQ(r1, r2);;
    39 let (--) r1 r2 = SEQ(r1, r2);;
    40 
    40 
    41 let ($) x r = RECD(x, r);;
    41 let ($) x r = RECD(x, r);;
    42 
    42 
    43 let alts rs = match rs with 
    43 let alts rs = match rs with 
    44   | [] -> NULL
    44   | [] -> ZERO
    45   | [r] -> r
    45   | [r] -> r
    46   | r::rs -> List.fold (++) r rs;;
    46   | r::rs -> List.fold (++) r rs;;
    47 
    47 
    48 
    48 
    49 (* size of a regular expressions - for testing purposes *)
    49 (* size of a regular expressions - for testing purposes *)
    50 let rec size r = match r with
    50 let rec size r = match r with
    51   | NULL -> 1
    51   | ZERO -> 1
    52   | EMPTY -> 1
    52   | ONE -> 1
    53   | CHAR(_) -> 1
    53   | CHAR(_) -> 1
    54   | ALT(r1, r2) -> 1 + (size r1) + (size r2)
    54   | ALT(r1, r2) -> 1 + (size r1) + (size r2)
    55   | SEQ(r1, r2) -> 1 + (size r1) + (size r2)
    55   | SEQ(r1, r2) -> 1 + (size r1) + (size r2)
    56   | STAR(r) -> 1 + (size r)
    56   | STAR(r) -> 1 + (size r)
    57   | RECD(_, r) -> 1 + (size r);;
    57   | RECD(_, r) -> 1 + (size r);;
    58 
    58 
    59 (* nullable function: tests whether the regular 
    59 (* nullable function: tests whether the regular 
    60    expression can recognise the empty string *)
    60    expression can recognise the empty string *)
    61 let rec nullable r = match r with
    61 let rec nullable r = match r with
    62   | NULL -> false
    62   | ZERO -> false
    63   | EMPTY -> true
    63   | ONE -> true
    64   | CHAR(_) -> false
    64   | CHAR(_) -> false
    65   | ALT(r1, r2) -> nullable(r1) || nullable(r2)
    65   | ALT(r1, r2) -> nullable(r1) || nullable(r2)
    66   | SEQ(r1, r2) -> nullable(r1) && nullable(r2)
    66   | SEQ(r1, r2) -> nullable(r1) && nullable(r2)
    67   | STAR(_) -> true
    67   | STAR(_) -> true
    68   | RECD(_, r) -> nullable(r);;
    68   | RECD(_, r) -> nullable(r);;
    69 
    69 
    70 (* derivative of a regular expression r w.r.t. a character c *)
    70 (* derivative of a regular expression r w.r.t. a character c *)
    71 let rec der c r = match r with 
    71 let rec der c r = match r with 
    72   | NULL -> NULL
    72   | ZERO -> ZERO
    73   | EMPTY -> NULL
    73   | ONE -> ZERO
    74   | CHAR(d) -> if c = d then EMPTY else NULL
    74   | CHAR(d) -> if c = d then ONE else ZERO
    75   | ALT(r1, r2) -> ALT(der c r1, der c r2)
    75   | ALT(r1, r2) -> ALT(der c r1, der c r2)
    76   | SEQ(r1, r2) -> 
    76   | SEQ(r1, r2) -> 
    77       if nullable r1 then ALT(SEQ(der c r1, r2), der c r2)
    77       if nullable r1 then ALT(SEQ(der c r1, r2), der c r2)
    78       else SEQ(der c r1, r2)
    78       else SEQ(der c r1, r2)
    79   | STAR(r) -> SEQ(der c r, STAR(r))
    79   | STAR(r) -> SEQ(der c r, STAR(r))
    84   | [] -> r
    84   | [] -> r
    85   | c::s -> ders s (der c r);;
    85   | c::s -> ders s (der c r);;
    86 
    86 
    87 (* extracts a string from value *)
    87 (* extracts a string from value *)
    88 let rec flatten v = match v with 
    88 let rec flatten v = match v with 
    89   | Void -> ""
    89   | Empty -> ""
    90   | Chr(c) -> System.Convert.ToString(c)
    90   | Chr(c) -> System.Convert.ToString(c)
    91   | Left(v) -> flatten v
    91   | Left(v) -> flatten v
    92   | Right(v) -> flatten v
    92   | Right(v) -> flatten v
    93   | Sequ(v1, v2) -> flatten v1 ^ flatten v2
    93   | Sequ(v1, v2) -> flatten v1 ^ flatten v2
    94   | Stars(vs) -> String.concat "" (List.map flatten vs)
    94   | Stars(vs) -> String.concat "" (List.map flatten vs)
    95   | Rec(_, v) -> flatten v;;
    95   | Rec(_, v) -> flatten v;;
    96 
    96 
    97 
    97 
    98 (* extracts an environment from a value *)
    98 (* extracts an environment from a value *)
    99 let rec env v = match v with
    99 let rec env v = match v with
   100   | Void -> []
   100   | Empty -> []
   101   | Chr(c) -> []
   101   | Chr(c) -> []
   102   | Left(v) -> env v
   102   | Left(v) -> env v
   103   | Right(v) -> env v
   103   | Right(v) -> env v
   104   | Sequ(v1, v2) -> env v1 @ env v2
   104   | Sequ(v1, v2) -> env v1 @ env v2
   105   | Stars(vs) -> List.fold (@) [] (List.map env vs)
   105   | Stars(vs) -> List.fold (@) [] (List.map env vs)
   109 let string_of_env xs = String.concat "," (List.map string_of_pair xs);;
   109 let string_of_env xs = String.concat "," (List.map string_of_pair xs);;
   110 
   110 
   111 
   111 
   112 (* the value for a nullable rexp *)
   112 (* the value for a nullable rexp *)
   113 let rec mkeps r = match r with
   113 let rec mkeps r = match r with
   114   | EMPTY -> Void
   114   | ONE -> Empty
   115   | ALT(r1, r2) -> 
   115   | ALT(r1, r2) -> 
   116       if nullable r1 then Left(mkeps r1) else Right(mkeps r2)
   116       if nullable r1 then Left(mkeps r1) else Right(mkeps r2)
   117   | SEQ(r1, r2) -> Sequ(mkeps r1, mkeps r2)
   117   | SEQ(r1, r2) -> Sequ(mkeps r1, mkeps r2)
   118   | STAR(r) -> Stars([])
   118   | STAR(r) -> Stars([])
   119   | RECD(x, r) -> Rec(x, mkeps r);;
   119   | RECD(x, r) -> Rec(x, mkeps r);;
   125   | SEQ(r1, r2), Sequ(v1, v2) -> Sequ(inj r1 c v1, v2)
   125   | SEQ(r1, r2), Sequ(v1, v2) -> Sequ(inj r1 c v1, v2)
   126   | SEQ(r1, r2), Left(Sequ(v1, v2)) -> Sequ(inj r1 c v1, v2)
   126   | SEQ(r1, r2), Left(Sequ(v1, v2)) -> Sequ(inj r1 c v1, v2)
   127   | SEQ(r1, r2), Right(v2) -> Sequ(mkeps r1, inj r2 c v2)
   127   | SEQ(r1, r2), Right(v2) -> Sequ(mkeps r1, inj r2 c v2)
   128   | ALT(r1, r2), Left(v1) -> Left(inj r1 c v1)
   128   | ALT(r1, r2), Left(v1) -> Left(inj r1 c v1)
   129   | ALT(r1, r2), Right(v2) -> Right(inj r2 c v2)
   129   | ALT(r1, r2), Right(v2) -> Right(inj r2 c v2)
   130   | CHAR(d), Void -> Chr(d) 
   130   | CHAR(d), Empty -> Chr(d) 
   131   | RECD(x, r1), _ -> Rec(x, inj r1 c v);;
   131   | RECD(x, r1), _ -> Rec(x, inj r1 c v);;
   132 
   132 
   133 (* some "rectification" functions for simplification *)
   133 (* some "rectification" functions for simplification *)
   134 let f_id v = v;;
   134 let f_id v = v;;
   135 let f_right f = fun v -> Right(f v);;
   135 let f_right f = fun v -> Right(f v);;
   137 let f_alt f1 f2 = fun v -> match v with 
   137 let f_alt f1 f2 = fun v -> match v with 
   138     Right(v) -> Right(f2 v)
   138     Right(v) -> Right(f2 v)
   139   | Left(v) -> Left(f1 v);;
   139   | Left(v) -> Left(f1 v);;
   140 let f_seq f1 f2 = fun v -> match v with 
   140 let f_seq f1 f2 = fun v -> match v with 
   141   Sequ(v1, v2) -> Sequ(f1 v1, f2 v2);;
   141   Sequ(v1, v2) -> Sequ(f1 v1, f2 v2);;
   142 let f_seq_Void1 f1 f2 = fun v -> Sequ(f1 Void, f2 v);;
   142 let f_seq_Empty1 f1 f2 = fun v -> Sequ(f1 Empty, f2 v);;
   143 let f_seq_Void2 f1 f2 = fun v -> Sequ(f1 v, f2 Void);;
   143 let f_seq_Empty2 f1 f2 = fun v -> Sequ(f1 v, f2 Empty);;
   144 let f_rec f = fun v -> match v with
   144 let f_rec f = fun v -> match v with
   145     Rec(x, v) -> Rec(x, f v);;
   145     Rec(x, v) -> Rec(x, f v);;
   146 
   146 
   147 (* simplification of regular expressions returning also an 
   147 (* simplification of regular expressions returning also an 
   148    rectification function; no simplification under STARs *)
   148    rectification function; no simplification under STARs *)
   149 let rec simp r = match r with
   149 let rec simp r = match r with
   150     ALT(r1, r2) -> 
   150     ALT(r1, r2) -> 
   151       let (r1s, f1s) = simp r1 in 
   151       let (r1s, f1s) = simp r1 in 
   152       let (r2s, f2s) = simp r2 in
   152       let (r2s, f2s) = simp r2 in
   153       (match r1s, r2s with
   153       (match r1s, r2s with
   154           NULL, _ -> (r2s, f_right f2s)
   154           ZERO, _ -> (r2s, f_right f2s)
   155         | _, NULL -> (r1s, f_left f1s)
   155         | _, ZERO -> (r1s, f_left f1s)
   156         | _, _    -> if r1s = r2s then (r1s, f_left f1s)
   156         | _, _    -> if r1s = r2s then (r1s, f_left f1s)
   157                      else (ALT (r1s, r2s), f_alt f1s f2s)) 
   157                      else (ALT (r1s, r2s), f_alt f1s f2s)) 
   158   | SEQ(r1, r2) -> 
   158   | SEQ(r1, r2) -> 
   159       let (r1s, f1s) = simp r1 in
   159       let (r1s, f1s) = simp r1 in
   160       let (r2s, f2s) = simp r2 in
   160       let (r2s, f2s) = simp r2 in
   161       (match r1s, r2s with
   161       (match r1s, r2s with
   162           NULL, _  -> (NULL, f_right f2s)
   162           ZERO, _  -> (ZERO, f_right f2s)
   163         | _, NULL  -> (NULL, f_left f1s)
   163         | _, ZERO  -> (ZERO, f_left f1s)
   164         | EMPTY, _ -> (r2s, f_seq_Void1 f1s f2s)
   164         | ONE, _ -> (r2s, f_seq_Empty1 f1s f2s)
   165         | _, EMPTY -> (r1s, f_seq_Void2 f1s f2s)
   165         | _, ONE -> (r1s, f_seq_Empty2 f1s f2s)
   166         | _, _     -> (SEQ(r1s, r2s), f_seq f1s f2s))
   166         | _, _     -> (SEQ(r1s, r2s), f_seq f1s f2s))
   167   | RECD(x, r1) -> 
   167   | RECD(x, r1) -> 
   168       let (r1s, f1s) = simp r1 in
   168       let (r1s, f1s) = simp r1 in
   169       (RECD(x, r1s), f_rec f1s)
   169       (RECD(x, r1s), f_rec f1s)
   170   | r -> (r, f_id)
   170   | r -> (r, f_id)