progs/fsharp/re.ml
changeset 3 94824659f6d7
child 156 6a43ea9305ba
equal deleted inserted replaced
2:2bc119fc8657 3:94824659f6d7
       
     1 
       
     2 type rexp =
       
     3    NULL 
       
     4  | EMPTY 
       
     5  | CHAR of char
       
     6  | ALT of rexp * rexp
       
     7  | SEQ of rexp * rexp 
       
     8  | STAR of rexp 
       
     9  | RECD of string * rexp;;
       
    10 
       
    11 type value =
       
    12    Void
       
    13  | Chr of char
       
    14  | Sequ of value * value
       
    15  | Left of value
       
    16  | Right of value
       
    17  | Stars of value list
       
    18  | Rec of string * value;;
       
    19 
       
    20 (* some helper functions for strings *)   
       
    21 let explode s = [for c in s -> c];;
       
    22 
       
    23 let string_repeat s n =  String.replicate n s;;
       
    24 
       
    25 (* some helper functions for rexps *)
       
    26 let rec seq s = match s with
       
    27   | [] -> EMPTY
       
    28   | [c] -> CHAR(c)
       
    29   | c::cs -> SEQ(CHAR(c), seq cs);;
       
    30 
       
    31 let chr c = CHAR(c)
       
    32 
       
    33 let str s = seq(explode s);;
       
    34 
       
    35 let plus r = SEQ(r, STAR(r));;
       
    36 
       
    37 let (++) r1 r2 = ALT(r1, r2);;
       
    38 
       
    39 let (--) r1 r2 = SEQ(r1, r2);;
       
    40 
       
    41 let ($) x r = RECD(x, r);;
       
    42 
       
    43 let alts rs = match rs with 
       
    44   | [] -> NULL
       
    45   | [r] -> r
       
    46   | r::rs -> List.fold (++) r rs;;
       
    47 
       
    48 
       
    49 (* size of a regular expressions - for testing purposes *)
       
    50 let rec size r = match r with
       
    51   | NULL -> 1
       
    52   | EMPTY -> 1
       
    53   | CHAR(_) -> 1
       
    54   | ALT(r1, r2) -> 1 + (size r1) + (size r2)
       
    55   | SEQ(r1, r2) -> 1 + (size r1) + (size r2)
       
    56   | STAR(r) -> 1 + (size r)
       
    57   | RECD(_, r) -> 1 + (size r);;
       
    58 
       
    59 (* nullable function: tests whether the regular 
       
    60    expression can recognise the empty string *)
       
    61 let rec nullable r = match r with
       
    62   | NULL -> false
       
    63   | EMPTY -> true
       
    64   | CHAR(_) -> false
       
    65   | ALT(r1, r2) -> nullable(r1) || nullable(r2)
       
    66   | SEQ(r1, r2) -> nullable(r1) && nullable(r2)
       
    67   | STAR(_) -> true
       
    68   | RECD(_, r) -> nullable(r);;
       
    69 
       
    70 (* derivative of a regular expression r w.r.t. a character c *)
       
    71 let rec der c r = match r with 
       
    72   | NULL -> NULL
       
    73   | EMPTY -> NULL
       
    74   | CHAR(d) -> if c = d then EMPTY else NULL
       
    75   | ALT(r1, r2) -> ALT(der c r1, der c r2)
       
    76   | SEQ(r1, r2) -> 
       
    77       if nullable r1 then ALT(SEQ(der c r1, r2), der c r2)
       
    78       else SEQ(der c r1, r2)
       
    79   | STAR(r) -> SEQ(der c r, STAR(r))
       
    80   | RECD(_, r) -> der c r;;
       
    81 
       
    82 (* derivative w.r.t. a list of chars (iterates der) *)
       
    83 let rec ders s r = match s with 
       
    84   | [] -> r
       
    85   | c::s -> ders s (der c r);;
       
    86 
       
    87 (* extracts a string from value *)
       
    88 let rec flatten v = match v with 
       
    89   | Void -> ""
       
    90   | Chr(c) -> System.Convert.ToString(c)
       
    91   | Left(v) -> flatten v
       
    92   | Right(v) -> flatten v
       
    93   | Sequ(v1, v2) -> flatten v1 ^ flatten v2
       
    94   | Stars(vs) -> String.concat "" (List.map flatten vs)
       
    95   | Rec(_, v) -> flatten v;;
       
    96 
       
    97 
       
    98 (* extracts an environment from a value *)
       
    99 let rec env v = match v with
       
   100   | Void -> []
       
   101   | Chr(c) -> []
       
   102   | Left(v) -> env v
       
   103   | Right(v) -> env v
       
   104   | Sequ(v1, v2) -> env v1 @ env v2
       
   105   | Stars(vs) -> List.fold (@) [] (List.map env vs)
       
   106   | Rec(x, v) -> (x, flatten v) :: env v;;
       
   107 
       
   108 let string_of_pair (x, s) = "(" ^ x ^ "," ^ s ^ ")";;
       
   109 let string_of_env xs = String.concat "," (List.map string_of_pair xs);;
       
   110 
       
   111 
       
   112 (* the value for a nullable rexp *)
       
   113 let rec mkeps r = match r with
       
   114   | EMPTY -> Void
       
   115   | ALT(r1, r2) -> 
       
   116       if nullable r1 then Left(mkeps r1) else Right(mkeps r2)
       
   117   | SEQ(r1, r2) -> Sequ(mkeps r1, mkeps r2)
       
   118   | STAR(r) -> Stars([])
       
   119   | RECD(x, r) -> Rec(x, mkeps r);;
       
   120 
       
   121 
       
   122 (* injection of a char into a value *)
       
   123 let rec inj r c v = match r, v with
       
   124   | STAR(r), Sequ(v1, Stars(vs)) -> Stars(inj r c v1 :: vs)
       
   125   | SEQ(r1, r2), Sequ(v1, v2) -> Sequ(inj r1 c v1, v2)
       
   126   | SEQ(r1, r2), Left(Sequ(v1, v2)) -> Sequ(inj r1 c v1, v2)
       
   127   | SEQ(r1, r2), Right(v2) -> Sequ(mkeps r1, inj r2 c v2)
       
   128   | ALT(r1, r2), Left(v1) -> Left(inj r1 c v1)
       
   129   | ALT(r1, r2), Right(v2) -> Right(inj r2 c v2)
       
   130   | CHAR(d), Void -> Chr(d) 
       
   131   | RECD(x, r1), _ -> Rec(x, inj r1 c v);;
       
   132 
       
   133 (* some "rectification" functions for simplification *)
       
   134 let f_id v = v;;
       
   135 let f_right f = fun v -> Right(f v);;
       
   136 let f_left f = fun v -> Left(f v);;
       
   137 let f_alt f1 f2 = fun v -> match v with 
       
   138     Right(v) -> Right(f2 v)
       
   139   | Left(v) -> Left(f1 v);;
       
   140 let f_seq f1 f2 = fun v -> match v with 
       
   141   Sequ(v1, v2) -> Sequ(f1 v1, f2 v2);;
       
   142 let f_seq_Void1 f1 f2 = fun v -> Sequ(f1 Void, f2 v);;
       
   143 let f_seq_Void2 f1 f2 = fun v -> Sequ(f1 v, f2 Void);;
       
   144 let f_rec f = fun v -> match v with
       
   145     Rec(x, v) -> Rec(x, f v);;
       
   146 
       
   147 (* simplification of regular expressions returning also an 
       
   148    rectification function; no simplification under STARs *)
       
   149 let rec simp r = match r with
       
   150     ALT(r1, r2) -> 
       
   151       let (r1s, f1s) = simp r1 in 
       
   152       let (r2s, f2s) = simp r2 in
       
   153       (match r1s, r2s with
       
   154           NULL, _ -> (r2s, f_right f2s)
       
   155         | _, NULL -> (r1s, f_left f1s)
       
   156         | _, _    -> if r1s = r2s then (r1s, f_left f1s)
       
   157                      else (ALT (r1s, r2s), f_alt f1s f2s)) 
       
   158   | SEQ(r1, r2) -> 
       
   159       let (r1s, f1s) = simp r1 in
       
   160       let (r2s, f2s) = simp r2 in
       
   161       (match r1s, r2s with
       
   162           NULL, _  -> (NULL, f_right f2s)
       
   163         | _, NULL  -> (NULL, f_left f1s)
       
   164         | EMPTY, _ -> (r2s, f_seq_Void1 f1s f2s)
       
   165         | _, EMPTY -> (r1s, f_seq_Void2 f1s f2s)
       
   166         | _, _     -> (SEQ(r1s, r2s), f_seq f1s f2s))
       
   167   | RECD(x, r1) -> 
       
   168       let (r1s, f1s) = simp r1 in
       
   169       (RECD(x, r1s), f_rec f1s)
       
   170   | r -> (r, f_id)
       
   171 ;;
       
   172 
       
   173 (* matcher function *)
       
   174 let matcher r s = nullable(ders (explode s) r);;
       
   175 
       
   176 (* lexing function (produces a value) *)
       
   177 exception LexError;;
       
   178 
       
   179 let rec lex r s = match s with
       
   180     [] -> if (nullable r) then mkeps r else raise LexError
       
   181   | c::cs -> inj r c (lex (der c r) cs);;
       
   182 
       
   183 let lexing r s = lex r (explode s);;
       
   184 
       
   185 (* lexing with simplification *)
       
   186 let rec lex_simp r s = match s with
       
   187     [] -> if (nullable r) then mkeps r else raise LexError
       
   188   | c::cs -> 
       
   189     let (r_simp, f_simp) = simp (der c r) in
       
   190     inj r c (f_simp (lex_simp r_simp cs));;
       
   191 
       
   192 let lexing_simp r s = lex_simp r (explode s);;
       
   193 
       
   194 
       
   195 
       
   196 
       
   197 (* Lexing rules for a small WHILE language *)
       
   198 let sym = alts (List.map chr (explode "abcdefghijklmnopqrstuvwxyz"));;
       
   199 let digit = alts (List.map chr (explode "0123456789"));;
       
   200 let idents =  sym -- STAR(sym ++ digit);;
       
   201 let nums = plus(digit);;
       
   202 let keywords = alts (List.map str ["skip"; "while"; "do"; "if"; "then"; "else"; "read"; "write"; "true"; "false"]);;
       
   203 let semicolon = str ";"
       
   204 let ops = alts (List.map str [":="; "=="; "-"; "+"; "*"; "!="; "<"; ">"; "<="; ">="; "%"; "/"]);;
       
   205 let whitespace = plus(str " " ++ str "\n" ++ str "\t");;
       
   206 let rparen = str ")";;
       
   207 let lparen = str "(";;
       
   208 let begin_paren = str "{";;
       
   209 let end_paren = str "}";;
       
   210 
       
   211 
       
   212 let while_regs = STAR(("k" $ keywords) ++
       
   213                       ("i" $ idents) ++
       
   214                       ("o" $ ops) ++ 
       
   215                       ("n" $ nums) ++ 
       
   216                       ("s" $ semicolon) ++ 
       
   217                       ("p" $ (lparen ++ rparen)) ++ 
       
   218                       ("b" $ (begin_paren ++ end_paren)) ++ 
       
   219                       ("w" $ whitespace));;
       
   220 
       
   221 
       
   222 
       
   223 (* Some Tests
       
   224   ============ *)
       
   225 
       
   226 let time f x =
       
   227   let t = System.DateTime.Now in
       
   228   let f_x = f x in
       
   229   (printfn "%O" (System.DateTime.Now - t); f_x);;
       
   230 
       
   231 let prog0 = "read n";;
       
   232 string_of_env (env (lexing while_regs prog0));;
       
   233 
       
   234 let prog1 = "read  n; write (n)";;
       
   235 string_of_env (env (lexing_simp while_regs prog1));;
       
   236 
       
   237 
       
   238 let prog2 = "
       
   239 i := 2;
       
   240 max := 100;
       
   241 while i < max do {
       
   242   isprime := 1;
       
   243   j := 2;
       
   244   while (j * j) <= i + 1  do {
       
   245     if i % j == 0 then isprime := 0  else skip;
       
   246     j := j + 1
       
   247   };
       
   248   if isprime == 1 then write i else skip;
       
   249   i := i + 1
       
   250 }";;
       
   251 
       
   252 for i = 1 to 100 do
       
   253   printf "%i: " i ;
       
   254   time (lexing_simp while_regs) (string_repeat prog2 i);
       
   255 done;;
       
   256