diff -r 2bc119fc8657 -r 94824659f6d7 progs/ocaml/re.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/ocaml/re.ml Fri Aug 15 14:10:58 2014 +0100 @@ -0,0 +1,394 @@ + +type rexp = + NULL + | EMPTY + | CHAR of char + | ALT of rexp * rexp + | SEQ of rexp * rexp + | STAR of rexp + | RECD of string * rexp;; + +type value = + Void + | Chr of char + | Sequ of value * value + | Left of value + | Right of value + | Stars of value list + | Rec of string * value;; + +let rec string_of_val v = match v with + Void -> "Void" + | Chr(c) -> String.make 1 c + | Sequ(v1, v2) -> "Seq(" ^ string_of_val v1 ^ "," ^ string_of_val v2 ^ ")" + | Left(v1) -> "Left(" ^ string_of_val v1 ^ ")" + | Right(v1) -> "Right(" ^ string_of_val v1 ^ ")" + | Stars(vs) -> "[" ^ String.concat "," (List.map string_of_val vs) ^ "]" + | Rec(x, v1) -> x ^ " $ " ^ string_of_val v1;; + + +(* some helper functions for strings *) +let explode s = + let rec exp i l = + if i < 0 then l else exp (i - 1) (s.[i] :: l) in + exp (String.length s - 1) [];; + +let string_repeat s n = + Array.fold_left (^) "" (Array.make n s);; + +(* some helper functions for rexps *) +let rec seq s = match s with + [] -> EMPTY + | [c] -> CHAR(c) + | c::cs -> SEQ(CHAR(c), seq cs);; + +let chr c = CHAR(c) + +let str s = seq(explode s);; + +let plus r = SEQ(r, STAR(r));; + +let (++) r1 r2 = ALT(r1, r2);; + +let (--) r1 r2 = SEQ(r1, r2);; + +let ($) x r = RECD(x, r);; + +let alts rs = match rs with + [] -> NULL + | [r] -> r + | r::rs -> List.fold_left (++) r rs;; + + +(* size of a regular expressions - for testing purposes *) +let rec size r = match r with + NULL -> 1 + | EMPTY -> 1 + | CHAR(_) -> 1 + | ALT(r1, r2) -> 1 + (size r1) + (size r2) + | SEQ(r1, r2) -> 1 + (size r1) + (size r2) + | STAR(r) -> 1 + (size r) + | RECD(_, r) -> 1 + (size r);; + +(* nullable function: tests whether the regular + expression can recognise the empty string *) +let rec nullable r = match r with + NULL -> false + | EMPTY -> true + | CHAR(_) -> false + | ALT(r1, r2) -> nullable(r1) || nullable(r2) + | SEQ(r1, r2) -> nullable(r1) && nullable(r2) + | STAR(_) -> true + | RECD(_, r) -> nullable(r);; + +(* derivative of a regular expression r w.r.t. a character c *) +let rec der c r = match r with + NULL -> NULL + | EMPTY -> NULL + | CHAR(d) -> if c = d then EMPTY else NULL + | ALT(r1, r2) -> ALT(der c r1, der c r2) + | SEQ(r1, r2) -> + if nullable r1 then ALT(SEQ(der c r1, r2), der c r2) + else SEQ(der c r1, r2) + | STAR(r) -> SEQ(der c r, STAR(r)) + | RECD(_, r) -> der c r;; + +(* derivative w.r.t. a list of chars (iterates der) *) +let rec ders s r = match s with + [] -> r + | c::s -> ders s (der c r);; + +(* extracts a string from value *) +let rec flatten v = match v with + Void -> "" + | Chr(c) -> String.make 1 c + | Left(v) -> flatten v + | Right(v) -> flatten v + | Sequ(v1, v2) -> flatten v1 ^ flatten v2 + | Stars(vs) -> String.concat "" (List.map flatten vs) + | Rec(_, v) -> flatten v;; + + +(* extracts an environment from a value *) +let rec env v = match v with + Void -> [] + | Chr(c) -> [] + | Left(v) -> env v + | Right(v) -> env v + | Sequ(v1, v2) -> env v1 @ env v2 + | Stars(vs) -> List.flatten (List.map env vs) + | Rec(x, v) -> (x, flatten v) :: env v;; + +let string_of_pair (x, s) = "(" ^ x ^ "," ^ s ^ ")";; +let string_of_env xs = String.concat "," (List.map string_of_pair xs);; + + +(* the value for a nullable rexp *) +let rec mkeps r = match r with + EMPTY -> Void + | ALT(r1, r2) -> + if nullable r1 then Left(mkeps r1) else Right(mkeps r2) + | SEQ(r1, r2) -> Sequ(mkeps r1, mkeps r2) + | STAR(r) -> Stars([]) + | RECD(x, r) -> Rec(x, mkeps r);; + + +(* injection of a char into a value *) +let rec inj r c v = match r, v with + STAR(r), Sequ(v1, Stars(vs)) -> Stars(inj r c v1 :: vs) + | SEQ(r1, r2), Sequ(v1, v2) -> Sequ(inj r1 c v1, v2) + | SEQ(r1, r2), Left(Sequ(v1, v2)) -> Sequ(inj r1 c v1, v2) + | SEQ(r1, r2), Right(v2) -> Sequ(mkeps r1, inj r2 c v2) + | ALT(r1, r2), Left(v1) -> Left(inj r1 c v1) + | ALT(r1, r2), Right(v2) -> Right(inj r2 c v2) + | CHAR(d), Void -> Chr(d) + | RECD(x, r1), _ -> Rec(x, inj r1 c v);; + +(* some "rectification" functions for simplification *) +let f_id v = v;; +let f_right f = fun v -> Right(f v);; +let f_left f = fun v -> Left(f v);; +let f_alt f1 f2 = fun v -> match v with + Right(v) -> Right(f2 v) + | Left(v) -> Left(f1 v);; +let f_seq f1 f2 = fun v -> match v with + Sequ(v1, v2) -> Sequ(f1 v1, f2 v2);; +let f_seq_Void1 f1 f2 = fun v -> Sequ(f1 Void, f2 v);; +let f_seq_Void2 f1 f2 = fun v -> Sequ(f1 v, f2 Void);; +let f_rec f = fun v -> match v with + Rec(x, v) -> Rec(x, f v);; + +exception ShouldNotHappen +let f_error v = raise ShouldNotHappen + +(* simplification of regular expressions returning also an + rectification function; no simplification under STARs *) +let rec simp r = match r with + ALT(r1, r2) -> + let (r1s, f1s) = simp r1 in + let (r2s, f2s) = simp r2 in + (match r1s, r2s with + NULL, _ -> (r2s, f_right f2s) + | _, NULL -> (r1s, f_left f1s) + | _, _ -> if r1s = r2s then (r1s, f_left f1s) + else (ALT (r1s, r2s), f_alt f1s f2s)) + | SEQ(r1, r2) -> + let (r1s, f1s) = simp r1 in + let (r2s, f2s) = simp r2 in + (match r1s, r2s with + NULL, _ -> (NULL, f_error) + | _, NULL -> (NULL, f_error) + | EMPTY, _ -> (r2s, f_seq_Void1 f1s f2s) + | _, EMPTY -> (r1s, f_seq_Void2 f1s f2s) + | _, _ -> (SEQ(r1s, r2s), f_seq f1s f2s)) + | RECD(x, r1) -> + let (r1s, f1s) = simp r1 in + (RECD(x, r1s), f_rec f1s) + | r -> (r, f_id) +;; + +let rec der_simp c r = match r with + NULL -> (NULL, f_id) + | EMPTY -> (NULL, f_id) + | CHAR(d) -> ((if c = d then EMPTY else NULL), f_id) + | ALT(r1, r2) -> + let (r1d, f1d) = der_simp c r1 in + let (r2d, f2d) = der_simp c r2 in + (match r1d, r2d with + NULL, _ -> (r2d, f_right f2d) + | _, NULL -> (r1d, f_left f1d) + | _, _ -> if r1d = r2d then (r1d, f_left f1d) + else (ALT (r1d, r2d), f_alt f1d f2d)) + | SEQ(r1, r2) -> + if nullable r1 + then + let (r1d, f1d) = der_simp c r1 in + let (r2d, f2d) = der_simp c r2 in + let (r2s, f2s) = simp r2 in + (match r1d, r2s, r2d with + NULL, _, _ -> (r2d, f_right f2d) + | _, NULL, _ -> (r2d, f_right f2d) + | _, _, NULL -> (SEQ(r1d, r2s), f_left (f_seq f1d f2s)) + | EMPTY, _, _ -> (ALT(r2s, r2d), f_alt (f_seq_Void1 f1d f2s) f2d) + | _, EMPTY, _ -> (ALT(r1d, r2d), f_alt (f_seq_Void2 f1d f2s) f2d) + | _, _, _ -> (ALT(SEQ(r1d, r2s), r2d), f_alt (f_seq f1d f2s) f2d)) + else + let (r1d, f1d) = der_simp c r1 in + let (r2s, f2s) = simp r2 in + (match r1d, r2s with + NULL, _ -> (NULL, f_error) + | _, NULL -> (NULL, f_error) + | EMPTY, _ -> (r2s, f_seq_Void1 f1d f2s) + | _, EMPTY -> (r1d, f_seq_Void2 f1d f2s) + | _, _ -> (SEQ(r1d, r2s), f_seq f1d f2s)) + | STAR(r1) -> + let (r1d, f1d) = der_simp c r1 in + (match r1d with + NULL -> (NULL, f_error) + | EMPTY -> (STAR r1, f_seq_Void1 f1d f_id) + | _ -> (SEQ(r1d, STAR(r1)), f_seq f1d f_id)) + | RECD(x, r1) -> der_simp c r1 + + +(* matcher function *) +let matcher r s = nullable(ders (explode s) r);; + +(* lexing function (produces a value) *) +exception LexError;; + +let rec lex r s = match s with + [] -> if (nullable r) then mkeps r else raise LexError + | c::cs -> inj r c (lex (der c r) cs);; + +let lexing r s = lex r (explode s);; + +(* lexing with simplification *) +let rec lex_simp r s = match s with + [] -> if (nullable r) then mkeps r else raise LexError + | c::cs -> + let (r_simp, f_simp) = simp (der c r) in + inj r c (f_simp (lex_simp r_simp cs));; + +let lexing_simp r s = lex_simp r (explode s);; + +let rec lex_simp2 r s = match s with + [] -> if (nullable r) then mkeps r else raise LexError + | c::cs -> + let (r_simp, f_simp) = der_simp c r in + inj r c (f_simp (lex_simp2 r_simp cs));; + +let lexing_simp2 r s = lex_simp2 r (explode s);; + + +(* lexing with accumulation *) +let rec lex_acc r s f = match s with + [] -> if (nullable r) then f (mkeps r) else raise LexError + | c::cs -> + let (r_simp, f_simp) = simp (der c r) in + lex_acc r_simp cs (fun v -> f (inj r c (f_simp v)));; + +let lexing_acc r s = lex_acc r (explode s) (f_id);; + +let rec lex_acc2 r s f = match s with + [] -> if (nullable r) then f (mkeps r) else raise LexError + | c::cs -> + let (r_simp, f_simp) = der_simp c r in + lex_acc2 r_simp cs (fun v -> f (inj r c (f_simp v)));; + +let lexing_acc2 r s = lex_acc2 r (explode s) (f_id);; + + +(* Lexing rules for a small WHILE language *) +let sym = alts (List.map chr (explode "abcdefghijklmnopqrstuvwxyz"));; +let digit = alts (List.map chr (explode "0123456789"));; +let idents = sym -- STAR(sym ++ digit);; +let nums = plus(digit);; +let keywords = alts + (List.map str ["skip"; "while"; "do"; "if"; "then"; "else"; "read"; "write"; "true"; "false"]);; +let semicolon = str ";" +let ops = alts + (List.map str [":="; "=="; "-"; "+"; "*"; "!="; "<"; ">"; "<="; ">="; "%"; "/"]);; +let whitespace = plus(str " " ++ str "\n" ++ str "\t");; +let rparen = str ")";; +let lparen = str "(";; +let begin_paren = str "{";; +let end_paren = str "}";; + + +let while_regs = STAR(("k" $ keywords) ++ + ("i" $ idents) ++ + ("o" $ ops) ++ + ("n" $ nums) ++ + ("s" $ semicolon) ++ + ("p" $ (lparen ++ rparen)) ++ + ("b" $ (begin_paren ++ end_paren)) ++ + ("w" $ whitespace));; + + + +(* Some Tests + ============ *) + +let time f x = + let t = Sys.time() in + let f_x = (f x; f x; f x) in + (print_float ((Sys.time() -. t) /. 3.0); f_x);; + + +let prog0 = "read n";; + +let prog1 = "read n; write (n)";; +string_of_env (env (lexing_simp while_regs prog1));; + + +let prog2 = " +i := 2; +max := 100; +while i < max do { + isprime := 1; + j := 2; + while (j * j) <= i + 1 do { + if i % j == 0 then isprime := 0 else skip; + j := j + 1 + }; + if isprime == 1 then write i else skip; + i := i + 1 +}";; + +let tst1 = (lexing_simp while_regs prog2 = lexing_simp2 while_regs prog2) in +let tst2 = (lexing_simp while_regs prog2 = lexing_acc while_regs prog2) in +let tst3 = (lexing_simp while_regs prog2 = lexing_acc2 while_regs prog2) +in + print_string ("Sanity test simp vs simp2: >>" ^ (string_of_bool tst1) ^ "<<\n") ; + print_string ("Sanity test simp vs acc: >>" ^ (string_of_bool tst2) ^ "<<\n") ; + print_string ("Sanity test simp vs acc2: >>" ^ (string_of_bool tst3) ^ "<<") ; + print_newline ();; + + + +type range = + To of int * int;; + +let (---) i j = To(i, j);; + +let forby n = + fun range -> match range with To(lo, up) -> + (fun f -> + let rec loop lo = + if lo > up then () else (f lo; loop (lo + n)) + in loop lo);; + +let step_simp i = + (print_string ((string_of_int i) ^ ": ") ; + time (lexing_simp while_regs) (string_repeat prog2 i) ; + print_newline ());; + +let step_simp2 i = + (print_string ((string_of_int i) ^ ": ") ; + time (lexing_simp2 while_regs) (string_repeat prog2 i) ; + print_newline ());; + +let step_acc i = + (print_string ((string_of_int i) ^ ": ") ; + time (lexing_acc while_regs) (string_repeat prog2 i) ; + print_newline ());; + +let step_acc2 i = + (print_string ((string_of_int i) ^ ": ") ; + time (lexing_acc2 while_regs) (string_repeat prog2 i) ; + print_newline ());; + +forby 100 (100 --- 700) step_simp;; +print_newline ();; +forby 100 (100 --- 700) step_simp2;; +print_newline ();; +forby 100 (100 --- 700) step_acc;; +print_newline ();; +forby 100 (100 --- 700) step_acc2;; +print_newline ();; +forby 1000 (1000 --- 5000) step_acc;; +print_newline ();; +forby 1000 (1000 --- 5000) step_acc2;; +(*print_newline ();;*) +(* forby 500 (100 --- 5000) step_simp;; *) +