progs/fsharp/re.ml
changeset 3 94824659f6d7
child 156 6a43ea9305ba
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/fsharp/re.ml	Fri Aug 15 14:10:58 2014 +0100
@@ -0,0 +1,256 @@
+
+(* Regular expressions. RECD is not a matching construct of its own:
+   it tags a sub-expression with a name so that env can later report
+   which text that sub-expression matched. *)
+type rexp =
+   NULL                    (* matches no string at all *)
+ | EMPTY                   (* matches only the empty string *)
+ | CHAR of char            (* matches exactly this character *)
+ | ALT of rexp * rexp      (* alternative: either operand *)
+ | SEQ of rexp * rexp      (* sequence: first operand, then the second *)
+ | STAR of rexp            (* zero or more repetitions *)
+ | RECD of string * rexp   (* named sub-expression *) ;;
+
+(* Values record how a string was matched by a rexp. They are built by
+   mkeps, inj and the rectification functions, and are consumed by
+   flatten and env. *)
+type value =
+   Void                    (* the empty match, e.g. mkeps EMPTY *)
+ | Chr of char             (* a single matched character *)
+ | Sequ of value * value   (* values of the two parts of a SEQ *)
+ | Left of value           (* the left operand of an ALT matched *)
+ | Right of value          (* the right operand of an ALT matched *)
+ | Stars of value list     (* one value per iteration of a STAR *)
+ | Rec of string * value   (* value of a RECD together with its name *) ;;
+
+(* string helpers *)
+(* explode s: the characters of s as a list, in their original order *)
+let explode s = List.ofSeq s;;
+
+(* string_repeat s n: s concatenated with itself n times *)
+let string_repeat s n = s |> String.replicate n;;
+
+(* helper functions for building rexps *)
+(* seq turns a char list into the right-nested SEQ of its characters;
+   a single character gives a plain CHAR and the empty list gives EMPTY *)
+let seq s = match List.rev s with
+  | [] -> EMPTY
+  | last_char :: earlier ->
+      List.fold (fun acc c -> SEQ(CHAR(c), acc)) (CHAR(last_char)) earlier;;
+
+(* rexp for a single character *)
+let chr c = CHAR c
+
+(* rexp matching exactly the characters of the string s *)
+let str s = explode s |> seq;;
+
+(* one or more repetitions of r *)
+let plus r = SEQ(r, STAR r);;
+
+(* infix alternative *)
+let (++) left right = ALT(left, right);;
+
+(* infix sequence *)
+let (--) first second = SEQ(first, second);;
+
+(* infix record: name $ r tags r with name *)
+let ($) name r = RECD(name, r);;
+
+(* alternative of all rexps in rs, nested to the left;
+   the empty list gives NULL and a singleton list gives its element *)
+let alts rs = match rs with
+  | [] -> NULL
+  | _ -> List.reduce (++) rs;;
+
+
+(* size of a regular expression, counted as its number of
+   constructors - for testing purposes *)
+let rec size = function
+  | NULL | EMPTY | CHAR(_) -> 1
+  | STAR(r) | RECD(_, r) -> 1 + size r
+  | ALT(r1, r2) | SEQ(r1, r2) -> 1 + size r1 + size r2;;
+
+(* nullable function: true exactly when the regular expression
+   can recognise the empty string *)
+let rec nullable = function
+  | EMPTY | STAR(_) -> true
+  | NULL | CHAR(_) -> false
+  | RECD(_, r) -> nullable r
+  | ALT(r1, r2) -> nullable r1 || nullable r2
+  | SEQ(r1, r2) -> nullable r1 && nullable r2;;
+
+(* derivative of a regular expression r w.r.t. a character c;
+   a RECD wrapper is dropped by taking the derivative of its body *)
+let rec der c r = match r with
+  | NULL | EMPTY -> NULL
+  | CHAR(d) -> if c = d then EMPTY else NULL
+  | ALT(r1, r2) -> ALT(der c r1, der c r2)
+  | SEQ(r1, r2) ->
+      let consumed_by_r1 = SEQ(der c r1, r2) in
+      (* when r1 can be empty, c may also be the first character of r2 *)
+      if nullable r1 then ALT(consumed_by_r1, der c r2) else consumed_by_r1
+  | STAR(body) -> SEQ(der c body, STAR(body))
+  | RECD(_, body) -> der c body;;
+
+(* derivative w.r.t. a list of chars: applies der for each character
+   of s in turn, starting from r *)
+let ders s r = List.fold (fun acc c -> der c acc) r s;;
+
+(* extracts the matched string from a value by concatenating its
+   characters from left to right *)
+let rec flatten v = match v with
+  | Void -> ""
+  | Chr(c) -> System.Convert.ToString(c)
+  | Sequ(first, second) -> flatten first ^ flatten second
+  | Stars(items) -> items |> List.map flatten |> String.concat ""
+  | Left(inner) | Right(inner) | Rec(_, inner) -> flatten inner;;
+
+
+(* extracts an environment from a value: the list of (name, matched
+   text) pairs, one for every Rec node in v. An outer Rec comes before
+   the Rec nodes nested inside it; otherwise the order is left to right. *)
+let rec env v = match v with
+  | Void -> []
+  | Chr(_) -> []
+  | Left(v) -> env v
+  | Right(v) -> env v
+  | Sequ(v1, v2) -> env v1 @ env v2
+  (* List.collect concatenates the per-iteration environments in one
+     pass; folding (@) from the left would copy the accumulated prefix
+     again for every element of vs *)
+  | Stars(vs) -> List.collect env vs
+  | Rec(x, v) -> (x, flatten v) :: env v;;
+
+(* renders one environment entry as (name,text) *)
+let string_of_pair (x, s) = sprintf "(%s,%s)" x s;;
+(* renders an environment as its entries separated by commas *)
+let string_of_env xs = xs |> List.map string_of_pair |> String.concat ",";;
+
+
+(* the value for a nullable rexp: mkeps r describes how r matches the
+   empty string. For ALT the left operand is preferred when it is
+   nullable. NULL and CHAR cannot match the empty string, so no value
+   exists for them; this is reported with a descriptive failure
+   instead of an incomplete pattern match. *)
+let rec mkeps r = match r with
+  | EMPTY -> Void
+  | ALT(r1, r2) -> 
+      if nullable r1 then Left(mkeps r1) else Right(mkeps r2)
+  | SEQ(r1, r2) -> Sequ(mkeps r1, mkeps r2)
+  | STAR(_) -> Stars([])
+  | RECD(x, r) -> Rec(x, mkeps r)
+  | NULL | CHAR(_) -> failwith "mkeps: regular expression is not nullable";;
+
+
+(* injection of a char into a value: given a value v for (der c r),
+   rebuilds the value for r itself by putting the character back in.
+   Each clause undoes the corresponding clause of der. Combinations of
+   r and v that are not listed are not handled and make the match fail. *)
+let rec inj r c v = match r, v with
+  (* der gives SEQ(der c r, STAR r): v1 becomes the first iteration *)
+  | STAR(r), Sequ(v1, Stars(vs)) -> Stars(inj r c v1 :: vs)
+  (* der gave SEQ(der c r1, r2), i.e. r1 was not nullable *)
+  | SEQ(r1, r2), Sequ(v1, v2) -> Sequ(inj r1 c v1, v2)
+  (* r1 nullable, left branch of the ALT built by der: c belongs to r1 *)
+  | SEQ(r1, r2), Left(Sequ(v1, v2)) -> Sequ(inj r1 c v1, v2)
+  (* r1 nullable, right branch: r1 matched the empty string, c belongs to r2 *)
+  | SEQ(r1, r2), Right(v2) -> Sequ(mkeps r1, inj r2 c v2)
+  | ALT(r1, r2), Left(v1) -> Left(inj r1 c v1)
+  | ALT(r1, r2), Right(v2) -> Right(inj r2 c v2)
+  (* the result carries d, the character stored in the rexp; c is not used *)
+  | CHAR(d), Void -> Chr(d) 
+  (* der drops the RECD wrapper, so v is passed on unchanged and re-wrapped *)
+  | RECD(x, r1), _ -> Rec(x, inj r1 c v);;
+
+(* rectification functions used by simp: each one turns a value for a
+   simplified rexp back into a value for the rexp before simplification *)
+let f_id v = v;;
+let f_right f v = Right(f v);;
+let f_left f v = Left(f v);;
+let f_alt f1 f2 = function
+  | Left(v) -> Left(f1 v)
+  | Right(v) -> Right(f2 v);;
+let f_seq f1 f2 = function
+  | Sequ(v1, v2) -> Sequ(f1 v1, f2 v2);;
+(* the first component was simplified away to EMPTY: rebuild it from Void *)
+let f_seq_Void1 f1 f2 v = Sequ(f1 Void, f2 v);;
+(* the second component was simplified away to EMPTY: rebuild it from Void *)
+let f_seq_Void2 f1 f2 v = Sequ(f1 v, f2 Void);;
+let f_rec f = function
+  | Rec(x, v) -> Rec(x, f v);;
+
+(* simplification of regular expressions; besides the simplified rexp
+   it returns a rectification function that turns a value for the
+   simplified rexp back into a value for the original one.
+   Nothing below a STAR is simplified. *)
+let rec simp r = match r with
+  | RECD(x, body) ->
+      let (body_s, fix_body) = simp body in
+      (RECD(x, body_s), f_rec fix_body)
+  | ALT(a, b) ->
+      let (a_s, fix_a) = simp a in
+      let (b_s, fix_b) = simp b in
+      (match a_s, b_s with
+        | NULL, _ -> (b_s, f_right fix_b)
+        | _, NULL -> (a_s, f_left fix_a)
+        (* both operands simplify to the same rexp: keep only the left one *)
+        | _, _ when a_s = b_s -> (a_s, f_left fix_a)
+        | _, _ -> (ALT(a_s, b_s), f_alt fix_a fix_b))
+  | SEQ(a, b) ->
+      let (a_s, fix_a) = simp a in
+      let (b_s, fix_b) = simp b in
+      (match a_s, b_s with
+        | NULL, _  -> (NULL, f_right fix_b)
+        | _, NULL  -> (NULL, f_left fix_a)
+        | EMPTY, _ -> (b_s, f_seq_Void1 fix_a fix_b)
+        | _, EMPTY -> (a_s, f_seq_Void2 fix_a fix_b)
+        | _, _     -> (SEQ(a_s, b_s), f_seq fix_a fix_b))
+  | other -> (other, f_id)
+;;
+
+(* matcher function: r matches s exactly when the derivative of r
+   w.r.t. all characters of s is nullable *)
+let matcher r s =
+  let residual = ders (explode s) r in
+  nullable residual;;
+
+(* lexing function (produces a value) *)
+exception LexError;;
+
+(* lex r s returns the value describing how r matches the char list s
+   and raises LexError when r does not match all of s.
+   The derivatives are taken left to right while remembering each
+   (rexp, char) pair; the characters are then injected back starting
+   with the last one. Both passes are folds, so the stack depth does
+   not grow with the length of the input. *)
+let lex r s =
+  let step (cur, trail) c = (der c cur, (cur, c) :: trail) in
+  let (last, trail) = List.fold step (r, []) s in
+  if nullable last
+  then List.fold (fun v (ri, c) -> inj ri c v) (mkeps last) trail
+  else raise LexError;;
+
+(* lexing r s: lex applied to the characters of the string s *)
+let lexing r s = lex r (explode s);;
+
+(* lexing with simplification: like lex, but every derivative is
+   simplified by simp before the next character is consumed. The
+   rectification function returned by simp is remembered next to the
+   (rexp, char) pair and applied to the value before the character is
+   injected back. Raises LexError when r does not match all of s.
+   Both passes are folds, so the stack depth does not grow with the
+   length of the input. *)
+let lex_simp r s =
+  let step (cur, trail) c =
+    (let (next, rectify) = simp (der c cur) in
+     (next, (cur, c, rectify) :: trail)) in
+  let (last, trail) = List.fold step (r, []) s in
+  if nullable last
+  then List.fold (fun v (ri, c, rectify) -> inj ri c (rectify v)) (mkeps last) trail
+  else raise LexError;;
+
+(* lexing_simp r s: lex_simp applied to the characters of the string s *)
+let lexing_simp r s = lex_simp r (explode s);;
+
+
+
+
+(* Lexing rules for a small WHILE language *)
+let sym = explode "abcdefghijklmnopqrstuvwxyz" |> List.map chr |> alts;;
+let digit = explode "0123456789" |> List.map chr |> alts;;
+(* a letter followed by any number of letters or digits *)
+let idents = SEQ(sym, STAR(ALT(sym, digit)));;
+let nums = plus digit;;
+let keywords =
+  ["skip"; "while"; "do"; "if"; "then"; "else"; "read"; "write"; "true"; "false"]
+  |> List.map str |> alts;;
+let semicolon = str ";"
+let ops =
+  [":="; "=="; "-"; "+"; "*"; "!="; "<"; ">"; "<="; ">="; "%"; "/"]
+  |> List.map str |> alts;;
+(* one or more blanks, newlines or tabs *)
+let whitespace = [" "; "\n"; "\t"] |> List.map str |> alts |> plus;;
+let rparen = str ")";;
+let lparen = str "(";;
+let begin_paren = str "{";;
+let end_paren = str "}";;
+
+
+(* any number of tokens; each kind of token is tagged with a one-letter
+   name so that env reports (kind, text) pairs *)
+let while_regs =
+  STAR(alts [ ("k" $ keywords);
+              ("i" $ idents);
+              ("o" $ ops);
+              ("n" $ nums);
+              ("s" $ semicolon);
+              ("p" $ (lparen ++ rparen));
+              ("b" $ (begin_paren ++ end_paren));
+              ("w" $ whitespace) ]);;
+
+
+
+(* Some Tests
+  ============ *)
+
+(* time f x evaluates f x, prints the elapsed time on its own line and
+   returns the result of f x. A Stopwatch is used instead of reading
+   the wall clock twice, so the measurement is not disturbed by
+   adjustments of the system clock; the printed value is still a
+   TimeSpan. *)
+let time f x =
+  let sw = System.Diagnostics.Stopwatch.StartNew() in
+  let f_x = f x in
+  (sw.Stop(); printfn "%O" sw.Elapsed; f_x);;
+
+(* lexing without simplification *)
+let prog0 = "read n";;
+prog0 |> lexing while_regs |> env |> string_of_env;;
+
+(* lexing with simplification *)
+let prog1 = "read  n; write (n)";;
+prog1 |> lexing_simp while_regs |> env |> string_of_env;;
+
+
+(* a larger WHILE program; the timing loop that follows lexes
+   repeated copies of this text. Note that the literal starts with a
+   line break directly after the opening quote. *)
+let prog2 = "
+i := 2;
+max := 100;
+while i < max do {
+  isprime := 1;
+  j := 2;
+  while (j * j) <= i + 1  do {
+    if i % j == 0 then isprime := 0  else skip;
+    j := j + 1
+  };
+  if isprime == 1 then write i else skip;
+  i := i + 1
+}";;
+
+(* times lexing_simp on 1 to 100 concatenated copies of prog2; each
+   output line is the number of copies followed by the elapsed time.
+   The lexed value itself is not needed here, so it is discarded
+   explicitly to keep the loop body of type unit. *)
+for i = 1 to 100 do
+  printf "%i: " i ;
+  ignore (time (lexing_simp while_regs) (string_repeat prog2 i));
+done;;
+