diff -r 2bc119fc8657 -r 94824659f6d7 progs/sml/re.ML --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/sml/re.ML Fri Aug 15 14:10:58 2014 +0100 @@ -0,0 +1,426 @@ + +datatype rexp = + NULL + | EMPTY + | CHAR of char + | ALT of rexp * rexp + | SEQ of rexp * rexp + | STAR of rexp + | RECD of string * rexp + +datatype value = + Void + | Chr of char + | Sequ of value * value + | Left of value + | Right of value + | Stars of value list + | Rec of string * value + +(* some helper functions for strings *) +fun string_repeat s n = String.concat (List.tabulate (n, fn _ => s)) + +(* some helper functions for rexps *) +fun seq s = case s of + [] => EMPTY + | [c] => CHAR(c) + | c::cs => SEQ(CHAR(c), seq cs) + +fun chr c = CHAR(c) + +fun str s = seq(explode s) + +fun plus r = SEQ(r, STAR(r)) + +infix 9 ++ +infix 9 -- +infix 9 $ + +fun op ++ (r1, r2) = ALT(r1, r2) + +fun op -- (r1, r2) = SEQ(r1, r2) + +fun op $ (x, r) = RECD(x, r) + +fun alts rs = case rs of + [] => NULL + | [r] => r + | r::rs => List.foldl (op ++) r rs + + +(* size of a regular expressions - for testing purposes *) +fun size r = case r of + NULL => 1 + | EMPTY => 1 + | CHAR(_) => 1 + | ALT(r1, r2) => 1 + (size r1) + (size r2) + | SEQ(r1, r2) => 1 + (size r1) + (size r2) + | STAR(r) => 1 + (size r) + | RECD(_, r) => 1 + (size r) + +(* nullable function: tests whether the regular + expression can recognise the empty string *) +fun nullable r = case r of + NULL => false + | EMPTY => true + | CHAR(_) => false + | ALT(r1, r2) => nullable(r1) orelse nullable(r2) + | SEQ(r1, r2) => nullable(r1) andalso nullable(r2) + | STAR(_) => true + | RECD(_, r) => nullable(r) + +(* derivative of a regular expression r w.r.t. a character c *) +fun der c r = case r of + NULL => NULL + | EMPTY => NULL + | CHAR(d) => if c = d then EMPTY else NULL + | ALT(r1, r2) => ALT(der c r1, der c r2) + | SEQ(r1, r2) => + if nullable r1 then ALT(SEQ(der c r1, r2), der c r2) + else SEQ(der c r1, r2) + | STAR(r) => SEQ(der c r, STAR(r)) + | RECD(_, r) => der c r + +(* derivative w.r.t. a list of chars (iterates der) *) +fun ders s r = case s of + [] => r + | c::s => ders s (der c r) + +(* extracts a string from value *) +fun flatten v = case v of + Void => "" + | Chr(c) => Char.toString c + | Left(v) => flatten v + | Right(v) => flatten v + | Sequ(v1, v2) => flatten v1 ^ flatten v2 + | Stars(vs) => String.concat (List.map flatten vs) + | Rec(_, v) => flatten v + + +(* extracts an environment from a value *) +fun env v = case v of + Void => [] + | Chr(c) => [] + | Left(v) => env v + | Right(v) => env v + | Sequ(v1, v2) => env v1 @ env v2 + | Stars(vs) => List.foldr (op @) [] (List.map env vs) + | Rec(x, v) => (x, flatten v) :: env v + +fun string_of_pair (x, s) = "(" ^ x ^ "," ^ s ^ ")" +fun string_of_env xs = String.concatWith "," (List.map string_of_pair xs) + + +(* the value for a nullable rexp *) +fun mkeps r = case r of + EMPTY => Void + | ALT(r1, r2) => + if nullable r1 then Left(mkeps r1) else Right(mkeps r2) + | SEQ(r1, r2) => Sequ(mkeps r1, mkeps r2) + | STAR(r) => Stars([]) + | RECD(x, r) => Rec(x, mkeps r) + +exception Error + +(* injection of a char into a value *) +fun inj r c v = case (r, v) of + (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj r c v1 :: vs) + | (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj r1 c v1, v2) + | (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj r1 c v1, v2) + | (SEQ(r1, r2), Right(v2)) => Sequ(mkeps r1, inj r2 c v2) + | (ALT(r1, r2), Left(v1)) => Left(inj r1 c v1) + | (ALT(r1, r2), Right(v2)) => Right(inj r2 c v2) + | (CHAR(d), Void) => Chr(d) + | (RECD(x, r1), _) => Rec(x, inj r1 c v) + | _ => (print ("\nr: " ^ PolyML.makestring r ^ "\n"); + print ("v: " ^ PolyML.makestring v ^ "\n"); + raise Error) + +(* some "rectification" functions for simplification *) +fun f_id v = v +fun f_right f = fn v => Right(f v) +fun f_left f = fn v => Left(f v) +fun f_alt f1 f2 = fn v => case v of + Right(v) => Right(f2 v) + | Left(v) => Left(f1 v) +fun f_seq f1 f2 = fn v => case v of + Sequ(v1, v2) => Sequ(f1 v1, f2 v2) +fun f_seq_Void1 f1 f2 = fn v => Sequ(f1 Void, f2 v) +fun f_seq_Void2 f1 f2 = fn v => Sequ(f1 v, f2 Void) +fun f_rec f = fn v => case v of + Rec(x, v) => Rec(x, f v) + +exception ShouldNotHappen + +fun f_error v = raise ShouldNotHappen + +(* simplification of regular expressions returning also an + rectification function; no simplification under STARs *) +fun simp r = case r of + ALT(r1, r2) => + let val (r1s, f1s) = simp r1 + val (r2s, f2s) = simp r2 in + (case (r1s, r2s) of + (NULL, _) => (r2s, f_right f2s) + | (_, NULL) => (r1s, f_left f1s) + | (_, _) => if r1s = r2s then (r1s, f_left f1s) + else (ALT (r1s, r2s), f_alt f1s f2s)) + end + | SEQ(r1, r2) => + let val (r1s, f1s) = simp r1 + val (r2s, f2s) = simp r2 in + (case (r1s, r2s) of + (NULL, _) => (NULL, f_error) + | (_, NULL) => (NULL, f_error) + | (EMPTY, _) => (r2s, f_seq_Void1 f1s f2s) + | (_, EMPTY) => (r1s, f_seq_Void2 f1s f2s) + | (_, _) => (SEQ(r1s, r2s), f_seq f1s f2s)) + end + | RECD(x, r1) => + let val (r1s, f1s) = simp r1 in + (RECD(x, r1s), f_rec f1s) + end + | r => (r, f_id) + +fun der_simp c r = case r of + NULL => (NULL, f_id) + | EMPTY => (NULL, f_id) + | CHAR(d) => ((if c = d then EMPTY else NULL), f_id) + | ALT(r1, r2) => + let + val (r1d, f1d) = der_simp c r1 + val (r2d, f2d) = der_simp c r2 + in + case (r1d, r2d) of + (NULL, _) => (r2d, f_right f2d) + | (_, NULL) => (r1d, f_left f1d) + | (_, _) => if r1d = r2d then (r1d, f_left f1d) + else (ALT (r1d, r2d), f_alt f1d f2d) + end + | SEQ(r1, r2) => + if nullable r1 + then + let + val (r1d, f1d) = der_simp c r1 + val (r2d, f2d) = der_simp c r2 + val (r2s, f2s) = simp r2 + in + case (r1d, r2s, r2d) of + (NULL, _, _) => (r2d, f_right f2d) + | (_, NULL, _) => (r2d, f_right f2d) + | (_, _, NULL) => (SEQ(r1d, r2s), f_left (f_seq f1d f2s)) + | (EMPTY, _, _) => (ALT(r2s, r2d), f_alt (f_seq_Void1 f1d f2s) f2d) + | (_, EMPTY, _) => (ALT(r1d, r2d), f_alt (f_seq_Void2 f1d f2s) f2d) + | (_, _, _) => (ALT(SEQ(r1d, r2s), r2d), f_alt (f_seq f1d f2s) f2d) + end + else + let + val (r1d, f1d) = der_simp c r1 + val (r2s, f2s) = simp r2 + in + case (r1d, r2s) of + (NULL, _) => (NULL, f_error) + | (_, NULL) => (NULL, f_error) + | (EMPTY, _) => (r2s, f_seq_Void1 f1d f2s) + | (_, EMPTY) => (r1d, f_seq_Void2 f1d f2s) + | (_, _) => (SEQ(r1d, r2s), f_seq f1d f2s) + end + | STAR(r1) => + let + val (r1d, f1d) = der_simp c r1 + in + case r1d of + NULL => (NULL, f_error) + | EMPTY => (STAR r1, f_seq_Void1 f1d f_id) + | _ => (SEQ(r1d, STAR(r1)), f_seq f1d f_id) + end + | RECD(x, r1) => der_simp c r1 + + + +(* matcher function *) +fun matcher r s = nullable(ders (explode s) r) + +(* lexing function (produces a value) *) +exception LexError + +fun lex r s = case s of + [] => if (nullable r) then mkeps r else raise LexError + | c::cs => inj r c (lex (der c r) cs) + +fun lexing r s = lex r (explode s) + +(* lexing with simplification *) +fun lex_simp r s = case s of + [] => if (nullable r) then mkeps r else raise LexError + | c::cs => + let val (r_simp, f_simp) = simp (der c r) in + inj r c (f_simp (lex_simp r_simp cs)) + end + +fun lexing_simp r s = lex_simp r (explode s) + +fun lex_simp2 r s = case s of + [] => if (nullable r) then mkeps r else raise LexError + | c::cs => + let val (r_simp, f_simp) = der_simp c r in + inj r c (f_simp (lex_simp2 r_simp cs)) + end + +fun lexing_simp2 r s = lex_simp2 r (explode s) + +fun lex_acc r s f = case s of + [] => if (nullable r) then f (mkeps r) else raise LexError + | c::cs => + let val (r_simp, f_simp) = simp (der c r) in + lex_acc r_simp cs (fn v => f (inj r c (f_simp v))) + end + +fun lexing_acc r s = lex_acc r (explode s) (f_id) + +fun lex_acc2 r s f = case s of + [] => if (nullable r) then f (mkeps r) else raise LexError + | c::cs => + let val (r_simp, f_simp) = der_simp c r in + lex_acc2 r_simp cs (fn v => f (inj r c (f_simp v))) + end + +fun lexing_acc2 r s = lex_acc2 r (explode s) (f_id) + + +(* Lexing rules for a small WHILE language *) +val sym = alts (List.map chr (explode "abcdefghijklmnopqrstuvwxyz")) +val digit = alts (List.map chr (explode "0123456789")) +val idents = sym -- STAR(sym ++ digit) +val nums = plus(digit) +val keywords = alts (List.map str ["skip", "while", "do", "if", "then", "else", "read", "write", "true", "false"]) +val semicolon = str ";" +val ops = alts (List.map str [":=", "==", "-", "+", "*", "!=", "<", ">", "<=", ">=", "%", "/"]) +val whitespace = plus(str " " ++ str "\n" ++ str "\t") +val rparen = str ")" +val lparen = str "(" +val begin_paren = str "{" +val end_paren = str "}" + + +val while_regs = STAR(("k" $ keywords) ++ + ("i" $ idents) ++ + ("o" $ ops) ++ + ("n" $ nums) ++ + ("s" $ semicolon) ++ + ("p" $ (lparen ++ rparen)) ++ + ("b" $ (begin_paren ++ end_paren)) ++ + ("w" $ whitespace)) + + + +(* Some Tests + ============ *) + +fun time f x = + let + val t_start = Timer.startCPUTimer() + val f_x = (f x; f x; f x; f x; f x; f x; f x; f x; f x; f x) + val t_end = Time.toReal(#usr(Timer.checkCPUTimer(t_start))) / 10.0 +in + (print ((Real.toString t_end) ^ "\n"); f_x) +end + +val prog = "ab"; +val reg = ("x" $ ((str "a") -- (str "b"))); +print("Simp: " ^ PolyML.makestring (lexing_simp reg prog) ^ "\n"); +print("Acc: " ^ PolyML.makestring (lexing_acc reg prog) ^ "\n"); +print("Env " ^ string_of_env (env (lexing_acc reg prog)) ^ "\n"); + +fun fst (x, y) = x; +fun snd (x, y) = y; + +val derS = [reg, + der #"a" reg, + fst (simp (der #"a" reg)), + fst (der_simp #"a" reg)]; + +val vS = [(snd (simp (der #"a" reg))) (Chr(#"b")), + (snd (der_simp #"a" reg)) (Chr(#"b")) + ]; + +print("Ders: \n" ^ + String.concatWith "\n" (List.map PolyML.makestring derS) + ^ "\n\n"); +print("Vs: \n" ^ + String.concatWith "\n" (List.map PolyML.makestring vS) + ^ "\n\n"); + + +val prog0 = "read n"; +print("Env0 is: \n" ^ string_of_env (env (lexing_acc while_regs prog0)) ^ "\n"); + +val prog1 = "read n; write (n)"; +print("Env1 is: \n" ^ string_of_env (env (lexing_acc while_regs prog1)) ^ "\n"); + + +val prog2 = String.concatWith "\n" + ["i := 2;", + "max := 100;", + "while i < max do {", + " isprime := 1;", + " j := 2;", + " while (j * j) <= i + 1 do {", + " if i % j == 0 then isprime := 0 else skip;", + " j := j + 1", + " };", + " if isprime == 1 then write i else skip;", + " i := i + 1", + "}"]; + + +let + val tst = (lexing_simp while_regs prog2 = lexing_acc while_regs prog2) +in + print("Sanity test: >>" ^ (PolyML.makestring tst) ^ "<<\n") +end; + +(* loops in ML *) +datatype for = to of int * int +infix to + +val for = + fn lo to up => + (fn f => + let fun loop lo = + if lo > up then () else (f lo; loop (lo + 1)) + in loop lo end) + +fun forby n = + fn lo to up => + (fn f => + let fun loop lo = + if lo > up then () else (f lo; loop (lo + n)) + in loop lo end) + + +fun step_simp i = + (print ((Int.toString i) ^ ": ") ; + time (lexing_simp while_regs) (string_repeat prog2 i)) + +fun step_simp2 i = + (print ((Int.toString i) ^ ": ") ; + time (lexing_simp2 while_regs) (string_repeat prog2 i)) + +fun step_acc i = + (print ((Int.toString i) ^ ": ") ; + time (lexing_acc while_regs) (string_repeat prog2 i)) + +fun step_acc2 i = + (print ((Int.toString i) ^ ": ") ; + time (lexing_acc2 while_regs) (string_repeat prog2 i)) + +val main1 = forby 1000 (1000 to 5000) step_simp; +print "\n"; +val main2 = forby 1000 (1000 to 5000) step_simp2; +print "\n"; +val main3 = forby 1000 (1000 to 5000) step_acc; +print "\n"; +val main4 = forby 1000 (1000 to 5000) step_acc2; + +