# HG changeset patch # User Christian Urban # Date 1408108258 -3600 # Node ID 94824659f6d767e1f774954642d2c406f4241036 # Parent 2bc119fc8657accc2491ddd32de02e759c7af97c added all toy implementations diff -r 2bc119fc8657 -r 94824659f6d7 progs/fsharp/re.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/fsharp/re.ml Fri Aug 15 14:10:58 2014 +0100 @@ -0,0 +1,256 @@ + +type rexp = + NULL + | EMPTY + | CHAR of char + | ALT of rexp * rexp + | SEQ of rexp * rexp + | STAR of rexp + | RECD of string * rexp;; + +type value = + Void + | Chr of char + | Sequ of value * value + | Left of value + | Right of value + | Stars of value list + | Rec of string * value;; + +(* some helper functions for strings *) +let explode s = [for c in s -> c];; + +let string_repeat s n = String.replicate n s;; + +(* some helper functions for rexps *) +let rec seq s = match s with + | [] -> EMPTY + | [c] -> CHAR(c) + | c::cs -> SEQ(CHAR(c), seq cs);; + +let chr c = CHAR(c) + +let str s = seq(explode s);; + +let plus r = SEQ(r, STAR(r));; + +let (++) r1 r2 = ALT(r1, r2);; + +let (--) r1 r2 = SEQ(r1, r2);; + +let ($) x r = RECD(x, r);; + +let alts rs = match rs with + | [] -> NULL + | [r] -> r + | r::rs -> List.fold (++) r rs;; + + +(* size of a regular expressions - for testing purposes *) +let rec size r = match r with + | NULL -> 1 + | EMPTY -> 1 + | CHAR(_) -> 1 + | ALT(r1, r2) -> 1 + (size r1) + (size r2) + | SEQ(r1, r2) -> 1 + (size r1) + (size r2) + | STAR(r) -> 1 + (size r) + | RECD(_, r) -> 1 + (size r);; + +(* nullable function: tests whether the regular + expression can recognise the empty string *) +let rec nullable r = match r with + | NULL -> false + | EMPTY -> true + | CHAR(_) -> false + | ALT(r1, r2) -> nullable(r1) || nullable(r2) + | SEQ(r1, r2) -> nullable(r1) && nullable(r2) + | STAR(_) -> true + | RECD(_, r) -> nullable(r);; + +(* derivative of a regular expression r w.r.t. a character c *) +let rec der c r = match r with + | NULL -> NULL + | EMPTY -> NULL + | CHAR(d) -> if c = d then EMPTY else NULL + | ALT(r1, r2) -> ALT(der c r1, der c r2) + | SEQ(r1, r2) -> + if nullable r1 then ALT(SEQ(der c r1, r2), der c r2) + else SEQ(der c r1, r2) + | STAR(r) -> SEQ(der c r, STAR(r)) + | RECD(_, r) -> der c r;; + +(* derivative w.r.t. a list of chars (iterates der) *) +let rec ders s r = match s with + | [] -> r + | c::s -> ders s (der c r);; + +(* extracts a string from value *) +let rec flatten v = match v with + | Void -> "" + | Chr(c) -> System.Convert.ToString(c) + | Left(v) -> flatten v + | Right(v) -> flatten v + | Sequ(v1, v2) -> flatten v1 ^ flatten v2 + | Stars(vs) -> String.concat "" (List.map flatten vs) + | Rec(_, v) -> flatten v;; + + +(* extracts an environment from a value *) +let rec env v = match v with + | Void -> [] + | Chr(c) -> [] + | Left(v) -> env v + | Right(v) -> env v + | Sequ(v1, v2) -> env v1 @ env v2 + | Stars(vs) -> List.fold (@) [] (List.map env vs) + | Rec(x, v) -> (x, flatten v) :: env v;; + +let string_of_pair (x, s) = "(" ^ x ^ "," ^ s ^ ")";; +let string_of_env xs = String.concat "," (List.map string_of_pair xs);; + + +(* the value for a nullable rexp *) +let rec mkeps r = match r with + | EMPTY -> Void + | ALT(r1, r2) -> + if nullable r1 then Left(mkeps r1) else Right(mkeps r2) + | SEQ(r1, r2) -> Sequ(mkeps r1, mkeps r2) + | STAR(r) -> Stars([]) + | RECD(x, r) -> Rec(x, mkeps r);; + + +(* injection of a char into a value *) +let rec inj r c v = match r, v with + | STAR(r), Sequ(v1, Stars(vs)) -> Stars(inj r c v1 :: vs) + | SEQ(r1, r2), Sequ(v1, v2) -> Sequ(inj r1 c v1, v2) + | SEQ(r1, r2), Left(Sequ(v1, v2)) -> Sequ(inj r1 c v1, v2) + | SEQ(r1, r2), Right(v2) -> Sequ(mkeps r1, inj r2 c v2) + | ALT(r1, r2), Left(v1) -> Left(inj r1 c v1) + | ALT(r1, r2), Right(v2) -> Right(inj r2 c v2) + | CHAR(d), Void -> Chr(d) + | RECD(x, r1), _ -> Rec(x, inj r1 c v);; + +(* some "rectification" functions for simplification *) +let f_id v = v;; +let f_right f = fun v -> Right(f v);; +let f_left f = fun v -> Left(f v);; +let f_alt f1 f2 = fun v -> match v with + Right(v) -> Right(f2 v) + | Left(v) -> Left(f1 v);; +let f_seq f1 f2 = fun v -> match v with + Sequ(v1, v2) -> Sequ(f1 v1, f2 v2);; +let f_seq_Void1 f1 f2 = fun v -> Sequ(f1 Void, f2 v);; +let f_seq_Void2 f1 f2 = fun v -> Sequ(f1 v, f2 Void);; +let f_rec f = fun v -> match v with + Rec(x, v) -> Rec(x, f v);; + +(* simplification of regular expressions returning also an + rectification function; no simplification under STARs *) +let rec simp r = match r with + ALT(r1, r2) -> + let (r1s, f1s) = simp r1 in + let (r2s, f2s) = simp r2 in + (match r1s, r2s with + NULL, _ -> (r2s, f_right f2s) + | _, NULL -> (r1s, f_left f1s) + | _, _ -> if r1s = r2s then (r1s, f_left f1s) + else (ALT (r1s, r2s), f_alt f1s f2s)) + | SEQ(r1, r2) -> + let (r1s, f1s) = simp r1 in + let (r2s, f2s) = simp r2 in + (match r1s, r2s with + NULL, _ -> (NULL, f_right f2s) + | _, NULL -> (NULL, f_left f1s) + | EMPTY, _ -> (r2s, f_seq_Void1 f1s f2s) + | _, EMPTY -> (r1s, f_seq_Void2 f1s f2s) + | _, _ -> (SEQ(r1s, r2s), f_seq f1s f2s)) + | RECD(x, r1) -> + let (r1s, f1s) = simp r1 in + (RECD(x, r1s), f_rec f1s) + | r -> (r, f_id) +;; + +(* matcher function *) +let matcher r s = nullable(ders (explode s) r);; + +(* lexing function (produces a value) *) +exception LexError;; + +let rec lex r s = match s with + [] -> if (nullable r) then mkeps r else raise LexError + | c::cs -> inj r c (lex (der c r) cs);; + +let lexing r s = lex r (explode s);; + +(* lexing with simplification *) +let rec lex_simp r s = match s with + [] -> if (nullable r) then mkeps r else raise LexError + | c::cs -> + let (r_simp, f_simp) = simp (der c r) in + inj r c (f_simp (lex_simp r_simp cs));; + +let lexing_simp r s = lex_simp r (explode s);; + + + + +(* Lexing rules for a small WHILE language *) +let sym = alts (List.map chr (explode "abcdefghijklmnopqrstuvwxyz"));; +let digit = alts (List.map chr (explode "0123456789"));; +let idents = sym -- STAR(sym ++ digit);; +let nums = plus(digit);; +let keywords = alts (List.map str ["skip"; "while"; "do"; "if"; "then"; "else"; "read"; "write"; "true"; "false"]);; +let semicolon = str ";" +let ops = alts (List.map str [":="; "=="; "-"; "+"; "*"; "!="; "<"; ">"; "<="; ">="; "%"; "/"]);; +let whitespace = plus(str " " ++ str "\n" ++ str "\t");; +let rparen = str ")";; +let lparen = str "(";; +let begin_paren = str "{";; +let end_paren = str "}";; + + +let while_regs = STAR(("k" $ keywords) ++ + ("i" $ idents) ++ + ("o" $ ops) ++ + ("n" $ nums) ++ + ("s" $ semicolon) ++ + ("p" $ (lparen ++ rparen)) ++ + ("b" $ (begin_paren ++ end_paren)) ++ + ("w" $ whitespace));; + + + +(* Some Tests + ============ *) + +let time f x = + let t = System.DateTime.Now in + let f_x = f x in + (printfn "%O" (System.DateTime.Now - t); f_x);; + +let prog0 = "read n";; +string_of_env (env (lexing while_regs prog0));; + +let prog1 = "read n; write (n)";; +string_of_env (env (lexing_simp while_regs prog1));; + + +let prog2 = " +i := 2; +max := 100; +while i < max do { + isprime := 1; + j := 2; + while (j * j) <= i + 1 do { + if i % j == 0 then isprime := 0 else skip; + j := j + 1 + }; + if isprime == 1 then write i else skip; + i := i + 1 +}";; + +for i = 1 to 100 do + printf "%i: " i ; + time (lexing_simp while_regs) (string_repeat prog2 i); +done;; + diff -r 2bc119fc8657 -r 94824659f6d7 progs/haskell/README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/haskell/README Fri Aug 15 14:10:58 2014 +0100 @@ -0,0 +1,4 @@ +compile with + + +ghc -O3 --make -with-rtsopts='-K128m' re.hs \ No newline at end of file diff -r 2bc119fc8657 -r 94824659f6d7 progs/haskell/re.hs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/haskell/re.hs Fri Aug 15 14:10:58 2014 +0100 @@ -0,0 +1,400 @@ +import System.Environment +import Data.List +import Text.Printf +import Control.Exception +import System.CPUTime +import Control.Parallel.Strategies +import Control.Monad + +lim :: Int +lim = 1 +-- lim = 10^6 + +time :: (Num t, NFData t) => t -> IO () +time y = do + start <- getCPUTime + replicateM_ lim $ do + x <- evaluate $ 1 + y + rdeepseq x `seq` return () + end <- getCPUTime + let diff = (fromIntegral (end - start)) / (10^12) + printf "%0.9f\n" (diff :: Double) + return () + +data Rexp = + NULL + | EMPTY + | CHAR Char + | ALT Rexp Rexp + | SEQ Rexp Rexp + | STAR Rexp + | RECD String Rexp deriving (Eq, Show) + +data Value = + Void + | Chr Char + | Sequ Value Value + | Lf Value + | Rg Value + | Stars [Value] + | Rec String Value deriving (Eq, Show) + +string_repeat :: String -> Int -> String +string_repeat s n = concat (replicate n s) + +sequ :: [Char] -> Rexp +sequ s = case s of + [] -> EMPTY + [c] -> CHAR c + c:cs -> SEQ (CHAR c) (sequ cs) + + +str :: String -> Rexp +str s = sequ s + +plus :: Rexp -> Rexp +plus r = SEQ r (STAR r) + +(\/) :: Rexp -> Rexp -> Rexp +r1 \/ r2 = ALT r1 r2 + +(~~) :: Rexp -> Rexp -> Rexp +r1 ~~ r2 = SEQ r1 r2 + +($$) :: String -> Rexp -> Rexp +x $$ r = RECD x r + +alts :: [Rexp] -> Rexp +alts rs = case rs of + [] -> NULL + [r] -> r + r:rs -> foldl (ALT) r rs + +size :: Rexp -> Int +size r = case r of + NULL -> 1 + EMPTY -> 1 + CHAR _ -> 1 + ALT r1 r2 -> 1 + (size r1) + (size r2) + SEQ r1 r2 -> 1 + (size r1) + (size r2) + STAR r -> 1 + (size r) + RECD _ r -> 1 + (size r) + +nullable :: Rexp -> Bool +nullable r = case r of + NULL -> False + EMPTY -> True + CHAR _ -> False + ALT r1 r2 -> nullable(r1) || nullable(r2) + SEQ r1 r2 -> nullable(r1) && nullable(r2) + STAR _ -> True + RECD _ r -> nullable(r) + +der :: Char -> Rexp -> Rexp +der c r = case r of + NULL -> NULL + EMPTY -> NULL + CHAR d -> if c == d then EMPTY else NULL + ALT r1 r2 -> ALT (der c r1) (der c r2) + SEQ r1 r2 -> + if nullable r1 then ALT (SEQ (der c r1) r2) (der c r2) + else SEQ (der c r1) r2 + STAR r -> SEQ (der c r) (STAR r) + RECD _ r -> der c r + +ders :: [Char] -> Rexp -> Rexp +ders s r = case s of + [] -> r + c:s -> ders s (der c r) + +flatten :: Value -> String +flatten v = case v of + Void -> "" + Chr c -> [c] + Lf v -> flatten v + Rg v -> flatten v + Sequ v1 v2 -> flatten v1 ++ flatten v2 + Stars vs -> concat (map flatten vs) + Rec _ v -> flatten v + +env :: Value -> [(String, String)] +env v = case v of + Void -> [] + Chr c -> [] + Lf v -> env v + Rg v -> env v + Sequ v1 v2 -> env v1 ++ env v2 + Stars vs -> foldl (++) [] (map env vs) + Rec x v -> (x, flatten v) : env v + +string_of_pair :: (String, String) -> String +string_of_pair (x, s) = "(" ++ x ++ "," ++ s ++ ")" + +string_of_env :: [(String, String)] -> String +string_of_env xs = intercalate "," (map string_of_pair xs) + +mkeps :: Rexp -> Value +mkeps r = case r of + EMPTY -> Void + ALT r1 r2 -> + if nullable r1 then Lf (mkeps r1) else Rg (mkeps r2) + SEQ r1 r2 -> Sequ (mkeps r1) (mkeps r2) + STAR r -> Stars [] + RECD x r -> Rec x (mkeps r) + +inj :: Rexp -> Char -> Value -> Value +inj r c v = case (r, v) of + (STAR r, Sequ v1 (Stars vs)) -> Stars (inj r c v1 : vs) + (SEQ r1 r2, Sequ v1 v2) -> Sequ (inj r1 c v1) v2 + (SEQ r1 r2, Lf (Sequ v1 v2)) -> Sequ (inj r1 c v1) v2 + (SEQ r1 r2, Rg v2) -> Sequ (mkeps r1) (inj r2 c v2) + (ALT r1 r2, Lf v1) -> Lf (inj r1 c v1) + (ALT r1 r2, Rg v2) -> Rg (inj r2 c v2) + (CHAR d, Void) -> Chr d + (RECD x r1, _) -> Rec x (inj r1 c v) + +f_id :: Value -> Value +f_id v = v + +f_right :: (Value -> Value) -> Value -> Value +f_right f = \v -> Rg (f v) + +f_left :: (Value -> Value) -> Value -> Value +f_left f = \v -> Lf (f v) + +f_alt :: (Value -> Value) -> (Value -> Value) -> Value -> Value +f_alt f1 f2 = \v -> case v of + Rg v -> Rg (f2 v) + Lf v -> Lf (f1 v) + +f_seq :: (Value -> Value) -> (Value -> Value) -> Value -> Value +f_seq f1 f2 = \v -> case v of + Sequ v1 v2 -> Sequ (f1 v1) (f2 v2) + +f_seq_void1 :: (Value -> Value) -> (Value -> Value) -> Value -> Value +f_seq_void1 f1 f2 = \v -> Sequ (f1 Void) (f2 v) + +f_seq_void2 :: (Value -> Value) -> (Value -> Value) -> Value -> Value +f_seq_void2 f1 f2 = \v -> Sequ(f1 v) (f2 Void) + +f_rec :: (Value -> Value) -> Value -> Value +f_rec f = \v -> case v of + Rec x v -> Rec x (f v) + +simp :: Rexp -> (Rexp, Value -> Value) +simp r = case r of + ALT r1 r2 -> + let (r1s, f1s) = simp r1 + (r2s, f2s) = simp r2 + in + (case (r1s, r2s) of + (NULL, _) -> (r2s, f_right f2s) + (_, NULL) -> (r1s, f_left f1s) + (_, _) -> if r1s == r2s then (r1s, f_left f1s) + else (ALT r1s r2s, f_alt f1s f2s)) + SEQ r1 r2 -> + let (r1s, f1s) = simp r1 + (r2s, f2s) = simp r2 + in + (case (r1s, r2s) of + (NULL, _) -> (NULL, f_right f2s) + (_, NULL) -> (NULL, f_left f1s) + (EMPTY, _) -> (r2s, f_seq_void1 f1s f2s) + (_, EMPTY) -> (r1s, f_seq_void2 f1s f2s) + (_, _) -> (SEQ r1s r2s, f_seq f1s f2s)) + RECD x r1 -> + let (r1s, f1s) = simp r1 + in + (RECD x r1s, f_rec f1s) + r -> (r, f_id) + +der_simp :: Char -> Rexp -> (Rexp, Value -> Value) +der_simp c r = case r of + NULL -> (NULL, f_id) + EMPTY -> (NULL, f_id) + CHAR(d) -> ((if c == d then EMPTY else NULL), f_id) + ALT r1 r2 -> + let (r1d, f1d) = der_simp c r1 + (r2d, f2d) = der_simp c r2 + in + (case (r1d, r2d) of + (NULL, _) -> (r2d, f_right f2d) + (_, NULL) -> (r1d, f_left f1d) + (_, _) -> if r1d == r2d then (r1d, f_left f1d) + else (ALT r1d r2d, f_alt f1d f2d)) + SEQ r1 r2 -> + if nullable r1 + then + let (r1d, f1d) = der_simp c r1 + (r2d, f2d) = der_simp c r2 + (r2s, f2s) = simp r2 + in + (case (r1d, r2s, r2d) of + (NULL, _, _) -> (r2d, f_right f2d) + (_, NULL, _) -> (r2d, f_right f2d) + (_, _, NULL) -> (SEQ r1d r2s, f_left (f_seq f1d f2s)) + (EMPTY, _, _) -> (ALT r2s r2d, f_alt (f_seq_void1 f1d f2s) f2d) + (_, EMPTY, _) -> (ALT r1d r2d, f_alt (f_seq_void2 f1d f2s) f2d) + (_, _, _) -> (ALT (SEQ r1d r2s) r2d, f_alt (f_seq f1d f2s) f2d)) + else + let (r1d, f1d) = der_simp c r1 + (r2s, f2s) = simp r2 + in + (case (r1d, r2s) of + (NULL, _) -> (NULL, f_id) + (_, NULL) -> (NULL, f_id) + (EMPTY, _) -> (r2s, f_seq_void1 f1d f2s) + (_, EMPTY) -> (r1d, f_seq_void2 f1d f2s) + (_, _) -> (SEQ r1d r2s, f_seq f1d f2s)) + STAR r1 -> + let (r1d, f1d) = der_simp c r1 + in + (case r1d of + NULL -> (NULL, f_id) + EMPTY -> (STAR r1, f_seq_void1 f1d f_id) + _ -> (SEQ r1d (STAR r1), f_seq f1d f_id)) + RECD x r1 -> der_simp c r1 + + + +matcher :: Rexp -> String -> Bool +matcher r s = nullable (ders s r) + +lex0 :: Rexp -> String -> Maybe Value +lex0 r s = case s of + [] -> if (nullable r) + then Just (mkeps r) + else Nothing + c:cs -> do res <- lex0 (der c r) cs + return (inj r c res) + +lex_simp :: Rexp -> String -> Maybe Value +lex_simp r s = case s of + [] -> if (nullable r) + then Just (mkeps r) + else Nothing + c:cs -> let + (r_simp, f_simp) = simp (der c r) + in + do + res <- lex_simp r_simp cs + return (inj r c (f_simp res)) + +lex_simp2 :: Rexp -> String -> Maybe Value +lex_simp2 r s = case s of + [] -> if (nullable r) + then Just (mkeps r) + else Nothing + c:cs -> let + (r_simp, f_simp) = der_simp c r + in + do + res <- lex_simp2 r_simp cs + return (inj r c (f_simp res)) + +lex_acc :: Rexp -> String -> (Value -> Value) -> Maybe Value +lex_acc r s f = case s of + [] -> if (nullable r) + then Just (f (mkeps r)) + else Nothing + c:cs -> let + (r_simp, f_simp) = simp (der c r) + in + lex_acc r_simp cs (\v -> f (inj r c (f_simp v))) + +lex_acc2 :: Rexp -> String -> (Value -> Value) -> Maybe Value +lex_acc2 r s f = case s of + [] -> if (nullable r) + then Just (f (mkeps r)) + else Nothing + c:cs -> let + (r_simp, f_simp) = der_simp c r + in + lex_acc2 r_simp cs (\v -> f (inj r c (f_simp v))) + +sym = alts (map CHAR "abcdefghijklmnopqrstuvwxyz") +digit = alts (map CHAR "0123456789") +idents = sym ~~ STAR(sym \/ digit) +nums = plus digit +keywords = alts (map str ["skip", "while", "do", "if", "then", "else", "read", "write", "true", "false"]) +semicolon = str ";" +ops = alts (map str [":=", "==", "-", "+", "*", "!=", "<", ">", "<=", ">=", "%", "/"]) +whitespace = plus(str " " \/ str "\n" \/ str "\t") +rparen = str ")" +lparen = str "(" +begin_paren = str "{" +end_paren = str "}" + +while_regs = STAR(("k" $$ keywords) \/ + ("i" $$ idents) \/ + ("o" $$ ops) \/ + ("n" $$ nums) \/ + ("s" $$ semicolon) \/ + ("p" $$ (lparen \/ rparen)) \/ + ("b" $$ (begin_paren \/ end_paren)) \/ + ("w" $$ whitespace)) + +prog2 = intercalate "\n" + ["i := 2;", + "max := 100;", + "while i < max do {", + " isprime := 1;", + " j := 2;", + " while (j * j) <= i + 1 do {", + " if i % j == 0 then isprime := 0 else skip;", + " j := j + 1", + " };", + " if isprime == 1 then write i else skip;", + " i := i + 1", + "}"] + + +lexing_simp :: Int -> Int +lexing_simp n = case (lex_simp while_regs (string_repeat prog2 n)) of + Just result -> 1 + Nothing -> 0 + +step_simp :: Int -> IO () +step_simp n = do + putStr (show n ++ ": ") + time (lexing_simp n) + +lexing_simp2 :: Int -> Int +lexing_simp2 n = case (lex_simp2 while_regs (string_repeat prog2 n)) of + Just result -> 1 + Nothing -> 0 + +step_simp2 :: Int -> IO () +step_simp2 n = do + putStr (show n ++ ": ") + time (lexing_simp2 n) + +lexing_acc :: Int -> Int +lexing_acc n = case (lex_acc while_regs (string_repeat prog2 n) f_id) of + Just result -> 1 + Nothing -> 0 + +step_acc :: Int -> IO () +step_acc n = do + putStr (show n ++ ": ") + time (lexing_acc n) + +lexing_acc2 :: Int -> Int +lexing_acc2 n = case (lex_acc2 while_regs (string_repeat prog2 n) f_id) of + Just result -> 1 + Nothing -> 0 + +step_acc2 :: Int -> IO () +step_acc2 n = do + putStr (show n ++ ": ") + time (lexing_acc2 n) + +main :: IO () +main = do + forM_ [1000,2000..5000] step_simp + printf "\n" + forM_ [1000,2000..5000] step_simp2 + printf "\n" + forM_ [1000,2000..5000] step_acc + printf "\n" + forM_ [1000,2000..5000] step_acc2 \ No newline at end of file diff -r 2bc119fc8657 -r 94824659f6d7 progs/ocaml/re.ml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/ocaml/re.ml Fri Aug 15 14:10:58 2014 +0100 @@ -0,0 +1,394 @@ + +type rexp = + NULL + | EMPTY + | CHAR of char + | ALT of rexp * rexp + | SEQ of rexp * rexp + | STAR of rexp + | RECD of string * rexp;; + +type value = + Void + | Chr of char + | Sequ of value * value + | Left of value + | Right of value + | Stars of value list + | Rec of string * value;; + +let rec string_of_val v = match v with + Void -> "Void" + | Chr(c) -> String.make 1 c + | Sequ(v1, v2) -> "Seq(" ^ string_of_val v1 ^ "," ^ string_of_val v2 ^ ")" + | Left(v1) -> "Left(" ^ string_of_val v1 ^ ")" + | Right(v1) -> "Right(" ^ string_of_val v1 ^ ")" + | Stars(vs) -> "[" ^ String.concat "," (List.map string_of_val vs) ^ "]" + | Rec(x, v1) -> x ^ " $ " ^ string_of_val v1;; + + +(* some helper functions for strings *) +let explode s = + let rec exp i l = + if i < 0 then l else exp (i - 1) (s.[i] :: l) in + exp (String.length s - 1) [];; + +let string_repeat s n = + Array.fold_left (^) "" (Array.make n s);; + +(* some helper functions for rexps *) +let rec seq s = match s with + [] -> EMPTY + | [c] -> CHAR(c) + | c::cs -> SEQ(CHAR(c), seq cs);; + +let chr c = CHAR(c) + +let str s = seq(explode s);; + +let plus r = SEQ(r, STAR(r));; + +let (++) r1 r2 = ALT(r1, r2);; + +let (--) r1 r2 = SEQ(r1, r2);; + +let ($) x r = RECD(x, r);; + +let alts rs = match rs with + [] -> NULL + | [r] -> r + | r::rs -> List.fold_left (++) r rs;; + + +(* size of a regular expressions - for testing purposes *) +let rec size r = match r with + NULL -> 1 + | EMPTY -> 1 + | CHAR(_) -> 1 + | ALT(r1, r2) -> 1 + (size r1) + (size r2) + | SEQ(r1, r2) -> 1 + (size r1) + (size r2) + | STAR(r) -> 1 + (size r) + | RECD(_, r) -> 1 + (size r);; + +(* nullable function: tests whether the regular + expression can recognise the empty string *) +let rec nullable r = match r with + NULL -> false + | EMPTY -> true + | CHAR(_) -> false + | ALT(r1, r2) -> nullable(r1) || nullable(r2) + | SEQ(r1, r2) -> nullable(r1) && nullable(r2) + | STAR(_) -> true + | RECD(_, r) -> nullable(r);; + +(* derivative of a regular expression r w.r.t. a character c *) +let rec der c r = match r with + NULL -> NULL + | EMPTY -> NULL + | CHAR(d) -> if c = d then EMPTY else NULL + | ALT(r1, r2) -> ALT(der c r1, der c r2) + | SEQ(r1, r2) -> + if nullable r1 then ALT(SEQ(der c r1, r2), der c r2) + else SEQ(der c r1, r2) + | STAR(r) -> SEQ(der c r, STAR(r)) + | RECD(_, r) -> der c r;; + +(* derivative w.r.t. a list of chars (iterates der) *) +let rec ders s r = match s with + [] -> r + | c::s -> ders s (der c r);; + +(* extracts a string from value *) +let rec flatten v = match v with + Void -> "" + | Chr(c) -> String.make 1 c + | Left(v) -> flatten v + | Right(v) -> flatten v + | Sequ(v1, v2) -> flatten v1 ^ flatten v2 + | Stars(vs) -> String.concat "" (List.map flatten vs) + | Rec(_, v) -> flatten v;; + + +(* extracts an environment from a value *) +let rec env v = match v with + Void -> [] + | Chr(c) -> [] + | Left(v) -> env v + | Right(v) -> env v + | Sequ(v1, v2) -> env v1 @ env v2 + | Stars(vs) -> List.flatten (List.map env vs) + | Rec(x, v) -> (x, flatten v) :: env v;; + +let string_of_pair (x, s) = "(" ^ x ^ "," ^ s ^ ")";; +let string_of_env xs = String.concat "," (List.map string_of_pair xs);; + + +(* the value for a nullable rexp *) +let rec mkeps r = match r with + EMPTY -> Void + | ALT(r1, r2) -> + if nullable r1 then Left(mkeps r1) else Right(mkeps r2) + | SEQ(r1, r2) -> Sequ(mkeps r1, mkeps r2) + | STAR(r) -> Stars([]) + | RECD(x, r) -> Rec(x, mkeps r);; + + +(* injection of a char into a value *) +let rec inj r c v = match r, v with + STAR(r), Sequ(v1, Stars(vs)) -> Stars(inj r c v1 :: vs) + | SEQ(r1, r2), Sequ(v1, v2) -> Sequ(inj r1 c v1, v2) + | SEQ(r1, r2), Left(Sequ(v1, v2)) -> Sequ(inj r1 c v1, v2) + | SEQ(r1, r2), Right(v2) -> Sequ(mkeps r1, inj r2 c v2) + | ALT(r1, r2), Left(v1) -> Left(inj r1 c v1) + | ALT(r1, r2), Right(v2) -> Right(inj r2 c v2) + | CHAR(d), Void -> Chr(d) + | RECD(x, r1), _ -> Rec(x, inj r1 c v);; + +(* some "rectification" functions for simplification *) +let f_id v = v;; +let f_right f = fun v -> Right(f v);; +let f_left f = fun v -> Left(f v);; +let f_alt f1 f2 = fun v -> match v with + Right(v) -> Right(f2 v) + | Left(v) -> Left(f1 v);; +let f_seq f1 f2 = fun v -> match v with + Sequ(v1, v2) -> Sequ(f1 v1, f2 v2);; +let f_seq_Void1 f1 f2 = fun v -> Sequ(f1 Void, f2 v);; +let f_seq_Void2 f1 f2 = fun v -> Sequ(f1 v, f2 Void);; +let f_rec f = fun v -> match v with + Rec(x, v) -> Rec(x, f v);; + +exception ShouldNotHappen +let f_error v = raise ShouldNotHappen + +(* simplification of regular expressions returning also an + rectification function; no simplification under STARs *) +let rec simp r = match r with + ALT(r1, r2) -> + let (r1s, f1s) = simp r1 in + let (r2s, f2s) = simp r2 in + (match r1s, r2s with + NULL, _ -> (r2s, f_right f2s) + | _, NULL -> (r1s, f_left f1s) + | _, _ -> if r1s = r2s then (r1s, f_left f1s) + else (ALT (r1s, r2s), f_alt f1s f2s)) + | SEQ(r1, r2) -> + let (r1s, f1s) = simp r1 in + let (r2s, f2s) = simp r2 in + (match r1s, r2s with + NULL, _ -> (NULL, f_error) + | _, NULL -> (NULL, f_error) + | EMPTY, _ -> (r2s, f_seq_Void1 f1s f2s) + | _, EMPTY -> (r1s, f_seq_Void2 f1s f2s) + | _, _ -> (SEQ(r1s, r2s), f_seq f1s f2s)) + | RECD(x, r1) -> + let (r1s, f1s) = simp r1 in + (RECD(x, r1s), f_rec f1s) + | r -> (r, f_id) +;; + +let rec der_simp c r = match r with + NULL -> (NULL, f_id) + | EMPTY -> (NULL, f_id) + | CHAR(d) -> ((if c = d then EMPTY else NULL), f_id) + | ALT(r1, r2) -> + let (r1d, f1d) = der_simp c r1 in + let (r2d, f2d) = der_simp c r2 in + (match r1d, r2d with + NULL, _ -> (r2d, f_right f2d) + | _, NULL -> (r1d, f_left f1d) + | _, _ -> if r1d = r2d then (r1d, f_left f1d) + else (ALT (r1d, r2d), f_alt f1d f2d)) + | SEQ(r1, r2) -> + if nullable r1 + then + let (r1d, f1d) = der_simp c r1 in + let (r2d, f2d) = der_simp c r2 in + let (r2s, f2s) = simp r2 in + (match r1d, r2s, r2d with + NULL, _, _ -> (r2d, f_right f2d) + | _, NULL, _ -> (r2d, f_right f2d) + | _, _, NULL -> (SEQ(r1d, r2s), f_left (f_seq f1d f2s)) + | EMPTY, _, _ -> (ALT(r2s, r2d), f_alt (f_seq_Void1 f1d f2s) f2d) + | _, EMPTY, _ -> (ALT(r1d, r2d), f_alt (f_seq_Void2 f1d f2s) f2d) + | _, _, _ -> (ALT(SEQ(r1d, r2s), r2d), f_alt (f_seq f1d f2s) f2d)) + else + let (r1d, f1d) = der_simp c r1 in + let (r2s, f2s) = simp r2 in + (match r1d, r2s with + NULL, _ -> (NULL, f_error) + | _, NULL -> (NULL, f_error) + | EMPTY, _ -> (r2s, f_seq_Void1 f1d f2s) + | _, EMPTY -> (r1d, f_seq_Void2 f1d f2s) + | _, _ -> (SEQ(r1d, r2s), f_seq f1d f2s)) + | STAR(r1) -> + let (r1d, f1d) = der_simp c r1 in + (match r1d with + NULL -> (NULL, f_error) + | EMPTY -> (STAR r1, f_seq_Void1 f1d f_id) + | _ -> (SEQ(r1d, STAR(r1)), f_seq f1d f_id)) + | RECD(x, r1) -> der_simp c r1 + + +(* matcher function *) +let matcher r s = nullable(ders (explode s) r);; + +(* lexing function (produces a value) *) +exception LexError;; + +let rec lex r s = match s with + [] -> if (nullable r) then mkeps r else raise LexError + | c::cs -> inj r c (lex (der c r) cs);; + +let lexing r s = lex r (explode s);; + +(* lexing with simplification *) +let rec lex_simp r s = match s with + [] -> if (nullable r) then mkeps r else raise LexError + | c::cs -> + let (r_simp, f_simp) = simp (der c r) in + inj r c (f_simp (lex_simp r_simp cs));; + +let lexing_simp r s = lex_simp r (explode s);; + +let rec lex_simp2 r s = match s with + [] -> if (nullable r) then mkeps r else raise LexError + | c::cs -> + let (r_simp, f_simp) = der_simp c r in + inj r c (f_simp (lex_simp2 r_simp cs));; + +let lexing_simp2 r s = lex_simp2 r (explode s);; + + +(* lexing with accumulation *) +let rec lex_acc r s f = match s with + [] -> if (nullable r) then f (mkeps r) else raise LexError + | c::cs -> + let (r_simp, f_simp) = simp (der c r) in + lex_acc r_simp cs (fun v -> f (inj r c (f_simp v)));; + +let lexing_acc r s = lex_acc r (explode s) (f_id);; + +let rec lex_acc2 r s f = match s with + [] -> if (nullable r) then f (mkeps r) else raise LexError + | c::cs -> + let (r_simp, f_simp) = der_simp c r in + lex_acc2 r_simp cs (fun v -> f (inj r c (f_simp v)));; + +let lexing_acc2 r s = lex_acc2 r (explode s) (f_id);; + + +(* Lexing rules for a small WHILE language *) +let sym = alts (List.map chr (explode "abcdefghijklmnopqrstuvwxyz"));; +let digit = alts (List.map chr (explode "0123456789"));; +let idents = sym -- STAR(sym ++ digit);; +let nums = plus(digit);; +let keywords = alts + (List.map str ["skip"; "while"; "do"; "if"; "then"; "else"; "read"; "write"; "true"; "false"]);; +let semicolon = str ";" +let ops = alts + (List.map str [":="; "=="; "-"; "+"; "*"; "!="; "<"; ">"; "<="; ">="; "%"; "/"]);; +let whitespace = plus(str " " ++ str "\n" ++ str "\t");; +let rparen = str ")";; +let lparen = str "(";; +let begin_paren = str "{";; +let end_paren = str "}";; + + +let while_regs = STAR(("k" $ keywords) ++ + ("i" $ idents) ++ + ("o" $ ops) ++ + ("n" $ nums) ++ + ("s" $ semicolon) ++ + ("p" $ (lparen ++ rparen)) ++ + ("b" $ (begin_paren ++ end_paren)) ++ + ("w" $ whitespace));; + + + +(* Some Tests + ============ *) + +let time f x = + let t = Sys.time() in + let f_x = (f x; f x; f x) in + (print_float ((Sys.time() -. t) /. 3.0); f_x);; + + +let prog0 = "read n";; + +let prog1 = "read n; write (n)";; +string_of_env (env (lexing_simp while_regs prog1));; + + +let prog2 = " +i := 2; +max := 100; +while i < max do { + isprime := 1; + j := 2; + while (j * j) <= i + 1 do { + if i % j == 0 then isprime := 0 else skip; + j := j + 1 + }; + if isprime == 1 then write i else skip; + i := i + 1 +}";; + +let tst1 = (lexing_simp while_regs prog2 = lexing_simp2 while_regs prog2) in +let tst2 = (lexing_simp while_regs prog2 = lexing_acc while_regs prog2) in +let tst3 = (lexing_simp while_regs prog2 = lexing_acc2 while_regs prog2) +in + print_string ("Sanity test simp vs simp2: >>" ^ (string_of_bool tst1) ^ "<<\n") ; + print_string ("Sanity test simp vs acc: >>" ^ (string_of_bool tst2) ^ "<<\n") ; + print_string ("Sanity test simp vs acc2: >>" ^ (string_of_bool tst3) ^ "<<") ; + print_newline ();; + + + +type range = + To of int * int;; + +let (---) i j = To(i, j);; + +let forby n = + fun range -> match range with To(lo, up) -> + (fun f -> + let rec loop lo = + if lo > up then () else (f lo; loop (lo + n)) + in loop lo);; + +let step_simp i = + (print_string ((string_of_int i) ^ ": ") ; + time (lexing_simp while_regs) (string_repeat prog2 i) ; + print_newline ());; + +let step_simp2 i = + (print_string ((string_of_int i) ^ ": ") ; + time (lexing_simp2 while_regs) (string_repeat prog2 i) ; + print_newline ());; + +let step_acc i = + (print_string ((string_of_int i) ^ ": ") ; + time (lexing_acc while_regs) (string_repeat prog2 i) ; + print_newline ());; + +let step_acc2 i = + (print_string ((string_of_int i) ^ ": ") ; + time (lexing_acc2 while_regs) (string_repeat prog2 i) ; + print_newline ());; + +forby 100 (100 --- 700) step_simp;; +print_newline ();; +forby 100 (100 --- 700) step_simp2;; +print_newline ();; +forby 100 (100 --- 700) step_acc;; +print_newline ();; +forby 100 (100 --- 700) step_acc2;; +print_newline ();; +forby 1000 (1000 --- 5000) step_acc;; +print_newline ();; +forby 1000 (1000 --- 5000) step_acc2;; +(*print_newline ();;*) +(* forby 500 (100 --- 5000) step_simp;; *) + diff -r 2bc119fc8657 -r 94824659f6d7 progs/scala/re.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/scala/re.scala Fri Aug 15 14:10:58 2014 +0100 @@ -0,0 +1,231 @@ +import scala.language.implicitConversions +import scala.language.reflectiveCalls +import scala.annotation.tailrec + +abstract class Rexp +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class RECD(x: String, r: Rexp) extends Rexp + +abstract class Val +case object Void extends Val +case class Chr(c: Char) extends Val +case class Sequ(v1: Val, v2: Val) extends Val +case class Left(n: Int, v: Val) extends Val +case class Right(n: Int, v: Val) extends Val +case class Stars(vs: List[Val]) extends Val +case class Rec(x: String, v: Val) extends Val + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]): Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + +implicit def RexpOps(r: Rexp) = new { + def | (s: Rexp) = ALT(r, s) + def % = STAR(r) + def ~ (s: Rexp) = SEQ(r, s) +} + +implicit def stringOps(s: String) = new { + def | (r: Rexp) = ALT(s, r) + def | (r: String) = ALT(s, r) + def % = STAR(s) + def ~ (r: Rexp) = SEQ(s, r) + def ~ (r: String) = SEQ(s, r) + def $ (r: Rexp) = RECD(s, r) +} + +// size of a regular expressions - for testing purposes +def size(r: Rexp) : Int = r match { + case NULL => 1 + case EMPTY => 1 + case CHAR(_) => 1 + case ALT(r1, r2) => 1 + size(r1) + size(r2) + case SEQ(r1, r2) => 1 + size(r1) + size(r2) + case STAR(r) => 1 + size(r) + case RECD(_, r) => 1 + size(r) +} + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case RECD(_, r1) => nullable(r1) +} + +// derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case RECD(_, r1) => der(c, r1) +} + +// derivative w.r.t. a string (iterates der) +def ders (s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders(s, der(c, r)) +} + +// extracts a string from value +def flatten(v: Val) : String = v match { + case Void => "" + case Chr(c) => c.toString + case Left(n, v) => flatten(v) + case Right(n, v) => flatten(v) + case Sequ(v1, v2) => flatten(v1) + flatten(v2) + case Stars(vs) => vs.map(flatten).mkString + case Rec(_, v) => flatten(v) +} + +// extracts an environment from a value +def env(v: Val) : List[(String, String)] = v match { + case Void => Nil + case Chr(c) => Nil + case Left(n, v) => env(v) + case Right(n, v) => env(v) + case Sequ(v1, v2) => env(v1) ::: env(v2) + case Stars(vs) => vs.flatMap(env) + case Rec(x, v) => (x, flatten(v))::env(v) +} + +def left_inc(v: Val) = v match { + case Left(v, n) => Left(v, n + 1) + case v => Left(v, 1) +} + +def right_inc(v: Val) = v match { + case Right(v, n) => Right(v, n + 1) + case v => Right(v, 1) +} + +def mkeps(r: Rexp) : Val = r match { + case EMPTY => Void + case ALT(r1, r2) => + if (nullable(r1)) left_inc(mkeps(r1)) else right_inc(mkeps(r2)) + case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) + case STAR(r) => Stars(Nil) + case RECD(x, r) => Rec(x, mkeps(r)) +} + +def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { + case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) + case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) + case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) + case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) + case (ALT(r1, r2), Left(v1), 1) => Left(inj(r1, c, v1), 1) + case (ALT(r1, r2), Left(v1), n) => inc_left(inj(r1, c, Left(v1, n - 1))) + case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2)) + case (CHAR(d), Void) => Chr(d) + case (RECD(x, r1), _) => Rec(x, inj(r1, c, v)) +} + + +// main lexing function (produces a value) +def lex(r: Rexp, s: List[Char]) : Val = s match { + case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched") + case c::cs => inj(r, c, lex(der(c, r), cs)) +} + +def lexing(r: Rexp, s: String) : Val = lex(r, s.toList) + + +// Lexing Rules for a Small While Language + +def PLUS(r: Rexp) = r ~ r.% +val SYM = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z" +val DIGIT = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" +val ID = SYM ~ (SYM | DIGIT).% +val NUM = PLUS(DIGIT) +val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" | "true" | "false" +val SEMI: Rexp = ";" +val OP: Rexp = ":=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/" +val WHITESPACE = PLUS(" " | "\n" | "\t") +val RPAREN: Rexp = ")" +val LPAREN: Rexp = "(" +val BEGIN: Rexp = "{" +val END: Rexp = "}" + +/* + * val WHILE_REGS = (("k" $ KEYWORD) | + ("i" $ ID) | + ("o" $ OP) | + ("n" $ NUM) | + ("s" $ SEMI) | + ("p" $ (LPAREN | RPAREN)) | + ("b" $ (BEGIN | END)) | + ("w" $ WHITESPACE)).% +*/ + +val WHILE_REGS = (KEYWORD | + ID | + OP | + NUM | + SEMI | + LPAREN | RPAREN | + BEGIN | END | + WHITESPACE).% + + +// Some Tests +//============ + +def time[T](code: => T) = { + val start = System.nanoTime() + val result = code + val end = System.nanoTime() + println((end - start)/1.0e9) + result +} + +val prog0 = """read n""" +env (lexing_simp(WHILE_REGS, prog0)) + +println("Next test") +/* +val prog1 = """read n; write (n)""" +env (lexing_simp(WHILE_REGS, prog1)) + +val prog2 = """ +i := 2; +max := 100; +while i < max do { + isprime := 1; + j := 2; + while (j * j) <= i + 1 do { + if i % j == 0 then isprime := 0 else skip; + j := j + 1 + }; + if isprime == 1 then write i else skip; + i := i + 1 +}""" +lexing_acc(WHILE_REGS, prog2) + +for (i <- 1 to 228 by 1) { + print(i.toString + ": ") + time(lexing_acc(WHILE_REGS, prog2 * i)) +} + +for (i <- 1 to 100 by 10) { + print(i.toString + ": ") + time(lexing_simp(WHILE_REGS, prog2 * i)) +} +*/ diff -r 2bc119fc8657 -r 94824659f6d7 progs/sml/re.ML --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/sml/re.ML Fri Aug 15 14:10:58 2014 +0100 @@ -0,0 +1,426 @@ + +datatype rexp = + NULL + | EMPTY + | CHAR of char + | ALT of rexp * rexp + | SEQ of rexp * rexp + | STAR of rexp + | RECD of string * rexp + +datatype value = + Void + | Chr of char + | Sequ of value * value + | Left of value + | Right of value + | Stars of value list + | Rec of string * value + +(* some helper functions for strings *) +fun string_repeat s n = String.concat (List.tabulate (n, fn _ => s)) + +(* some helper functions for rexps *) +fun seq s = case s of + [] => EMPTY + | [c] => CHAR(c) + | c::cs => SEQ(CHAR(c), seq cs) + +fun chr c = CHAR(c) + +fun str s = seq(explode s) + +fun plus r = SEQ(r, STAR(r)) + +infix 9 ++ +infix 9 -- +infix 9 $ + +fun op ++ (r1, r2) = ALT(r1, r2) + +fun op -- (r1, r2) = SEQ(r1, r2) + +fun op $ (x, r) = RECD(x, r) + +fun alts rs = case rs of + [] => NULL + | [r] => r + | r::rs => List.foldl (op ++) r rs + + +(* size of a regular expressions - for testing purposes *) +fun size r = case r of + NULL => 1 + | EMPTY => 1 + | CHAR(_) => 1 + | ALT(r1, r2) => 1 + (size r1) + (size r2) + | SEQ(r1, r2) => 1 + (size r1) + (size r2) + | STAR(r) => 1 + (size r) + | RECD(_, r) => 1 + (size r) + +(* nullable function: tests whether the regular + expression can recognise the empty string *) +fun nullable r = case r of + NULL => false + | EMPTY => true + | CHAR(_) => false + | ALT(r1, r2) => nullable(r1) orelse nullable(r2) + | SEQ(r1, r2) => nullable(r1) andalso nullable(r2) + | STAR(_) => true + | RECD(_, r) => nullable(r) + +(* derivative of a regular expression r w.r.t. a character c *) +fun der c r = case r of + NULL => NULL + | EMPTY => NULL + | CHAR(d) => if c = d then EMPTY else NULL + | ALT(r1, r2) => ALT(der c r1, der c r2) + | SEQ(r1, r2) => + if nullable r1 then ALT(SEQ(der c r1, r2), der c r2) + else SEQ(der c r1, r2) + | STAR(r) => SEQ(der c r, STAR(r)) + | RECD(_, r) => der c r + +(* derivative w.r.t. a list of chars (iterates der) *) +fun ders s r = case s of + [] => r + | c::s => ders s (der c r) + +(* extracts a string from value *) +fun flatten v = case v of + Void => "" + | Chr(c) => Char.toString c + | Left(v) => flatten v + | Right(v) => flatten v + | Sequ(v1, v2) => flatten v1 ^ flatten v2 + | Stars(vs) => String.concat (List.map flatten vs) + | Rec(_, v) => flatten v + + +(* extracts an environment from a value *) +fun env v = case v of + Void => [] + | Chr(c) => [] + | Left(v) => env v + | Right(v) => env v + | Sequ(v1, v2) => env v1 @ env v2 + | Stars(vs) => List.foldr (op @) [] (List.map env vs) + | Rec(x, v) => (x, flatten v) :: env v + +fun string_of_pair (x, s) = "(" ^ x ^ "," ^ s ^ ")" +fun string_of_env xs = String.concatWith "," (List.map string_of_pair xs) + + +(* the value for a nullable rexp *) +fun mkeps r = case r of + EMPTY => Void + | ALT(r1, r2) => + if nullable r1 then Left(mkeps r1) else Right(mkeps r2) + | SEQ(r1, r2) => Sequ(mkeps r1, mkeps r2) + | STAR(r) => Stars([]) + | RECD(x, r) => Rec(x, mkeps r) + +exception Error + +(* injection of a char into a value *) +fun inj r c v = case (r, v) of + (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj r c v1 :: vs) + | (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj r1 c v1, v2) + | (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj r1 c v1, v2) + | (SEQ(r1, r2), Right(v2)) => Sequ(mkeps r1, inj r2 c v2) + | (ALT(r1, r2), Left(v1)) => Left(inj r1 c v1) + | (ALT(r1, r2), Right(v2)) => Right(inj r2 c v2) + | (CHAR(d), Void) => Chr(d) + | (RECD(x, r1), _) => Rec(x, inj r1 c v) + | _ => (print ("\nr: " ^ PolyML.makestring r ^ "\n"); + print ("v: " ^ PolyML.makestring v ^ "\n"); + raise Error) + +(* some "rectification" functions for simplification *) +fun f_id v = v +fun f_right f = fn v => Right(f v) +fun f_left f = fn v => Left(f v) +fun f_alt f1 f2 = fn v => case v of + Right(v) => Right(f2 v) + | Left(v) => Left(f1 v) +fun f_seq f1 f2 = fn v => case v of + Sequ(v1, v2) => Sequ(f1 v1, f2 v2) +fun f_seq_Void1 f1 f2 = fn v => Sequ(f1 Void, f2 v) +fun f_seq_Void2 f1 f2 = fn v => Sequ(f1 v, f2 Void) +fun f_rec f = fn v => case v of + Rec(x, v) => Rec(x, f v) + +exception ShouldNotHappen + +fun f_error v = raise ShouldNotHappen + +(* simplification of regular expressions returning also an + rectification function; no simplification under STARs *) +fun simp r = case r of + ALT(r1, r2) => + let val (r1s, f1s) = simp r1 + val (r2s, f2s) = simp r2 in + (case (r1s, r2s) of + (NULL, _) => (r2s, f_right f2s) + | (_, NULL) => (r1s, f_left f1s) + | (_, _) => if r1s = r2s then (r1s, f_left f1s) + else (ALT (r1s, r2s), f_alt f1s f2s)) + end + | SEQ(r1, r2) => + let val (r1s, f1s) = simp r1 + val (r2s, f2s) = simp r2 in + (case (r1s, r2s) of + (NULL, _) => (NULL, f_error) + | (_, NULL) => (NULL, f_error) + | (EMPTY, _) => (r2s, f_seq_Void1 f1s f2s) + | (_, EMPTY) => (r1s, f_seq_Void2 f1s f2s) + | (_, _) => (SEQ(r1s, r2s), f_seq f1s f2s)) + end + | RECD(x, r1) => + let val (r1s, f1s) = simp r1 in + (RECD(x, r1s), f_rec f1s) + end + | r => (r, f_id) + +fun der_simp c r = case r of + NULL => (NULL, f_id) + | EMPTY => (NULL, f_id) + | CHAR(d) => ((if c = d then EMPTY else NULL), f_id) + | ALT(r1, r2) => + let + val (r1d, f1d) = der_simp c r1 + val (r2d, f2d) = der_simp c r2 + in + case (r1d, r2d) of + (NULL, _) => (r2d, f_right f2d) + | (_, NULL) => (r1d, f_left f1d) + | (_, _) => if r1d = r2d then (r1d, f_left f1d) + else (ALT (r1d, r2d), f_alt f1d f2d) + end + | SEQ(r1, r2) => + if nullable r1 + then + let + val (r1d, f1d) = der_simp c r1 + val (r2d, f2d) = der_simp c r2 + val (r2s, f2s) = simp r2 + in + case (r1d, r2s, r2d) of + (NULL, _, _) => (r2d, f_right f2d) + | (_, NULL, _) => (r2d, f_right f2d) + | (_, _, NULL) => (SEQ(r1d, r2s), f_left (f_seq f1d f2s)) + | (EMPTY, _, _) => (ALT(r2s, r2d), f_alt (f_seq_Void1 f1d f2s) f2d) + | (_, EMPTY, _) => (ALT(r1d, r2d), f_alt (f_seq_Void2 f1d f2s) f2d) + | (_, _, _) => (ALT(SEQ(r1d, r2s), r2d), f_alt (f_seq f1d f2s) f2d) + end + else + let + val (r1d, f1d) = der_simp c r1 + val (r2s, f2s) = simp r2 + in + case (r1d, r2s) of + (NULL, _) => (NULL, f_error) + | (_, NULL) => (NULL, f_error) + | (EMPTY, _) => (r2s, f_seq_Void1 f1d f2s) + | (_, EMPTY) => (r1d, f_seq_Void2 f1d f2s) + | (_, _) => (SEQ(r1d, r2s), f_seq f1d f2s) + end + | STAR(r1) => + let + val (r1d, f1d) = der_simp c r1 + in + case r1d of + NULL => (NULL, f_error) + | EMPTY => (STAR r1, f_seq_Void1 f1d f_id) + | _ => (SEQ(r1d, STAR(r1)), f_seq f1d f_id) + end + | RECD(x, r1) => der_simp c r1 + + + +(* matcher function *) +fun matcher r s = nullable(ders (explode s) r) + +(* lexing function (produces a value) *) +exception LexError + +fun lex r s = case s of + [] => if (nullable r) then mkeps r else raise LexError + | c::cs => inj r c (lex (der c r) cs) + +fun lexing r s = lex r (explode s) + +(* lexing with simplification *) +fun lex_simp r s = case s of + [] => if (nullable r) then mkeps r else raise LexError + | c::cs => + let val (r_simp, f_simp) = simp (der c r) in + inj r c (f_simp (lex_simp r_simp cs)) + end + +fun lexing_simp r s = lex_simp r (explode s) + +fun lex_simp2 r s = case s of + [] => if (nullable r) then mkeps r else raise LexError + | c::cs => + let val (r_simp, f_simp) = der_simp c r in + inj r c (f_simp (lex_simp2 r_simp cs)) + end + +fun lexing_simp2 r s = lex_simp2 r (explode s) + +fun lex_acc r s f = case s of + [] => if (nullable r) then f (mkeps r) else raise LexError + | c::cs => + let val (r_simp, f_simp) = simp (der c r) in + lex_acc r_simp cs (fn v => f (inj r c (f_simp v))) + end + +fun lexing_acc r s = lex_acc r (explode s) (f_id) + +fun lex_acc2 r s f = case s of + [] => if (nullable r) then f (mkeps r) else raise LexError + | c::cs => + let val (r_simp, f_simp) = der_simp c r in + lex_acc2 r_simp cs (fn v => f (inj r c (f_simp v))) + end + +fun lexing_acc2 r s = lex_acc2 r (explode s) (f_id) + + +(* Lexing rules for a small WHILE language *) +val sym = alts (List.map chr (explode "abcdefghijklmnopqrstuvwxyz")) +val digit = alts (List.map chr (explode "0123456789")) +val idents = sym -- STAR(sym ++ digit) +val nums = plus(digit) +val keywords = alts (List.map str ["skip", "while", "do", "if", "then", "else", "read", "write", "true", "false"]) +val semicolon = str ";" +val ops = alts (List.map str [":=", "==", "-", "+", "*", "!=", "<", ">", "<=", ">=", "%", "/"]) +val whitespace = plus(str " " ++ str "\n" ++ str "\t") +val rparen = str ")" +val lparen = str "(" +val begin_paren = str "{" +val end_paren = str "}" + + +val while_regs = STAR(("k" $ keywords) ++ + ("i" $ idents) ++ + ("o" $ ops) ++ + ("n" $ nums) ++ + ("s" $ semicolon) ++ + ("p" $ (lparen ++ rparen)) ++ + ("b" $ (begin_paren ++ end_paren)) ++ + ("w" $ whitespace)) + + + +(* Some Tests + ============ *) + +fun time f x = + let + val t_start = Timer.startCPUTimer() + val f_x = (f x; f x; f x; f x; f x; f x; f x; f x; f x; f x) + val t_end = Time.toReal(#usr(Timer.checkCPUTimer(t_start))) / 10.0 +in + (print ((Real.toString t_end) ^ "\n"); f_x) +end + +val prog = "ab"; +val reg = ("x" $ ((str "a") -- (str "b"))); +print("Simp: " ^ PolyML.makestring (lexing_simp reg prog) ^ "\n"); +print("Acc: " ^ PolyML.makestring (lexing_acc reg prog) ^ "\n"); +print("Env " ^ string_of_env (env (lexing_acc reg prog)) ^ "\n"); + +fun fst (x, y) = x; +fun snd (x, y) = y; + +val derS = [reg, + der #"a" reg, + fst (simp (der #"a" reg)), + fst (der_simp #"a" reg)]; + +val vS = [(snd (simp (der #"a" reg))) (Chr(#"b")), + (snd (der_simp #"a" reg)) (Chr(#"b")) + ]; + +print("Ders: \n" ^ + String.concatWith "\n" (List.map PolyML.makestring derS) + ^ "\n\n"); +print("Vs: \n" ^ + String.concatWith "\n" (List.map PolyML.makestring vS) + ^ "\n\n"); + + +val prog0 = "read n"; +print("Env0 is: \n" ^ string_of_env (env (lexing_acc while_regs prog0)) ^ "\n"); + +val prog1 = "read n; write (n)"; +print("Env1 is: \n" ^ string_of_env (env (lexing_acc while_regs prog1)) ^ "\n"); + + +val prog2 = String.concatWith "\n" + ["i := 2;", + "max := 100;", + "while i < max do {", + " isprime := 1;", + " j := 2;", + " while (j * j) <= i + 1 do {", + " if i % j == 0 then isprime := 0 else skip;", + " j := j + 1", + " };", + " if isprime == 1 then write i else skip;", + " i := i + 1", + "}"]; + + +let + val tst = (lexing_simp while_regs prog2 = lexing_acc while_regs prog2) +in + print("Sanity test: >>" ^ (PolyML.makestring tst) ^ "<<\n") +end; + +(* loops in ML *) +datatype for = to of int * int +infix to + +val for = + fn lo to up => + (fn f => + let fun loop lo = + if lo > up then () else (f lo; loop (lo + 1)) + in loop lo end) + +fun forby n = + fn lo to up => + (fn f => + let fun loop lo = + if lo > up then () else (f lo; loop (lo + n)) + in loop lo end) + + +fun step_simp i = + (print ((Int.toString i) ^ ": ") ; + time (lexing_simp while_regs) (string_repeat prog2 i)) + +fun step_simp2 i = + (print ((Int.toString i) ^ ": ") ; + time (lexing_simp2 while_regs) (string_repeat prog2 i)) + +fun step_acc i = + (print ((Int.toString i) ^ ": ") ; + time (lexing_acc while_regs) (string_repeat prog2 i)) + +fun step_acc2 i = + (print ((Int.toString i) ^ ": ") ; + time (lexing_acc2 while_regs) (string_repeat prog2 i)) + +val main1 = forby 1000 (1000 to 5000) step_simp; +print "\n"; +val main2 = forby 1000 (1000 to 5000) step_simp2; +print "\n"; +val main3 = forby 1000 (1000 to 5000) step_acc; +print "\n"; +val main4 = forby 1000 (1000 to 5000) step_acc2; + +