progs/fsharp/re.ml
author Chengsong
Wed, 23 Aug 2023 03:02:31 +0100
changeset 668 3831621d7b14
parent 156 6a43ea9305ba
permissions -rw-r--r--
added technical Overview section, almost done introduction


type rexp =
   ZERO 
 | ONE 
 | CHAR of char
 | ALT of rexp * rexp
 | SEQ of rexp * rexp 
 | STAR of rexp 
 | RECD of string * rexp;;

type value =
   Empty
 | Chr of char
 | Sequ of value * value
 | Left of value
 | Right of value
 | Stars of value list
 | Rec of string * value;;

(* some helper functions for strings *)   
let explode s = [for c in s -> c];;

let string_repeat s n =  String.replicate n s;;

(* some helper functions for rexps *)
let rec seq s = match s with
  | [] -> ONE
  | [c] -> CHAR(c)
  | c::cs -> SEQ(CHAR(c), seq cs);;

let chr c = CHAR(c)

let str s = seq(explode s);;

let plus r = SEQ(r, STAR(r));;

let (++) r1 r2 = ALT(r1, r2);;

let (--) r1 r2 = SEQ(r1, r2);;

let ($) x r = RECD(x, r);;

let alts rs = match rs with 
  | [] -> ZERO
  | [r] -> r
  | r::rs -> List.fold (++) r rs;;


(* size of a regular expressions - for testing purposes *)
let rec size r = match r with
  | ZERO -> 1
  | ONE -> 1
  | CHAR(_) -> 1
  | ALT(r1, r2) -> 1 + (size r1) + (size r2)
  | SEQ(r1, r2) -> 1 + (size r1) + (size r2)
  | STAR(r) -> 1 + (size r)
  | RECD(_, r) -> 1 + (size r);;

(* nullable function: tests whether the regular 
   expression can recognise the empty string *)
let rec nullable r = match r with
  | ZERO -> false
  | ONE -> true
  | CHAR(_) -> false
  | ALT(r1, r2) -> nullable(r1) || nullable(r2)
  | SEQ(r1, r2) -> nullable(r1) && nullable(r2)
  | STAR(_) -> true
  | RECD(_, r) -> nullable(r);;

(* derivative of a regular expression r w.r.t. a character c *)
let rec der c r = match r with 
  | ZERO -> ZERO
  | ONE -> ZERO
  | CHAR(d) -> if c = d then ONE else ZERO
  | ALT(r1, r2) -> ALT(der c r1, der c r2)
  | SEQ(r1, r2) -> 
      if nullable r1 then ALT(SEQ(der c r1, r2), der c r2)
      else SEQ(der c r1, r2)
  | STAR(r) -> SEQ(der c r, STAR(r))
  | RECD(_, r) -> der c r;;

(* derivative w.r.t. a list of chars (iterates der) *)
let rec ders s r = match s with 
  | [] -> r
  | c::s -> ders s (der c r);;

(* extracts a string from value *)
let rec flatten v = match v with 
  | Empty -> ""
  | Chr(c) -> System.Convert.ToString(c)
  | Left(v) -> flatten v
  | Right(v) -> flatten v
  | Sequ(v1, v2) -> flatten v1 ^ flatten v2
  | Stars(vs) -> String.concat "" (List.map flatten vs)
  | Rec(_, v) -> flatten v;;


(* extracts an environment from a value *)
let rec env v = match v with
  | Empty -> []
  | Chr(c) -> []
  | Left(v) -> env v
  | Right(v) -> env v
  | Sequ(v1, v2) -> env v1 @ env v2
  | Stars(vs) -> List.fold (@) [] (List.map env vs)
  | Rec(x, v) -> (x, flatten v) :: env v;;

let string_of_pair (x, s) = "(" ^ x ^ "," ^ s ^ ")";;
let string_of_env xs = String.concat "," (List.map string_of_pair xs);;


(* the value for a nullable rexp *)
let rec mkeps r = match r with
  | ONE -> Empty
  | ALT(r1, r2) -> 
      if nullable r1 then Left(mkeps r1) else Right(mkeps r2)
  | SEQ(r1, r2) -> Sequ(mkeps r1, mkeps r2)
  | STAR(r) -> Stars([])
  | RECD(x, r) -> Rec(x, mkeps r);;


(* injection of a char into a value *)
let rec inj r c v = match r, v with
  | STAR(r), Sequ(v1, Stars(vs)) -> Stars(inj r c v1 :: vs)
  | SEQ(r1, r2), Sequ(v1, v2) -> Sequ(inj r1 c v1, v2)
  | SEQ(r1, r2), Left(Sequ(v1, v2)) -> Sequ(inj r1 c v1, v2)
  | SEQ(r1, r2), Right(v2) -> Sequ(mkeps r1, inj r2 c v2)
  | ALT(r1, r2), Left(v1) -> Left(inj r1 c v1)
  | ALT(r1, r2), Right(v2) -> Right(inj r2 c v2)
  | CHAR(d), Empty -> Chr(d) 
  | RECD(x, r1), _ -> Rec(x, inj r1 c v);;

(* some "rectification" functions for simplification *)
let f_id v = v;;
let f_right f = fun v -> Right(f v);;
let f_left f = fun v -> Left(f v);;
let f_alt f1 f2 = fun v -> match v with 
    Right(v) -> Right(f2 v)
  | Left(v) -> Left(f1 v);;
let f_seq f1 f2 = fun v -> match v with 
  Sequ(v1, v2) -> Sequ(f1 v1, f2 v2);;
let f_seq_Empty1 f1 f2 = fun v -> Sequ(f1 Empty, f2 v);;
let f_seq_Empty2 f1 f2 = fun v -> Sequ(f1 v, f2 Empty);;
let f_rec f = fun v -> match v with
    Rec(x, v) -> Rec(x, f v);;

(* simplification of regular expressions returning also an 
   rectification function; no simplification under STARs *)
let rec simp r = match r with
    ALT(r1, r2) -> 
      let (r1s, f1s) = simp r1 in 
      let (r2s, f2s) = simp r2 in
      (match r1s, r2s with
          ZERO, _ -> (r2s, f_right f2s)
        | _, ZERO -> (r1s, f_left f1s)
        | _, _    -> if r1s = r2s then (r1s, f_left f1s)
                     else (ALT (r1s, r2s), f_alt f1s f2s)) 
  | SEQ(r1, r2) -> 
      let (r1s, f1s) = simp r1 in
      let (r2s, f2s) = simp r2 in
      (match r1s, r2s with
          ZERO, _  -> (ZERO, f_right f2s)
        | _, ZERO  -> (ZERO, f_left f1s)
        | ONE, _ -> (r2s, f_seq_Empty1 f1s f2s)
        | _, ONE -> (r1s, f_seq_Empty2 f1s f2s)
        | _, _     -> (SEQ(r1s, r2s), f_seq f1s f2s))
  | RECD(x, r1) -> 
      let (r1s, f1s) = simp r1 in
      (RECD(x, r1s), f_rec f1s)
  | r -> (r, f_id)
;;

(* matcher function *)
let matcher r s = nullable(ders (explode s) r);;

(* lexing function (produces a value) *)
exception LexError;;

let rec lex r s = match s with
    [] -> if (nullable r) then mkeps r else raise LexError
  | c::cs -> inj r c (lex (der c r) cs);;

let lexing r s = lex r (explode s);;

(* lexing with simplification *)
let rec lex_simp r s = match s with
    [] -> if (nullable r) then mkeps r else raise LexError
  | c::cs -> 
    let (r_simp, f_simp) = simp (der c r) in
    inj r c (f_simp (lex_simp r_simp cs));;

let lexing_simp r s = lex_simp r (explode s);;




(* Lexing rules for a small WHILE language *)
let sym = alts (List.map chr (explode "abcdefghijklmnopqrstuvwxyz"));;
let digit = alts (List.map chr (explode "0123456789"));;
let idents =  sym -- STAR(sym ++ digit);;
let nums = plus(digit);;
let keywords = alts (List.map str ["skip"; "while"; "do"; "if"; "then"; "else"; "read"; "write"; "true"; "false"]);;
let semicolon = str ";"
let ops = alts (List.map str [":="; "=="; "-"; "+"; "*"; "!="; "<"; ">"; "<="; ">="; "%"; "/"]);;
let whitespace = plus(str " " ++ str "\n" ++ str "\t");;
let rparen = str ")";;
let lparen = str "(";;
let begin_paren = str "{";;
let end_paren = str "}";;


let while_regs = STAR(("k" $ keywords) ++
                      ("i" $ idents) ++
                      ("o" $ ops) ++ 
                      ("n" $ nums) ++ 
                      ("s" $ semicolon) ++ 
                      ("p" $ (lparen ++ rparen)) ++ 
                      ("b" $ (begin_paren ++ end_paren)) ++ 
                      ("w" $ whitespace));;



(* Some Tests
  ============ *)

let time f x =
  let t = System.DateTime.Now in
  let f_x = f x in
  (printfn "%O" (System.DateTime.Now - t); f_x);;

let prog0 = "read n";;
string_of_env (env (lexing while_regs prog0));;

let prog1 = "read  n; write (n)";;
string_of_env (env (lexing_simp while_regs prog1));;


let prog2 = "
i := 2;
max := 100;
while i < max do {
  isprime := 1;
  j := 2;
  while (j * j) <= i + 1  do {
    if i % j == 0 then isprime := 0  else skip;
    j := j + 1
  };
  if isprime == 1 then write i else skip;
  i := i + 1
}";;

for i = 1 to 100 do
  printf "%i: " i ;
  time (lexing_simp while_regs) (string_repeat prog2 i);
done;;