progs/ocaml/re.ml
changeset 3 94824659f6d7
child 156 6a43ea9305ba
equal deleted inserted replaced
2:2bc119fc8657 3:94824659f6d7
       
     1 
       
     2 type rexp =
       
     3    NULL 
       
     4  | EMPTY 
       
     5  | CHAR of char
       
     6  | ALT of rexp * rexp
       
     7  | SEQ of rexp * rexp 
       
     8  | STAR of rexp 
       
     9  | RECD of string * rexp;;
       
    10 
       
    11 type value =
       
    12    Void
       
    13  | Chr of char
       
    14  | Sequ of value * value
       
    15  | Left of value
       
    16  | Right of value
       
    17  | Stars of value list
       
    18  | Rec of string * value;;
       
    19 
       
    20 let rec string_of_val v = match v with
       
    21    Void -> "Void"
       
    22  | Chr(c) -> String.make 1 c
       
    23  | Sequ(v1, v2) -> "Seq(" ^ string_of_val v1 ^ "," ^ string_of_val v2 ^ ")"
       
    24  | Left(v1) -> "Left(" ^ string_of_val v1 ^ ")"
       
    25  | Right(v1) -> "Right(" ^ string_of_val v1 ^ ")"
       
    26  | Stars(vs) -> "[" ^ String.concat "," (List.map string_of_val vs) ^ "]"
       
    27  | Rec(x, v1) -> x ^ " $ " ^ string_of_val v1;;
       
    28 
       
    29 
       
    30 (* some helper functions for strings *)   
       
    31 let explode s =
       
    32   let rec exp i l =
       
    33     if i < 0 then l else exp (i - 1) (s.[i] :: l) in
       
    34   exp (String.length s - 1) [];;
       
    35 
       
    36 let string_repeat s n =
       
    37   Array.fold_left (^) "" (Array.make n s);;
       
    38 
       
    39 (* some helper functions for rexps *)
       
    40 let rec seq s = match s with
       
    41     [] -> EMPTY
       
    42   | [c] -> CHAR(c)
       
    43   | c::cs -> SEQ(CHAR(c), seq cs);;
       
    44 
       
    45 let chr c = CHAR(c)
       
    46 
       
    47 let str s = seq(explode s);;
       
    48 
       
    49 let plus r = SEQ(r, STAR(r));;
       
    50 
       
    51 let (++) r1 r2 = ALT(r1, r2);;
       
    52 
       
    53 let (--) r1 r2 = SEQ(r1, r2);;
       
    54 
       
    55 let ($) x r = RECD(x, r);;
       
    56 
       
    57 let alts rs = match rs with 
       
    58     [] -> NULL
       
    59   | [r] -> r
       
    60   | r::rs -> List.fold_left (++) r rs;;
       
    61 
       
    62 
       
    63 (* size of a regular expressions - for testing purposes *)
       
    64 let rec size r = match r with
       
    65     NULL -> 1
       
    66   | EMPTY -> 1
       
    67   | CHAR(_) -> 1
       
    68   | ALT(r1, r2) -> 1 + (size r1) + (size r2)
       
    69   | SEQ(r1, r2) -> 1 + (size r1) + (size r2)
       
    70   | STAR(r) -> 1 + (size r)
       
    71   | RECD(_, r) -> 1 + (size r);;
       
    72 
       
    73 (* nullable function: tests whether the regular 
       
    74    expression can recognise the empty string *)
       
    75 let rec nullable r = match r with
       
    76     NULL -> false
       
    77   | EMPTY -> true
       
    78   | CHAR(_) -> false
       
    79   | ALT(r1, r2) -> nullable(r1) || nullable(r2)
       
    80   | SEQ(r1, r2) -> nullable(r1) && nullable(r2)
       
    81   | STAR(_) -> true
       
    82   | RECD(_, r) -> nullable(r);;
       
    83 
       
    84 (* derivative of a regular expression r w.r.t. a character c *)
       
    85 let rec der c r = match r with 
       
    86     NULL -> NULL
       
    87   | EMPTY -> NULL
       
    88   | CHAR(d) -> if c = d then EMPTY else NULL
       
    89   | ALT(r1, r2) -> ALT(der c r1, der c r2)
       
    90   | SEQ(r1, r2) -> 
       
    91       if nullable r1 then ALT(SEQ(der c r1, r2), der c r2)
       
    92       else SEQ(der c r1, r2)
       
    93   | STAR(r) -> SEQ(der c r, STAR(r))
       
    94   | RECD(_, r) -> der c r;;
       
    95 
       
    96 (* derivative w.r.t. a list of chars (iterates der) *)
       
    97 let rec ders s r = match s with 
       
    98     [] -> r
       
    99   | c::s -> ders s (der c r);;
       
   100 
       
   101 (* extracts a string from value *)
       
   102 let rec flatten v = match v with 
       
   103     Void -> ""
       
   104   | Chr(c) -> String.make 1 c
       
   105   | Left(v) -> flatten v
       
   106   | Right(v) -> flatten v
       
   107   | Sequ(v1, v2) -> flatten v1 ^ flatten v2
       
   108   | Stars(vs) -> String.concat "" (List.map flatten vs)
       
   109   | Rec(_, v) -> flatten v;;
       
   110 
       
   111 
       
   112 (* extracts an environment from a value *)
       
   113 let rec env v = match v with
       
   114     Void -> []
       
   115   | Chr(c) -> []
       
   116   | Left(v) -> env v
       
   117   | Right(v) -> env v
       
   118   | Sequ(v1, v2) -> env v1 @ env v2
       
   119   | Stars(vs) -> List.flatten (List.map env vs)
       
   120   | Rec(x, v) -> (x, flatten v) :: env v;;
       
   121 
       
   122 let string_of_pair (x, s) = "(" ^ x ^ "," ^ s ^ ")";;
       
   123 let string_of_env xs = String.concat "," (List.map string_of_pair xs);;
       
   124 
       
   125 
       
   126 (* the value for a nullable rexp *)
       
   127 let rec mkeps r = match r with
       
   128     EMPTY -> Void
       
   129   | ALT(r1, r2) -> 
       
   130       if nullable r1 then Left(mkeps r1) else Right(mkeps r2)
       
   131   | SEQ(r1, r2) -> Sequ(mkeps r1, mkeps r2)
       
   132   | STAR(r) -> Stars([])
       
   133   | RECD(x, r) -> Rec(x, mkeps r);;
       
   134 
       
   135 
       
   136 (* injection of a char into a value *)
       
   137 let rec inj r c v = match r, v with
       
   138     STAR(r), Sequ(v1, Stars(vs)) -> Stars(inj r c v1 :: vs)
       
   139   | SEQ(r1, r2), Sequ(v1, v2) -> Sequ(inj r1 c v1, v2)
       
   140   | SEQ(r1, r2), Left(Sequ(v1, v2)) -> Sequ(inj r1 c v1, v2)
       
   141   | SEQ(r1, r2), Right(v2) -> Sequ(mkeps r1, inj r2 c v2)
       
   142   | ALT(r1, r2), Left(v1) -> Left(inj r1 c v1)
       
   143   | ALT(r1, r2), Right(v2) -> Right(inj r2 c v2)
       
   144   | CHAR(d), Void -> Chr(d) 
       
   145   | RECD(x, r1), _ -> Rec(x, inj r1 c v);;
       
   146 
       
   147 (* some "rectification" functions for simplification *)
       
   148 let f_id v = v;;
       
   149 let f_right f = fun v -> Right(f v);;
       
   150 let f_left f = fun v -> Left(f v);;
       
   151 let f_alt f1 f2 = fun v -> match v with 
       
   152     Right(v) -> Right(f2 v)
       
   153   | Left(v) -> Left(f1 v);;
       
   154 let f_seq f1 f2 = fun v -> match v with 
       
   155   Sequ(v1, v2) -> Sequ(f1 v1, f2 v2);;
       
   156 let f_seq_Void1 f1 f2 = fun v -> Sequ(f1 Void, f2 v);;
       
   157 let f_seq_Void2 f1 f2 = fun v -> Sequ(f1 v, f2 Void);;
       
   158 let f_rec f = fun v -> match v with
       
   159     Rec(x, v) -> Rec(x, f v);;
       
   160 
       
   161 exception ShouldNotHappen
       
   162 let f_error v = raise ShouldNotHappen
       
   163 
       
   164 (* simplification of regular expressions returning also an 
       
   165    rectification function; no simplification under STARs *)
       
   166 let rec simp r = match r with
       
   167     ALT(r1, r2) -> 
       
   168       let (r1s, f1s) = simp r1 in 
       
   169       let (r2s, f2s) = simp r2 in
       
   170       (match r1s, r2s with
       
   171           NULL, _ -> (r2s, f_right f2s)
       
   172         | _, NULL -> (r1s, f_left f1s)
       
   173         | _, _    -> if r1s = r2s then (r1s, f_left f1s)
       
   174                      else (ALT (r1s, r2s), f_alt f1s f2s)) 
       
   175   | SEQ(r1, r2) -> 
       
   176       let (r1s, f1s) = simp r1 in
       
   177       let (r2s, f2s) = simp r2 in
       
   178       (match r1s, r2s with
       
   179           NULL, _  -> (NULL, f_error)
       
   180         | _, NULL  -> (NULL, f_error)
       
   181         | EMPTY, _ -> (r2s, f_seq_Void1 f1s f2s)
       
   182         | _, EMPTY -> (r1s, f_seq_Void2 f1s f2s)
       
   183         | _, _     -> (SEQ(r1s, r2s), f_seq f1s f2s))
       
   184   | RECD(x, r1) -> 
       
   185       let (r1s, f1s) = simp r1 in
       
   186       (RECD(x, r1s), f_rec f1s)
       
   187   | r -> (r, f_id)
       
   188 ;;
       
   189 
       
   190 let rec der_simp c r = match r with
       
   191     NULL -> (NULL, f_id)
       
   192   | EMPTY -> (NULL, f_id)
       
   193   | CHAR(d) -> ((if c = d then EMPTY else NULL), f_id)
       
   194   | ALT(r1, r2) -> 
       
   195       let (r1d, f1d) = der_simp c r1 in
       
   196       let (r2d, f2d) = der_simp c r2 in
       
   197       (match r1d, r2d with
       
   198           NULL, _ -> (r2d, f_right f2d)
       
   199         | _, NULL -> (r1d, f_left f1d)
       
   200         | _, _    -> if r1d = r2d then (r1d, f_left f1d)
       
   201                      else (ALT (r1d, r2d), f_alt f1d f2d))
       
   202   | SEQ(r1, r2) -> 
       
   203       if nullable r1 
       
   204       then 
       
   205         let (r1d, f1d) = der_simp c r1 in 
       
   206         let (r2d, f2d) = der_simp c r2 in
       
   207         let (r2s, f2s) = simp r2 in
       
   208         (match r1d, r2s, r2d with
       
   209             NULL, _, _  -> (r2d, f_right f2d)
       
   210           | _, NULL, _  -> (r2d, f_right f2d)
       
   211           | _, _, NULL  -> (SEQ(r1d, r2s), f_left (f_seq f1d f2s))
       
   212           | EMPTY, _, _ -> (ALT(r2s, r2d), f_alt (f_seq_Void1 f1d f2s) f2d)
       
   213           | _, EMPTY, _ -> (ALT(r1d, r2d), f_alt (f_seq_Void2 f1d f2s) f2d)
       
   214           | _, _, _     -> (ALT(SEQ(r1d, r2s), r2d), f_alt (f_seq f1d f2s) f2d))
       
   215       else 
       
   216         let (r1d, f1d) = der_simp c r1 in
       
   217         let (r2s, f2s) = simp r2 in
       
   218         (match r1d, r2s with
       
   219             NULL, _ -> (NULL, f_error)
       
   220           | _, NULL -> (NULL, f_error)
       
   221           | EMPTY, _ -> (r2s, f_seq_Void1 f1d f2s)
       
   222           | _, EMPTY -> (r1d, f_seq_Void2 f1d f2s)
       
   223           | _, _ -> (SEQ(r1d, r2s), f_seq f1d f2s))	  
       
   224   | STAR(r1) -> 
       
   225       let (r1d, f1d) = der_simp c r1 in
       
   226       (match r1d with
       
   227           NULL -> (NULL, f_error)
       
   228         | EMPTY -> (STAR r1, f_seq_Void1 f1d f_id)
       
   229         | _ -> (SEQ(r1d, STAR(r1)), f_seq f1d f_id))
       
   230   | RECD(x, r1) -> der_simp c r1 
       
   231 
       
   232 
       
   233 (* matcher function *)
       
   234 let matcher r s = nullable(ders (explode s) r);;
       
   235 
       
   236 (* lexing function (produces a value) *)
       
   237 exception LexError;;
       
   238 
       
   239 let rec lex r s = match s with
       
   240     [] -> if (nullable r) then mkeps r else raise LexError
       
   241   | c::cs -> inj r c (lex (der c r) cs);;
       
   242 
       
   243 let lexing r s = lex r (explode s);;
       
   244 
       
   245 (* lexing with simplification *)
       
   246 let rec lex_simp r s = match s with
       
   247     [] -> if (nullable r) then mkeps r else raise LexError
       
   248   | c::cs -> 
       
   249     let (r_simp, f_simp) = simp (der c r) in
       
   250     inj r c (f_simp (lex_simp r_simp cs));;
       
   251 
       
   252 let lexing_simp r s = lex_simp r (explode s);;
       
   253 
       
   254 let rec lex_simp2 r s = match s with
       
   255     [] -> if (nullable r) then mkeps r else raise LexError
       
   256   | c::cs -> 
       
   257     let (r_simp, f_simp) = der_simp c r in
       
   258     inj r c (f_simp (lex_simp2 r_simp cs));;
       
   259 
       
   260 let lexing_simp2 r s = lex_simp2 r (explode s);;
       
   261 
       
   262 
       
   263 (* lexing with accumulation *)
       
   264 let rec lex_acc r s f = match s with
       
   265     [] -> if (nullable r) then f (mkeps r) else raise LexError
       
   266   | c::cs -> 
       
   267     let (r_simp, f_simp) = simp (der c r) in
       
   268     lex_acc r_simp cs (fun v -> f (inj r c (f_simp v)));;
       
   269 
       
   270 let lexing_acc r s = lex_acc r (explode s) (f_id);;
       
   271 
       
   272 let rec lex_acc2 r s f = match s with
       
   273     [] -> if (nullable r) then f (mkeps r) else raise LexError
       
   274   | c::cs -> 
       
   275     let (r_simp, f_simp) = der_simp c r in
       
   276     lex_acc2 r_simp cs (fun v -> f (inj r c (f_simp v)));;
       
   277 
       
   278 let lexing_acc2 r s = lex_acc2 r (explode s) (f_id);;
       
   279 
       
   280 
       
   281 (* Lexing rules for a small WHILE language *)
       
   282 let sym = alts (List.map chr (explode "abcdefghijklmnopqrstuvwxyz"));;
       
   283 let digit = alts (List.map chr (explode "0123456789"));;
       
   284 let idents =  sym -- STAR(sym ++ digit);;
       
   285 let nums = plus(digit);;
       
   286 let keywords = alts 
       
   287    (List.map str ["skip"; "while"; "do"; "if"; "then"; "else"; "read"; "write"; "true"; "false"]);;
       
   288 let semicolon = str ";"
       
   289 let ops = alts 
       
   290    (List.map str [":="; "=="; "-"; "+"; "*"; "!="; "<"; ">"; "<="; ">="; "%"; "/"]);;
       
   291 let whitespace = plus(str " " ++ str "\n" ++ str "\t");;
       
   292 let rparen = str ")";;
       
   293 let lparen = str "(";;
       
   294 let begin_paren = str "{";;
       
   295 let end_paren = str "}";;
       
   296 
       
   297 
       
   298 let while_regs = STAR(("k" $ keywords) ++
       
   299                       ("i" $ idents) ++
       
   300                       ("o" $ ops) ++ 
       
   301                       ("n" $ nums) ++ 
       
   302                       ("s" $ semicolon) ++ 
       
   303                       ("p" $ (lparen ++ rparen)) ++ 
       
   304                       ("b" $ (begin_paren ++ end_paren)) ++ 
       
   305                       ("w" $ whitespace));;
       
   306 
       
   307 
       
   308 
       
   309 (* Some Tests
       
   310   ============ *)
       
   311 
       
   312 let time f x =
       
   313   let t = Sys.time() in
       
   314   let f_x = (f x; f x; f x) in
       
   315   (print_float ((Sys.time() -. t) /. 3.0); f_x);;
       
   316 
       
   317 
       
   318 let prog0 = "read n";;
       
   319 
       
   320 let prog1 = "read  n; write (n)";;
       
   321 string_of_env (env (lexing_simp while_regs prog1));;
       
   322 
       
   323 
       
   324 let prog2 = "
       
   325 i := 2;
       
   326 max := 100;
       
   327 while i < max do {
       
   328   isprime := 1;
       
   329   j := 2;
       
   330   while (j * j) <= i + 1  do {
       
   331     if i % j == 0 then isprime := 0  else skip;
       
   332     j := j + 1
       
   333   };
       
   334   if isprime == 1 then write i else skip;
       
   335   i := i + 1
       
   336 }";;
       
   337 
       
   338 let tst1 = (lexing_simp while_regs prog2 = lexing_simp2 while_regs prog2) in
       
   339 let tst2 = (lexing_simp while_regs prog2 = lexing_acc while_regs prog2) in
       
   340 let tst3 = (lexing_simp while_regs prog2 = lexing_acc2 while_regs prog2)
       
   341 in
       
   342   print_string ("Sanity test simp vs simp2: >>" ^ (string_of_bool tst1) ^ "<<\n") ;
       
   343   print_string ("Sanity test simp vs acc:   >>" ^ (string_of_bool tst2) ^ "<<\n") ;
       
   344   print_string ("Sanity test simp vs acc2:  >>" ^ (string_of_bool tst3) ^ "<<") ;
       
   345   print_newline ();;
       
   346 
       
   347 
       
   348 
       
   349 type range = 
       
   350   To of int * int;;
       
   351 
       
   352 let (---) i j = To(i, j);; 
       
   353 
       
   354 let forby n =
       
   355   fun range -> match range with To(lo, up) ->
       
   356     (fun f -> 
       
   357        let rec loop lo = 
       
   358          if lo > up then () else (f lo; loop (lo + n))
       
   359        in loop lo);;
       
   360 
       
   361 let step_simp i = 
       
   362   (print_string ((string_of_int i) ^ ": ") ;
       
   363    time (lexing_simp while_regs) (string_repeat prog2 i) ;
       
   364    print_newline ());;
       
   365 
       
   366 let step_simp2 i = 
       
   367   (print_string ((string_of_int i) ^ ": ") ;
       
   368    time (lexing_simp2 while_regs) (string_repeat prog2 i) ;
       
   369    print_newline ());;
       
   370 
       
   371 let step_acc i = 
       
   372   (print_string ((string_of_int i) ^ ": ") ;
       
   373    time (lexing_acc while_regs) (string_repeat prog2 i) ;
       
   374    print_newline ());;
       
   375 
       
   376 let step_acc2 i = 
       
   377   (print_string ((string_of_int i) ^ ": ") ;
       
   378    time (lexing_acc2 while_regs) (string_repeat prog2 i) ;
       
   379    print_newline ());;
       
   380 
       
   381 forby 100 (100 --- 700) step_simp;;
       
   382 print_newline ();;
       
   383 forby 100 (100 --- 700) step_simp2;;
       
   384 print_newline ();;
       
   385 forby 100 (100 --- 700) step_acc;;
       
   386 print_newline ();;
       
   387 forby 100 (100 --- 700) step_acc2;;
       
   388 print_newline ();;
       
   389 forby 1000 (1000 --- 5000) step_acc;;
       
   390 print_newline ();;
       
   391 forby 1000 (1000 --- 5000) step_acc2;;
       
   392 (*print_newline ();;*)
       
   393 (* forby 500 (100 --- 5000) step_simp;; *)
       
   394