progs/sml/re.ML
changeset 3 94824659f6d7
child 156 6a43ea9305ba
equal deleted inserted replaced
2:2bc119fc8657 3:94824659f6d7
       
     1 
       
     2 datatype rexp =
       
     3    NULL 
       
     4  | EMPTY 
       
     5  | CHAR of char
       
     6  | ALT of rexp * rexp
       
     7  | SEQ of rexp * rexp 
       
     8  | STAR of rexp 
       
     9  | RECD of string * rexp
       
    10 
       
    11 datatype value =
       
    12    Void
       
    13  | Chr of char
       
    14  | Sequ of value * value
       
    15  | Left of value
       
    16  | Right of value
       
    17  | Stars of value list
       
    18  | Rec of string * value
       
    19 
       
    20 (* some helper functions for strings *)   
       
    21 fun string_repeat s n = String.concat (List.tabulate (n, fn _ => s))
       
    22 
       
    23 (* some helper functions for rexps *)
       
    24 fun seq s = case s of
       
    25     [] => EMPTY
       
    26   | [c] => CHAR(c)
       
    27   | c::cs => SEQ(CHAR(c), seq cs)
       
    28 
       
    29 fun chr c = CHAR(c)
       
    30 
       
    31 fun str s = seq(explode s)
       
    32 
       
    33 fun plus r = SEQ(r, STAR(r))
       
    34 
       
    35 infix 9 ++
       
    36 infix 9 --
       
    37 infix 9 $
       
    38 
       
    39 fun op ++ (r1, r2) = ALT(r1, r2)
       
    40 
       
    41 fun op -- (r1, r2) = SEQ(r1, r2)
       
    42 
       
    43 fun op $ (x, r) = RECD(x, r)
       
    44 
       
    45 fun alts rs = case rs of
       
    46     [] => NULL
       
    47   | [r] => r
       
    48   | r::rs => List.foldl (op ++) r rs
       
    49 
       
    50 
       
    51 (* size of a regular expressions - for testing purposes *)
       
    52 fun size r = case r of
       
    53     NULL => 1
       
    54   | EMPTY => 1
       
    55   | CHAR(_) => 1
       
    56   | ALT(r1, r2) => 1 + (size r1) + (size r2)
       
    57   | SEQ(r1, r2) => 1 + (size r1) + (size r2)
       
    58   | STAR(r) => 1 + (size r)
       
    59   | RECD(_, r) => 1 + (size r)
       
    60 
       
    61 (* nullable function: tests whether the regular 
       
    62    expression can recognise the empty string *)
       
    63 fun nullable r = case r of
       
    64     NULL => false
       
    65   | EMPTY => true
       
    66   | CHAR(_) => false
       
    67   | ALT(r1, r2) => nullable(r1) orelse nullable(r2)
       
    68   | SEQ(r1, r2) => nullable(r1) andalso nullable(r2)
       
    69   | STAR(_) => true
       
    70   | RECD(_, r) => nullable(r)
       
    71 
       
    72 (* derivative of a regular expression r w.r.t. a character c *)
       
    73 fun der c r = case r of
       
    74     NULL => NULL
       
    75   | EMPTY => NULL
       
    76   | CHAR(d) => if c = d then EMPTY else NULL
       
    77   | ALT(r1, r2) => ALT(der c r1, der c r2)
       
    78   | SEQ(r1, r2) => 
       
    79       if nullable r1 then ALT(SEQ(der c r1, r2), der c r2)
       
    80       else SEQ(der c r1, r2)
       
    81   | STAR(r) => SEQ(der c r, STAR(r))
       
    82   | RECD(_, r) => der c r
       
    83 
       
    84 (* derivative w.r.t. a list of chars (iterates der) *)
       
    85 fun ders s r = case s of 
       
    86     [] => r
       
    87   | c::s => ders s (der c r)
       
    88 
       
    89 (* extracts a string from value *)
       
    90 fun flatten v = case v of 
       
    91     Void => ""
       
    92   | Chr(c) => Char.toString c
       
    93   | Left(v) => flatten v
       
    94   | Right(v) => flatten v
       
    95   | Sequ(v1, v2) => flatten v1 ^ flatten v2
       
    96   | Stars(vs) => String.concat (List.map flatten vs)
       
    97   | Rec(_, v) => flatten v
       
    98 
       
    99 
       
   100 (* extracts an environment from a value *)
       
   101 fun env v = case v of 
       
   102     Void => []
       
   103   | Chr(c) => []
       
   104   | Left(v) => env v
       
   105   | Right(v) => env v
       
   106   | Sequ(v1, v2) => env v1 @ env v2
       
   107   | Stars(vs) => List.foldr (op @) [] (List.map env vs)
       
   108   | Rec(x, v) => (x, flatten v) :: env v
       
   109 
       
   110 fun string_of_pair (x, s) = "(" ^ x ^ "," ^ s ^ ")"
       
   111 fun string_of_env xs = String.concatWith "," (List.map string_of_pair xs)
       
   112 
       
   113 
       
   114 (* the value for a nullable rexp *)
       
   115 fun mkeps r = case r of 
       
   116     EMPTY => Void
       
   117   | ALT(r1, r2) => 
       
   118       if nullable r1 then Left(mkeps r1) else Right(mkeps r2)
       
   119   | SEQ(r1, r2) => Sequ(mkeps r1, mkeps r2)
       
   120   | STAR(r) => Stars([])
       
   121   | RECD(x, r) => Rec(x, mkeps r)
       
   122 
       
   123 exception Error
       
   124 
       
   125 (* injection of a char into a value *)
       
   126 fun inj r c v = case (r, v) of
       
   127     (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj r c v1 :: vs)
       
   128   | (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj r1 c v1, v2)
       
   129   | (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj r1 c v1, v2)
       
   130   | (SEQ(r1, r2), Right(v2)) => Sequ(mkeps r1, inj r2 c v2)
       
   131   | (ALT(r1, r2), Left(v1)) => Left(inj r1 c v1)
       
   132   | (ALT(r1, r2), Right(v2)) => Right(inj r2 c v2)
       
   133   | (CHAR(d), Void) => Chr(d) 
       
   134   | (RECD(x, r1), _) => Rec(x, inj r1 c v)
       
   135   | _ => (print ("\nr: " ^ PolyML.makestring r ^ "\n");
       
   136           print ("v: " ^ PolyML.makestring v ^ "\n");
       
   137           raise Error)
       
   138 
       
   139 (* some "rectification" functions for simplification *)
       
   140 fun f_id v = v
       
   141 fun f_right f = fn v => Right(f v)
       
   142 fun f_left f = fn v => Left(f v)
       
   143 fun f_alt f1 f2 = fn v => case v of
       
   144     Right(v) => Right(f2 v)
       
   145   | Left(v) => Left(f1 v)
       
   146 fun f_seq f1 f2 = fn v => case v of 
       
   147   Sequ(v1, v2) => Sequ(f1 v1, f2 v2)
       
   148 fun f_seq_Void1 f1 f2 = fn v => Sequ(f1 Void, f2 v)
       
   149 fun f_seq_Void2 f1 f2 = fn v => Sequ(f1 v, f2 Void)
       
   150 fun f_rec f = fn v => case v of
       
   151     Rec(x, v) => Rec(x, f v)
       
   152 
       
   153 exception ShouldNotHappen
       
   154 
       
   155 fun f_error v = raise ShouldNotHappen
       
   156 
       
   157 (* simplification of regular expressions returning also an 
       
   158    rectification function; no simplification under STARs *)
       
   159 fun simp r = case r of
       
   160     ALT(r1, r2) => 
       
   161       let val (r1s, f1s) = simp r1  
       
   162           val (r2s, f2s) = simp r2 in
       
   163         (case (r1s, r2s) of
       
   164             (NULL, _) => (r2s, f_right f2s)
       
   165           | (_, NULL) => (r1s, f_left f1s)
       
   166           | (_, _)    => if r1s = r2s then (r1s, f_left f1s)
       
   167                          else (ALT (r1s, r2s), f_alt f1s f2s))
       
   168       end 
       
   169   | SEQ(r1, r2) => 
       
   170       let val (r1s, f1s) = simp r1 
       
   171           val (r2s, f2s) = simp r2 in
       
   172         (case (r1s, r2s) of
       
   173           (NULL, _)  => (NULL, f_error)
       
   174         | (_, NULL)  => (NULL, f_error)
       
   175         | (EMPTY, _) => (r2s, f_seq_Void1 f1s f2s)
       
   176         | (_, EMPTY) => (r1s, f_seq_Void2 f1s f2s)
       
   177         | (_, _)     => (SEQ(r1s, r2s), f_seq f1s f2s))
       
   178       end  
       
   179   | RECD(x, r1) => 
       
   180       let val (r1s, f1s) = simp r1 in
       
   181         (RECD(x, r1s), f_rec f1s)
       
   182       end
       
   183   | r => (r, f_id)
       
   184 
       
   185 fun der_simp c r = case r of
       
   186     NULL => (NULL, f_id)
       
   187   | EMPTY => (NULL, f_id)
       
   188   | CHAR(d) => ((if c = d then EMPTY else NULL), f_id)
       
   189   | ALT(r1, r2) => 
       
   190       let 
       
   191         val (r1d, f1d) = der_simp c r1 
       
   192         val (r2d, f2d) = der_simp c r2 
       
   193       in
       
   194         case (r1d, r2d) of
       
   195           (NULL, _) => (r2d, f_right f2d)
       
   196         | (_, NULL) => (r1d, f_left f1d)
       
   197         | (_, _)    => if r1d = r2d then (r1d, f_left f1d)
       
   198                        else (ALT (r1d, r2d), f_alt f1d f2d)
       
   199       end
       
   200   | SEQ(r1, r2) => 
       
   201       if nullable r1 
       
   202       then 
       
   203         let 
       
   204           val (r1d, f1d) = der_simp c r1 
       
   205           val (r2d, f2d) = der_simp c r2
       
   206           val (r2s, f2s) = simp r2 
       
   207         in
       
   208           case (r1d, r2s, r2d) of
       
   209             (NULL, _, _)  => (r2d, f_right f2d)
       
   210           | (_, NULL, _)  => (r2d, f_right f2d)
       
   211           | (_, _, NULL)  => (SEQ(r1d, r2s), f_left (f_seq f1d f2s))
       
   212           | (EMPTY, _, _) => (ALT(r2s, r2d), f_alt (f_seq_Void1 f1d f2s) f2d)
       
   213           | (_, EMPTY, _) => (ALT(r1d, r2d), f_alt (f_seq_Void2 f1d f2s) f2d)
       
   214           | (_, _, _)     => (ALT(SEQ(r1d, r2s), r2d), f_alt (f_seq f1d f2s) f2d)
       
   215         end
       
   216       else 
       
   217         let 
       
   218           val (r1d, f1d) = der_simp c r1 
       
   219           val (r2s, f2s) = simp r2
       
   220         in
       
   221           case (r1d, r2s) of
       
   222             (NULL, _) => (NULL, f_error)
       
   223           | (_, NULL) => (NULL, f_error)
       
   224           | (EMPTY, _) => (r2s, f_seq_Void1 f1d f2s)
       
   225           | (_, EMPTY) => (r1d, f_seq_Void2 f1d f2s)
       
   226           | (_, _) => (SEQ(r1d, r2s), f_seq f1d f2s)
       
   227   	end	  
       
   228   | STAR(r1) => 
       
   229       let 
       
   230         val (r1d, f1d) = der_simp c r1 
       
   231       in
       
   232         case r1d of
       
   233           NULL => (NULL, f_error)
       
   234         | EMPTY => (STAR r1, f_seq_Void1 f1d f_id)
       
   235         | _ => (SEQ(r1d, STAR(r1)), f_seq f1d f_id)
       
   236       end
       
   237   | RECD(x, r1) => der_simp c r1 
       
   238 
       
   239 
       
   240 
       
   241 (* matcher function *)
       
   242 fun matcher r s = nullable(ders (explode s) r)
       
   243 
       
   244 (* lexing function (produces a value) *)
       
   245 exception LexError
       
   246 
       
   247 fun lex r s = case s of 
       
   248     [] => if (nullable r) then mkeps r else raise LexError
       
   249   | c::cs => inj r c (lex (der c r) cs)
       
   250 
       
   251 fun lexing r s = lex r (explode s)
       
   252 
       
   253 (* lexing with simplification *)
       
   254 fun lex_simp r s = case s of 
       
   255     [] => if (nullable r) then mkeps r else raise LexError
       
   256   | c::cs => 
       
   257     let val (r_simp, f_simp) = simp (der c r) in
       
   258       inj r c (f_simp (lex_simp r_simp cs))
       
   259     end
       
   260 
       
   261 fun lexing_simp r s = lex_simp r (explode s)
       
   262 
       
   263 fun lex_simp2 r s = case s of 
       
   264     [] => if (nullable r) then mkeps r else raise LexError
       
   265   | c::cs => 
       
   266     let val (r_simp, f_simp) = der_simp c r in
       
   267       inj r c (f_simp (lex_simp2 r_simp cs))
       
   268     end
       
   269 
       
   270 fun lexing_simp2 r s = lex_simp2 r (explode s)
       
   271 
       
   272 fun lex_acc r s f = case s of 
       
   273     [] => if (nullable r) then f (mkeps r) else raise LexError
       
   274   | c::cs => 
       
   275     let val (r_simp, f_simp) = simp (der c r) in
       
   276       lex_acc r_simp cs (fn v => f (inj r c (f_simp v)))
       
   277     end
       
   278 
       
   279 fun lexing_acc r s  = lex_acc r (explode s) (f_id)
       
   280 
       
   281 fun lex_acc2 r s f = case s of 
       
   282     [] => if (nullable r) then f (mkeps r) else raise LexError
       
   283   | c::cs => 
       
   284     let val (r_simp, f_simp) = der_simp c r in
       
   285       lex_acc2 r_simp cs (fn v => f (inj r c (f_simp v)))
       
   286     end
       
   287 
       
   288 fun lexing_acc2 r s  = lex_acc2 r (explode s) (f_id)
       
   289 
       
   290 
       
   291 (* Lexing rules for a small WHILE language *)
       
   292 val sym = alts (List.map chr (explode "abcdefghijklmnopqrstuvwxyz"))
       
   293 val digit = alts (List.map chr (explode "0123456789"))
       
   294 val idents =  sym -- STAR(sym ++ digit)
       
   295 val nums = plus(digit)
       
   296 val keywords = alts (List.map str ["skip", "while", "do", "if", "then", "else", "read", "write", "true", "false"])
       
   297 val semicolon = str ";"
       
   298 val ops = alts (List.map str [":=", "==", "-", "+", "*", "!=", "<", ">", "<=", ">=", "%", "/"])
       
   299 val whitespace = plus(str " " ++ str "\n" ++ str "\t")
       
   300 val rparen = str ")"
       
   301 val lparen = str "("
       
   302 val begin_paren = str "{"
       
   303 val end_paren = str "}"
       
   304 
       
   305 
       
   306 val while_regs = STAR(("k" $ keywords) ++
       
   307                       ("i" $ idents) ++
       
   308                       ("o" $ ops) ++ 
       
   309                       ("n" $ nums) ++ 
       
   310                       ("s" $ semicolon) ++ 
       
   311                       ("p" $ (lparen ++ rparen)) ++ 
       
   312                       ("b" $ (begin_paren ++ end_paren)) ++ 
       
   313                       ("w" $ whitespace))
       
   314 
       
   315 
       
   316 
       
   317 (* Some Tests
       
   318   ============ *)
       
   319 
       
   320 fun time f x =
       
   321   let
       
   322   val t_start = Timer.startCPUTimer()
       
   323   val f_x = (f x; f x; f x; f x; f x; f x; f x; f x; f x; f x)
       
   324   val t_end = Time.toReal(#usr(Timer.checkCPUTimer(t_start))) / 10.0
       
   325 in
       
   326   (print ((Real.toString t_end) ^ "\n"); f_x)
       
   327 end
       
   328 
       
   329 val prog = "ab";
       
   330 val reg = ("x" $ ((str "a") -- (str "b")));
       
   331 print("Simp: " ^ PolyML.makestring (lexing_simp reg prog) ^ "\n");
       
   332 print("Acc:  " ^ PolyML.makestring (lexing_acc  reg prog) ^ "\n");
       
   333 print("Env   " ^ string_of_env (env (lexing_acc reg prog)) ^ "\n");
       
   334 
       
   335 fun fst (x, y) = x;
       
   336 fun snd (x, y) = y;
       
   337 
       
   338 val derS = [reg,
       
   339             der #"a" reg,
       
   340             fst (simp (der #"a" reg)),
       
   341             fst (der_simp #"a" reg)];
       
   342 
       
   343 val vS = [(snd (simp (der #"a" reg))) (Chr(#"b")),
       
   344           (snd (der_simp #"a" reg)) (Chr(#"b"))
       
   345          ];
       
   346 
       
   347 print("Ders: \n" ^ 
       
   348        String.concatWith "\n" (List.map PolyML.makestring derS)
       
   349        ^ "\n\n");
       
   350 print("Vs: \n" ^ 
       
   351        String.concatWith "\n" (List.map PolyML.makestring vS)
       
   352        ^ "\n\n");
       
   353 
       
   354 
       
   355 val prog0 = "read n";
       
   356 print("Env0 is: \n" ^  string_of_env (env (lexing_acc while_regs prog0)) ^ "\n");
       
   357 
       
   358 val prog1 = "read  n; write (n)";
       
   359 print("Env1 is: \n" ^ string_of_env (env (lexing_acc while_regs prog1)) ^ "\n");
       
   360 
       
   361 
       
   362 val prog2 = String.concatWith "\n" 
       
   363   ["i := 2;",
       
   364    "max := 100;",
       
   365    "while i < max do {",
       
   366    "  isprime := 1;",
       
   367    "  j := 2;",
       
   368    "  while (j * j) <= i + 1  do {",
       
   369    "    if i % j == 0 then isprime := 0  else skip;",
       
   370    "    j := j + 1",
       
   371    "  };",
       
   372    " if isprime == 1 then write i else skip;",
       
   373    " i := i + 1",
       
   374    "}"];
       
   375 
       
   376 
       
   377 let 
       
   378   val tst = (lexing_simp while_regs prog2 = lexing_acc while_regs prog2)
       
   379 in
       
   380   print("Sanity test: >>" ^ (PolyML.makestring tst) ^ "<<\n")
       
   381 end;
       
   382 
       
   383 (* loops in ML *)
       
   384 datatype for = to of int * int
       
   385 infix to 
       
   386 
       
   387 val for =
       
   388   fn lo to up =>
       
   389     (fn f => 
       
   390        let fun loop lo = 
       
   391          if lo > up then () else (f lo; loop (lo + 1))
       
   392        in loop lo end)
       
   393 
       
   394 fun forby n =
       
   395   fn lo to up =>
       
   396     (fn f => 
       
   397        let fun loop lo = 
       
   398          if lo > up then () else (f lo; loop (lo + n))
       
   399        in loop lo end)
       
   400 
       
   401 
       
   402 fun step_simp i = 
       
   403   (print ((Int.toString i) ^ ": ") ;
       
   404    time (lexing_simp while_regs) (string_repeat prog2 i)) 
       
   405 
       
   406 fun step_simp2 i = 
       
   407   (print ((Int.toString i) ^ ": ") ;
       
   408    time (lexing_simp2 while_regs) (string_repeat prog2 i)) 
       
   409 
       
   410 fun step_acc i = 
       
   411   (print ((Int.toString i) ^ ": ") ;
       
   412    time (lexing_acc while_regs) (string_repeat prog2 i))
       
   413 
       
   414 fun step_acc2 i = 
       
   415   (print ((Int.toString i) ^ ": ") ;
       
   416    time (lexing_acc2 while_regs) (string_repeat prog2 i)) 
       
   417 
       
   418 val main1 = forby 1000 (1000 to 5000) step_simp;
       
   419 print "\n";
       
   420 val main2 = forby 1000 (1000 to 5000) step_simp2;
       
   421 print "\n";
       
   422 val main3 = forby 1000 (1000 to 5000) step_acc;
       
   423 print "\n";
       
   424 val main4 = forby 1000 (1000 to 5000) step_acc2; 
       
   425 
       
   426