progs/haskell/re.hs
changeset 156 6a43ea9305ba
parent 3 94824659f6d7
child 359 fedc16924b76
equal deleted inserted replaced
155:c9027db225cc 156:6a43ea9305ba
    20     let diff = (fromIntegral (end - start)) / (10^12)
    20     let diff = (fromIntegral (end - start)) / (10^12)
    21     printf "%0.9f\n" (diff :: Double)
    21     printf "%0.9f\n" (diff :: Double)
    22     return ()
    22     return ()
    23 
    23 
    24 data Rexp =
    24 data Rexp =
    25    NULL 
    25    ZERO 
    26  | EMPTY 
    26  | ONE 
    27  | CHAR Char
    27  | CHAR Char
    28  | ALT Rexp Rexp
    28  | ALT Rexp Rexp
    29  | SEQ Rexp Rexp 
    29  | SEQ Rexp Rexp 
    30  | STAR Rexp 
    30  | STAR Rexp 
    31  | RECD String Rexp deriving (Eq, Show) 
    31  | RECD String Rexp deriving (Eq, Show) 
    32 
    32 
    33 data Value =
    33 data Value =
    34    Void
    34    Empty
    35  | Chr Char
    35  | Chr Char
    36  | Sequ Value Value
    36  | Sequ Value Value
    37  | Lf Value
    37  | Lf Value
    38  | Rg Value
    38  | Rg Value
    39  | Stars [Value]
    39  | Stars [Value]
    42 string_repeat :: String -> Int -> String
    42 string_repeat :: String -> Int -> String
    43 string_repeat s n = concat (replicate n s)
    43 string_repeat s n = concat (replicate n s)
    44 
    44 
    45 sequ :: [Char] -> Rexp
    45 sequ :: [Char] -> Rexp
    46 sequ s = case s of
    46 sequ s = case s of
    47   [] -> EMPTY
    47   [] -> ONE
    48   [c] -> CHAR c
    48   [c] -> CHAR c
    49   c:cs -> SEQ (CHAR c) (sequ cs)
    49   c:cs -> SEQ (CHAR c) (sequ cs)
    50 
    50 
    51 
    51 
    52 str :: String -> Rexp
    52 str :: String -> Rexp
    64 ($$) :: String -> Rexp -> Rexp 
    64 ($$) :: String -> Rexp -> Rexp 
    65 x $$ r = RECD x r
    65 x $$ r = RECD x r
    66 
    66 
    67 alts :: [Rexp] -> Rexp
    67 alts :: [Rexp] -> Rexp
    68 alts rs = case rs of
    68 alts rs = case rs of
    69   [] -> NULL
    69   [] -> ZERO
    70   [r] -> r
    70   [r] -> r
    71   r:rs -> foldl (ALT) r rs
    71   r:rs -> foldl (ALT) r rs
    72 
    72 
    73 size :: Rexp -> Int
    73 size :: Rexp -> Int
    74 size r = case r of
    74 size r = case r of
    75   NULL -> 1
    75   ZERO -> 1
    76   EMPTY -> 1
    76   ONE -> 1
    77   CHAR _ -> 1
    77   CHAR _ -> 1
    78   ALT r1 r2 -> 1 + (size r1) + (size r2)
    78   ALT r1 r2 -> 1 + (size r1) + (size r2)
    79   SEQ r1 r2 -> 1 + (size r1) + (size r2)
    79   SEQ r1 r2 -> 1 + (size r1) + (size r2)
    80   STAR r -> 1 + (size r)
    80   STAR r -> 1 + (size r)
    81   RECD _ r -> 1 + (size r)
    81   RECD _ r -> 1 + (size r)
    82 
    82 
    83 nullable :: Rexp -> Bool
    83 nullable :: Rexp -> Bool
    84 nullable r = case r of
    84 nullable r = case r of
    85   NULL -> False
    85   ZERO -> False
    86   EMPTY -> True
    86   ONE -> True
    87   CHAR _ -> False
    87   CHAR _ -> False
    88   ALT r1 r2 -> nullable(r1) || nullable(r2)
    88   ALT r1 r2 -> nullable(r1) || nullable(r2)
    89   SEQ r1 r2 -> nullable(r1) && nullable(r2)
    89   SEQ r1 r2 -> nullable(r1) && nullable(r2)
    90   STAR _ -> True
    90   STAR _ -> True
    91   RECD _ r -> nullable(r)
    91   RECD _ r -> nullable(r)
    92 
    92 
    93 der :: Char -> Rexp -> Rexp
    93 der :: Char -> Rexp -> Rexp
    94 der c r = case r of
    94 der c r = case r of
    95   NULL -> NULL
    95   ZERO -> ZERO
    96   EMPTY -> NULL
    96   ONE -> ZERO
    97   CHAR d -> if c == d then EMPTY else NULL
    97   CHAR d -> if c == d then ONE else ZERO
    98   ALT r1 r2 -> ALT (der c r1) (der c r2)
    98   ALT r1 r2 -> ALT (der c r1) (der c r2)
    99   SEQ r1 r2 -> 
    99   SEQ r1 r2 -> 
   100       if nullable r1 then ALT (SEQ (der c r1) r2) (der c r2)
   100       if nullable r1 then ALT (SEQ (der c r1) r2) (der c r2)
   101       else SEQ (der c r1) r2
   101       else SEQ (der c r1) r2
   102   STAR r -> SEQ (der c r) (STAR r)
   102   STAR r -> SEQ (der c r) (STAR r)
   107   [] -> r
   107   [] -> r
   108   c:s -> ders s (der c r)
   108   c:s -> ders s (der c r)
   109 
   109 
   110 flatten :: Value -> String
   110 flatten :: Value -> String
   111 flatten v = case v of 
   111 flatten v = case v of 
   112   Void -> ""
   112   Empty -> ""
   113   Chr c -> [c]
   113   Chr c -> [c]
   114   Lf v -> flatten v
   114   Lf v -> flatten v
   115   Rg v -> flatten v
   115   Rg v -> flatten v
   116   Sequ v1 v2 -> flatten v1 ++ flatten v2
   116   Sequ v1 v2 -> flatten v1 ++ flatten v2
   117   Stars vs -> concat (map flatten vs)
   117   Stars vs -> concat (map flatten vs)
   118   Rec _ v -> flatten v
   118   Rec _ v -> flatten v
   119 
   119 
   120 env :: Value -> [(String, String)]
   120 env :: Value -> [(String, String)]
   121 env v = case v of 
   121 env v = case v of 
   122   Void -> []
   122   Empty -> []
   123   Chr c -> []
   123   Chr c -> []
   124   Lf v -> env v
   124   Lf v -> env v
   125   Rg v -> env v
   125   Rg v -> env v
   126   Sequ v1 v2 -> env v1 ++ env v2
   126   Sequ v1 v2 -> env v1 ++ env v2
   127   Stars vs -> foldl (++) [] (map env vs)
   127   Stars vs -> foldl (++) [] (map env vs)
   133 string_of_env :: [(String, String)] -> String
   133 string_of_env :: [(String, String)] -> String
   134 string_of_env xs = intercalate "," (map string_of_pair xs)
   134 string_of_env xs = intercalate "," (map string_of_pair xs)
   135 
   135 
   136 mkeps :: Rexp -> Value
   136 mkeps :: Rexp -> Value
   137 mkeps r = case r of 
   137 mkeps r = case r of 
   138   EMPTY -> Void
   138   ONE -> Empty
   139   ALT r1 r2 -> 
   139   ALT r1 r2 -> 
   140       if nullable r1 then Lf (mkeps r1) else Rg (mkeps r2)
   140       if nullable r1 then Lf (mkeps r1) else Rg (mkeps r2)
   141   SEQ r1 r2 -> Sequ (mkeps r1) (mkeps r2)
   141   SEQ r1 r2 -> Sequ (mkeps r1) (mkeps r2)
   142   STAR r -> Stars []
   142   STAR r -> Stars []
   143   RECD x r -> Rec x (mkeps r)
   143   RECD x r -> Rec x (mkeps r)
   148   (SEQ r1 r2, Sequ v1 v2) -> Sequ (inj r1 c v1) v2
   148   (SEQ r1 r2, Sequ v1 v2) -> Sequ (inj r1 c v1) v2
   149   (SEQ r1 r2, Lf (Sequ v1 v2)) -> Sequ (inj r1 c v1) v2
   149   (SEQ r1 r2, Lf (Sequ v1 v2)) -> Sequ (inj r1 c v1) v2
   150   (SEQ r1 r2, Rg v2) -> Sequ (mkeps r1) (inj r2 c v2)
   150   (SEQ r1 r2, Rg v2) -> Sequ (mkeps r1) (inj r2 c v2)
   151   (ALT r1 r2, Lf v1) -> Lf (inj r1 c v1)
   151   (ALT r1 r2, Lf v1) -> Lf (inj r1 c v1)
   152   (ALT r1 r2, Rg v2) -> Rg (inj r2 c v2)
   152   (ALT r1 r2, Rg v2) -> Rg (inj r2 c v2)
   153   (CHAR d, Void) -> Chr d 
   153   (CHAR d, Empty) -> Chr d 
   154   (RECD x r1, _) -> Rec x (inj r1 c v)
   154   (RECD x r1, _) -> Rec x (inj r1 c v)
   155 
   155 
   156 f_id :: Value -> Value
   156 f_id :: Value -> Value
   157 f_id v = v
   157 f_id v = v
   158 
   158 
   170 f_seq :: (Value -> Value) -> (Value -> Value) -> Value -> Value
   170 f_seq :: (Value -> Value) -> (Value -> Value) -> Value -> Value
   171 f_seq f1 f2 = \v -> case v of 
   171 f_seq f1 f2 = \v -> case v of 
   172   Sequ v1 v2 -> Sequ (f1 v1) (f2 v2)
   172   Sequ v1 v2 -> Sequ (f1 v1) (f2 v2)
   173 
   173 
   174 f_seq_void1 :: (Value -> Value) -> (Value -> Value) -> Value -> Value
   174 f_seq_void1 :: (Value -> Value) -> (Value -> Value) -> Value -> Value
   175 f_seq_void1 f1 f2 = \v -> Sequ (f1 Void) (f2 v)
   175 f_seq_void1 f1 f2 = \v -> Sequ (f1 Empty) (f2 v)
   176 
   176 
   177 f_seq_void2 :: (Value -> Value) -> (Value -> Value) -> Value -> Value
   177 f_seq_void2 :: (Value -> Value) -> (Value -> Value) -> Value -> Value
   178 f_seq_void2 f1 f2 = \v -> Sequ(f1 v) (f2 Void)
   178 f_seq_void2 f1 f2 = \v -> Sequ(f1 v) (f2 Empty)
   179 
   179 
   180 f_rec :: (Value -> Value) -> Value -> Value
   180 f_rec :: (Value -> Value) -> Value -> Value
   181 f_rec f = \v -> case v of
   181 f_rec f = \v -> case v of
   182     Rec x v -> Rec x (f v)
   182     Rec x v -> Rec x (f v)
   183 
   183 
   186     ALT r1 r2 -> 
   186     ALT r1 r2 -> 
   187       let (r1s, f1s) = simp r1  
   187       let (r1s, f1s) = simp r1  
   188           (r2s, f2s) = simp r2 
   188           (r2s, f2s) = simp r2 
   189       in
   189       in
   190         (case (r1s, r2s) of
   190         (case (r1s, r2s) of
   191             (NULL, _) -> (r2s, f_right f2s)
   191             (ZERO, _) -> (r2s, f_right f2s)
   192             (_, NULL) -> (r1s, f_left f1s)
   192             (_, ZERO) -> (r1s, f_left f1s)
   193             (_, _)    -> if r1s == r2s then (r1s, f_left f1s)
   193             (_, _)    -> if r1s == r2s then (r1s, f_left f1s)
   194                          else (ALT r1s r2s, f_alt f1s f2s))
   194                          else (ALT r1s r2s, f_alt f1s f2s))
   195     SEQ r1 r2 -> 
   195     SEQ r1 r2 -> 
   196       let (r1s, f1s) = simp r1 
   196       let (r1s, f1s) = simp r1 
   197           (r2s, f2s) = simp r2 
   197           (r2s, f2s) = simp r2 
   198       in
   198       in
   199         (case (r1s, r2s) of
   199         (case (r1s, r2s) of
   200           (NULL, _)  -> (NULL, f_right f2s)
   200           (ZERO, _)  -> (ZERO, f_right f2s)
   201           (_, NULL)  -> (NULL, f_left f1s)
   201           (_, ZERO)  -> (ZERO, f_left f1s)
   202           (EMPTY, _) -> (r2s, f_seq_void1 f1s f2s)
   202           (ONE, _) -> (r2s, f_seq_void1 f1s f2s)
   203           (_, EMPTY) -> (r1s, f_seq_void2 f1s f2s)
   203           (_, ONE) -> (r1s, f_seq_void2 f1s f2s)
   204           (_, _)     -> (SEQ r1s r2s, f_seq f1s f2s))
   204           (_, _)     -> (SEQ r1s r2s, f_seq f1s f2s))
   205     RECD x r1 -> 
   205     RECD x r1 -> 
   206       let (r1s, f1s) = simp r1 
   206       let (r1s, f1s) = simp r1 
   207       in
   207       in
   208         (RECD x r1s, f_rec f1s)
   208         (RECD x r1s, f_rec f1s)
   209     r -> (r, f_id)
   209     r -> (r, f_id)
   210 
   210 
   211 der_simp :: Char -> Rexp -> (Rexp, Value -> Value)
   211 der_simp :: Char -> Rexp -> (Rexp, Value -> Value)
   212 der_simp c r = case r of
   212 der_simp c r = case r of
   213     NULL -> (NULL, f_id)
   213     ZERO -> (ZERO, f_id)
   214     EMPTY -> (NULL, f_id)
   214     ONE -> (ZERO, f_id)
   215     CHAR(d) -> ((if c == d then EMPTY else NULL), f_id)
   215     CHAR(d) -> ((if c == d then ONE else ZERO), f_id)
   216     ALT r1 r2 -> 
   216     ALT r1 r2 -> 
   217       let (r1d, f1d) = der_simp c r1
   217       let (r1d, f1d) = der_simp c r1
   218           (r2d, f2d) = der_simp c r2 
   218           (r2d, f2d) = der_simp c r2 
   219       in
   219       in
   220         (case (r1d, r2d) of 
   220         (case (r1d, r2d) of 
   221           (NULL, _) -> (r2d, f_right f2d)
   221           (ZERO, _) -> (r2d, f_right f2d)
   222           (_, NULL) -> (r1d, f_left f1d)
   222           (_, ZERO) -> (r1d, f_left f1d)
   223           (_, _)    -> if r1d == r2d then (r1d, f_left f1d)
   223           (_, _)    -> if r1d == r2d then (r1d, f_left f1d)
   224                        else (ALT r1d r2d, f_alt f1d f2d))
   224                        else (ALT r1d r2d, f_alt f1d f2d))
   225     SEQ r1 r2 -> 
   225     SEQ r1 r2 -> 
   226       if nullable r1 
   226       if nullable r1 
   227       then 
   227       then 
   228         let (r1d, f1d) = der_simp c r1 
   228         let (r1d, f1d) = der_simp c r1 
   229             (r2d, f2d) = der_simp c r2 
   229             (r2d, f2d) = der_simp c r2 
   230             (r2s, f2s) = simp r2 
   230             (r2s, f2s) = simp r2 
   231         in
   231         in
   232           (case (r1d, r2s, r2d) of
   232           (case (r1d, r2s, r2d) of
   233              (NULL, _, _)  -> (r2d, f_right f2d)
   233              (ZERO, _, _)  -> (r2d, f_right f2d)
   234              (_, NULL, _)  -> (r2d, f_right f2d)
   234              (_, ZERO, _)  -> (r2d, f_right f2d)
   235              (_, _, NULL)  -> (SEQ r1d r2s, f_left (f_seq f1d f2s))
   235              (_, _, ZERO)  -> (SEQ r1d r2s, f_left (f_seq f1d f2s))
   236              (EMPTY, _, _) -> (ALT r2s r2d, f_alt (f_seq_void1 f1d f2s) f2d)
   236              (ONE, _, _) -> (ALT r2s r2d, f_alt (f_seq_void1 f1d f2s) f2d)
   237              (_, EMPTY, _) -> (ALT r1d r2d, f_alt (f_seq_void2 f1d f2s) f2d)
   237              (_, ONE, _) -> (ALT r1d r2d, f_alt (f_seq_void2 f1d f2s) f2d)
   238              (_, _, _)     -> (ALT (SEQ r1d r2s) r2d, f_alt (f_seq f1d f2s) f2d))
   238              (_, _, _)     -> (ALT (SEQ r1d r2s) r2d, f_alt (f_seq f1d f2s) f2d))
   239       else 
   239       else 
   240         let (r1d, f1d) = der_simp c r1 
   240         let (r1d, f1d) = der_simp c r1 
   241             (r2s, f2s) = simp r2 
   241             (r2s, f2s) = simp r2 
   242         in
   242         in
   243           (case (r1d, r2s) of
   243           (case (r1d, r2s) of
   244              (NULL, _)  -> (NULL, f_id)
   244              (ZERO, _)  -> (ZERO, f_id)
   245              (_, NULL)  -> (NULL, f_id)
   245              (_, ZERO)  -> (ZERO, f_id)
   246              (EMPTY, _) -> (r2s, f_seq_void1 f1d f2s)
   246              (ONE, _) -> (r2s, f_seq_void1 f1d f2s)
   247              (_, EMPTY) -> (r1d, f_seq_void2 f1d f2s)
   247              (_, ONE) -> (r1d, f_seq_void2 f1d f2s)
   248              (_, _) -> (SEQ r1d r2s, f_seq f1d f2s))	  
   248              (_, _) -> (SEQ r1d r2s, f_seq f1d f2s))	  
   249     STAR r1 -> 
   249     STAR r1 -> 
   250       let (r1d, f1d) = der_simp c r1 
   250       let (r1d, f1d) = der_simp c r1 
   251       in
   251       in
   252         (case r1d of
   252         (case r1d of
   253            NULL -> (NULL, f_id)
   253            ZERO -> (ZERO, f_id)
   254            EMPTY -> (STAR r1, f_seq_void1 f1d f_id)
   254            ONE -> (STAR r1, f_seq_void1 f1d f_id)
   255            _ -> (SEQ r1d (STAR r1), f_seq f1d f_id))
   255            _ -> (SEQ r1d (STAR r1), f_seq f1d f_id))
   256     RECD x r1 -> der_simp c r1 
   256     RECD x r1 -> der_simp c r1 
   257 
   257 
   258 
   258 
   259 
   259