Attic/Myhill.thy
changeset 170 b1258b7d2789
parent 99 54aa3b6dd71c
equal deleted inserted replaced
169:b794db0b79db 170:b1258b7d2789
       
     1 theory Myhill
       
     2   imports Myhill_2
       
     3 begin
       
     4 
       
     5 section {* Preliminaries \label{sec_prelim}*}
       
     6 
       
     7 subsection {* Finite automata and \mht  \label{sec_fa_mh} *}
       
     8 
       
     9 text {*
       
    10 
       
    11 A {\em determinisitc finite automata (DFA)} $M$ is a 5-tuple 
       
    12 $(Q, \Sigma, \delta, s, F)$, where:
       
    13 \begin{enumerate}
       
    14   \item $Q$ is a finite set of {\em states}, also denoted $Q_M$.
       
    15   \item $\Sigma$ is a finite set of {\em alphabets}, also denoted $\Sigma_M$.
       
    16   \item $\delta$ is a {\em transition function} of type @{text "Q \<times> \<Sigma> \<Rightarrow> Q"} (a total function),
       
    17            also denoted $\delta_M$.
       
    18   \item @{text "s \<in> Q"} is a state called {\em initial state}, also denoted $s_M$.
       
    19   \item @{text "F \<subseteq> Q"} is a set of states named {\em accepting states}, also denoted $F_M$.
       
    20 \end{enumerate}
       
    21 Therefore, we have $M = (Q_M, \Sigma_M, \delta_M, s_M, F_M)$. Every DFA $M$ can be interpreted as 
       
    22 a function assigning states to strings, denoted $\dfa{M}$, the  definition of which is as the following:
       
    23 \begin{equation}
       
    24 \begin{aligned}
       
    25   \dfa{M}([]) &\equiv s_M \\
       
    26    \dfa{M}(xa) &\equiv  \delta_M(\dfa{M}(x), a)
       
    27 \end{aligned}
       
    28 \end{equation}
       
    29 A string @{text "x"} is said to be {\em accepted} (or {\em recognized}) by a DFA $M$ if 
       
    30 $\dfa{M}(x) \in F_M$. The language recoginzed by DFA $M$, denoted
       
    31 $L(M)$, is defined as:
       
    32 \begin{equation}
       
    33   L(M) \equiv \{x~|~\dfa{M}(x) \in F_M\}
       
    34 \end{equation}
       
    35 The standard way of specifying a laugage $\Lang$ as {\em regular} is by stipulating that:
       
    36 $\Lang = L(M)$ for some DFA $M$.
       
    37 
       
    38 For any DFA $M$, the DFA obtained by changing initial state to another $p \in Q_M$ is denoted $M_p$, 
       
    39 which is defined as:
       
    40 \begin{equation}
       
    41     M_p \ \equiv\ (Q_M, \Sigma_M, \delta_M, p, F_M) 
       
    42 \end{equation}
       
    43 Two states $p, q \in Q_M$ are said to be {\em equivalent}, denoted $p \approx_M q$, iff.
       
    44 \begin{equation}\label{m_eq_def}
       
    45   L(M_p) = L(M_q)
       
    46 \end{equation}
       
    47 It is obvious that $\approx_M$ is an equivalent relation over $Q_M$. and 
       
    48 the partition induced by $\approx_M$ has $|Q_M|$ equivalent classes.
       
    49 By overloading $\approx_M$,  an equivalent relation over strings can be defined:
       
    50 \begin{equation}
       
    51   x \approx_M y ~ ~ \equiv ~ ~ \dfa{M}(x) \approx_M \dfa{M}(y)
       
    52 \end{equation}
       
    53 It can be proved that the the partition induced by $\approx_M$ also has $|Q_M|$ equivalent classes.
       
    54 It is also easy to show that: if $x \approx_M y$, then $x \approx_{L(M)} y$, and this means
       
    55 $\approx_M$ is a more refined equivalent relation than $\approx_{L(M)}$. Since partition induced by
       
    56 $\approx_M$ is finite, the one induced by $\approx_{L(M)}$ must also be finite, and this is 
       
    57 one of the two directions of \mht:
       
    58 \begin{Lem}[\mht , Direction two]
       
    59   If a language $\Lang$ is regular (i.e. $\Lang = L(M)$ for some DFA $M$), then 
       
    60   the partition induced by $\approx_\Lang$ is finite.
       
    61 \end{Lem}
       
    62 
       
    63 The other direction is:
       
    64 \begin{Lem}[\mht , Direction one]\label{auto_mh_d1}
       
    65   If  the partition induced by $\approx_\Lang$ is finite, then
       
    66   $\Lang$ is regular (i.e. $\Lang = L(M)$ for some DFA $M$).
       
    67 \end{Lem}
       
    68 The $M$ we are seeking when prove lemma \ref{auto_mh_d2} can be constructed out of $\approx_\Lang$,
       
    69 denoted $M_\Lang$ and defined as the following:
       
    70 \begin{subequations}
       
    71 \begin{eqnarray}
       
    72   Q_{M_\Lang} ~ & \equiv & ~ \{ \cls{x}{\approx_\Lang}~|~ x \in \Sigma^* \}\\
       
    73   \Sigma_{M_\Lang} ~ & \equiv & ~ \Sigma_M \\
       
    74   \delta_{M_\Lang} ~ & \equiv & ~ \left (\lambda (\cls{x}{\approx_\Lang}, a).  \cls{xa}{\approx_\Lang} \right) \\
       
    75   s_{M_\Lang} ~ & \equiv & ~ \cls{[]}{\approx_\Lang} \\
       
    76   F_{M_\Lang} ~ & \equiv & ~ \{ \cls{x}{\approx_\Lang}~|~ x \in \Lang \}
       
    77 \end{eqnarray}
       
    78 \end{subequations}
       
    79 It can be proved that $Q_{M_\Lang}$ is indeed finite and $\Lang = L(M_\Lang)$, so lemma \ref{auto_mh_d1} holds.
       
    80 It can also be proved that $M_\Lang$ is the minimal DFA (therefore unique) which recoginzes $\Lang$.
       
    81 
       
    82 
       
    83 
       
    84 *}
       
    85 
       
    86 subsection {* The objective and the underlying intuition *}
       
    87 
       
    88 text {*
       
    89   It is now obvious from section \ref{sec_fa_mh} that \mht\ can be established easily when
       
    90   {\em reglar languages} are defined as ones recognized by finite automata. 
       
    91   Under the context where the use of finite automata is forbiden, the situation is quite different.
       
    92   The theorem now has to be expressed as:
       
    93   \begin{Thm}[\mht , Regular expression version]
       
    94       A language $\Lang$ is regular (i.e. $\Lang = L(\re)$ for some {\em regular expression} $\re$)
       
    95       iff. the partition induced by $\approx_\Lang$ is finite.
       
    96   \end{Thm}
       
    97   The proof of this version consists of two directions (if the use of automata are not allowed):
       
    98   \begin{description}
       
    99     \item [Direction one:] 
       
   100       generating a regular expression $\re$ out of the finite partition induced by $\approx_\Lang$,
       
   101       such that $\Lang = L(\re)$.
       
   102     \item [Direction two:]
       
   103           showing the finiteness of the partition induced by $\approx_\Lang$, under the assmption
       
   104           that $\Lang$ is recognized by some regular expression $\re$ (i.e. $\Lang = L(\re)$).
       
   105   \end{description}
       
   106   The development of these two directions consititutes the body of this paper.
       
   107 
       
   108 *}
       
   109 
       
   110 section {* Direction @{text "regular language \<Rightarrow>finite partition"} *}
       
   111 
       
   112 text {*
       
   113   Although not used explicitly, the notion of finite autotmata and its relationship with 
       
   114   language partition, as outlined in section \ref{sec_fa_mh}, still servers as important intuitive 
       
   115   guides in the development of this paper.
       
   116   For example, {\em Direction one} follows the {\em Brzozowski algebraic method}
       
   117   used to convert finite autotmata to regular expressions, under the intuition that every 
       
   118   partition member $\cls{x}{\approx_\Lang}$ is a state in the DFA $M_\Lang$  constructed to prove 
       
   119   lemma \ref{auto_mh_d1} of section \ref{sec_fa_mh}.
       
   120 
       
   121   The basic idea of Brzozowski method is to extract an equational system out of the 
       
   122   transition relationship of the automaton in question. In the equational system, every
       
   123   automaton state is represented by an unknown, the solution of which is expected to be 
       
   124   a regular expresion characterizing the state in a certain sense. There are two choices of 
       
   125   how a automaton state can be characterized.  The first is to characterize by the set of 
       
   126   strings leading from the state in question into accepting states. 
       
   127   The other choice is to characterize by the set of strings leading from initial state 
       
   128   into the state in question. For the second choice, the language recognized the automaton 
       
   129   can be characterized by the solution of initial state, while for the second choice, 
       
   130   the language recoginzed by the automaton can be characterized by 
       
   131   combining solutions of all accepting states by @{text "+"}. Because of the automaton 
       
   132   used as our intuitive guide, the $M_\Lang$, the states of which are 
       
   133   sets of strings leading from initial state, the second choice is used in this paper.
       
   134 
       
   135   Supposing the automaton in Fig \ref{fig_auto_part_rel} is the $M_\Lang$ for some language $\Lang$, 
       
   136   and suppose $\Sigma = \{a, b, c, d, e\}$. Under the second choice, the equational system extracted is:
       
   137   \begin{subequations}
       
   138   \begin{eqnarray}
       
   139     X_0 & = & X_1 \cdot c + X_2 \cdot d + \lambda \label{x_0_def_o} \\
       
   140     X_1 & = & X_0 \cdot a + X_1 \cdot b + X_2 \cdot d \label{x_1_def_o} \\
       
   141     X_2 & = & X_0 \cdot b + X_1 \cdot d + X_2 \cdot a \\
       
   142     X_3 & = & \begin{aligned}
       
   143                  & X_0 \cdot (c + d + e) + X_1 \cdot (a + e) + X_2 \cdot (b + e) + \\
       
   144                  & X_3 \cdot (a + b + c + d + e)
       
   145               \end{aligned}
       
   146   \end{eqnarray}
       
   147   \end{subequations} 
       
   148 
       
   149 \begin{figure}[h!]
       
   150 \centering
       
   151 \begin{minipage}{0.5\textwidth}
       
   152 \scalebox{0.8}{
       
   153 \begin{tikzpicture}[ultra thick,>=latex]
       
   154   \node[draw,circle,initial] (start) {$X_0$};
       
   155   \node[draw,circle,accepting] at ($(start) + (3.5cm,2cm)$) (ac1) {$X_1$};
       
   156   \node[draw,circle,accepting] at ($(start) + (3.5cm,-2cm)$) (ac2) {$X_2$};
       
   157   \node[draw,circle] at ($(start) + (6.5cm,0cm)$) (ab) {$X_3$};
       
   158 
       
   159   \path[->] (start) edge [bend left] node [midway, above] {$a$} (ac1);
       
   160   \path[->] (start) edge [bend right] node [midway, below] {$b$} (ac2);
       
   161   \path[->] (ac1) edge [loop above] node [midway, above] {$b$} (ac1);
       
   162   \path[->] (ac2) edge [loop below] node [midway, below] {$a$} (ac2);
       
   163   \path[->] (ac1) edge [bend right] node [midway, left] {$c$} (ac2);
       
   164   \path[->] (ac2) edge [bend right] node [midway, right] {$c$} (ac1);
       
   165   \path[->] (ac1) edge node [midway, sloped, above] {$d$} (start);
       
   166   \path[->] (ac2) edge node [midway, sloped, above] {$d$} (start);
       
   167 
       
   168   \path [draw, rounded corners,->,dashed] (start) -- ($(start) + (0cm, 3.7cm)$)
       
   169          -- ($(ab) + (0cm, 3.7cm)$)  node[midway,above,sloped]{$\Sigma - \{a, b\}$} -- (ab);
       
   170   \path[->,dashed] (ac1) edge node [midway, above, sloped] {$\Sigma - \{b,c,d\}$} (ab);
       
   171   \path[->,dashed] (ac2) edge node [midway, below, sloped] {$\Sigma - \{a,c,d\}$} (ab);
       
   172   \path[->,dashed] (ab) edge [loop right] node [midway, right] {$\Sigma$} (ab);
       
   173 \end{tikzpicture}}
       
   174 \end{minipage}
       
   175 \caption{An example automaton}\label{fig_auto_part_rel}
       
   176 \end{figure}
       
   177 
       
   178   Every $\cdot$-item on the right side of equations describes some state transtions, except 
       
   179   the $\lambda$ in \eqref{x_0_def_o}, which represents empty string @{text "[]"}. 
       
   180   The reason is that: every state is characterized by the
       
   181   set of incoming strings leading from initial state. For non-initial state, every such
       
   182   string can be splitted into a prefix leading into a preceding state and a single character suffix 
       
   183   transiting into from the preceding state. The exception happens at
       
   184   initial state, where the empty string is a incoming string which can not be splitted. The $\lambda$
       
   185   in \eqref{x_0_def_o} is introduce to repsent this indivisible string. There is one and only one
       
   186   $\lambda$ in every equational system such obtained, becasue $[]$ can only be contaied in one
       
   187   equivalent class (the intial state in $M_\Lang$) and equivalent classes are disjoint. 
       
   188   
       
   189   Suppose all unknowns ($X_0, X_1, X_2, X_3$) are  solvable, the regular expression charactering 
       
   190   laugnage $\Lang$ is $X_1 + X_2$. This paper gives a procedure
       
   191   by which arbitrarily picked unknown can be solved. The basic idea to solve $X_i$ is by 
       
   192   eliminating all variables other than $X_i$ from the equational system. If
       
   193   $X_0$ is the one picked to be solved,  variables $X_1, X_2, X_3$ have to be removed one by 
       
   194   one.  The order to remove does not matter as long as the remaing equations are kept valid.
       
   195   Suppose $X_1$ is the first one to remove, the action is to replace all occurences of $X_1$ 
       
   196   in remaining equations by the right hand side of its characterizing equation, i.e. 
       
   197   the $ X_0 \cdot a + X_1 \cdot b + X_2 \cdot d$ in \eqref{x_1_def_o}. However, because
       
   198   of the recursive occurence of $X_1$, this replacement does not really removed $X_1$. Arden's
       
   199   lemma is invoked to transform recursive equations like \eqref{x_1_def_o} into non-recursive 
       
   200   ones. For example, the recursive equation \eqref{x_1_def_o} is transformed into the follwing
       
   201   non-recursive one:
       
   202   \begin{equation}
       
   203      X_1  =  (X_0 \cdot a + X_2 \cdot d) \cdot b^* = X_0 \cdot (a \cdot b^*) + X_2 \cdot (d \cdot b^*)
       
   204   \end{equation}
       
   205   which, by Arden's lemma, still characterizes $X_1$ correctly. By subsituting
       
   206   $(X_0 \cdot a + X_2 \cdot d) \cdot b^*$ for all $X_1$ and removing  \eqref{x_1_def_o},
       
   207   we get:
       
   208   \begin{subequations}
       
   209   \begin{eqnarray}
       
   210     X_0 & = & \begin{aligned}
       
   211               & (X_0 \cdot (a \cdot b^*) + X_2 \cdot (d \cdot b^*)) \cdot c + 
       
   212                 X_2 \cdot d + \lambda = \\
       
   213               & X_0 \cdot (a \cdot b^* \cdot c) + X_2 \cdot (d \cdot b^* \cdot c) + 
       
   214                 X_2 \cdot d + \lambda = \\
       
   215               &  X_0 \cdot (a \cdot b^* \cdot c) + X_2 \cdot (d \cdot b^* \cdot c + d) + \lambda 
       
   216               \end{aligned} \label{x_0_def_1} \\
       
   217     X_2 & = & \begin{aligned}
       
   218                  & X_0 \cdot b + (X_0 \cdot (a \cdot b^*) + X_2 \cdot (d \cdot b^*)) \cdot d + X_2 \cdot a = \\
       
   219                  & X_0 \cdot b + X_0 \cdot (a \cdot b^* \cdot d) + X_2 \cdot (d \cdot b^* \cdot d) + X_2 \cdot a = \\
       
   220                  & X_0 \cdot (b + a \cdot b^* \cdot d) + X_2 \cdot (d \cdot b^* \cdot d + a)
       
   221               \end{aligned} \\
       
   222     X_3 & = & \begin{aligned}
       
   223                  & X_0 \cdot (c + d + e) + ((X_0 \cdot a + X_2 \cdot d) \cdot b^*) \cdot (a + e)\\
       
   224                  & + X_2 \cdot (b + e) + X_3 \cdot (a + b + c + d + e) \label{x_3_def_1}
       
   225               \end{aligned}
       
   226   \end{eqnarray}
       
   227   \end{subequations}  
       
   228 Suppose $X_3$ is the one to remove next, since $X_3$ dose not appear in either $X_0$ or $X_2$, 
       
   229 the removal of equation \eqref{x_3_def_1} changes nothing in the rest equations. Therefore, we get:
       
   230  \begin{subequations}
       
   231   \begin{eqnarray}
       
   232     X_0 & = & X_0 \cdot (a \cdot b^* \cdot c) + X_2 \cdot (d \cdot b^* \cdot c + d) + \lambda  \label{x_0_def_2} \\
       
   233     X_2 & = & X_0 \cdot (b + a \cdot b^* \cdot d) + X_2 \cdot (d \cdot b^* \cdot d + a) \label{x_2_def_2}
       
   234   \end{eqnarray}
       
   235   \end{subequations}  
       
   236 Actually, since absorbing state like $X_3$ contributes nothing to the language $\Lang$, it could have been removed
       
   237 at the very beginning of this precedure without affecting the final result. Now, the last unknown to remove 
       
   238 is $X_2$ and the Arden's transformaton of \eqref{x_2_def_2} is:
       
   239 \begin{equation} \label{x_2_ardened}
       
   240    X_2 ~ = ~  (X_0 \cdot (b + a \cdot b^* \cdot d)) \cdot (d \cdot b^* \cdot d + a)^* =
       
   241               X_0 \cdot ((b + a \cdot b^* \cdot d) \cdot (d \cdot b^* \cdot d + a)^*)
       
   242 \end{equation}
       
   243 By substituting the right hand side of \eqref{x_2_ardened} into \eqref{x_0_def_2}, we get:
       
   244 \begin{equation}
       
   245 \begin{aligned}
       
   246   X_0  & = && X_0 \cdot (a \cdot b^* \cdot c) + \\
       
   247        &   && X_0 \cdot ((b + a \cdot b^* \cdot d) \cdot (d \cdot b^* \cdot d + a)^*) \cdot 
       
   248            (d \cdot b^* \cdot c + d) + \lambda \\
       
   249        & =  && X_0 \cdot ((a \cdot b^* \cdot c) + \\
       
   250        &   && \hspace{3em} ((b + a \cdot b^* \cdot d) \cdot (d \cdot b^* \cdot d + a)^*) \cdot 
       
   251            (d \cdot b^* \cdot c + d)) + \lambda 
       
   252 \end{aligned}
       
   253 \end{equation}
       
   254 By applying Arden's transformation to this, we get the solution of $X_0$ as:
       
   255 \begin{equation}
       
   256 \begin{aligned}
       
   257   X_0  =  ((a \cdot b^* \cdot c) + 
       
   258                 ((b + a \cdot b^* \cdot d) \cdot (d \cdot b^* \cdot d + a)^*) \cdot 
       
   259                         (d \cdot b^* \cdot c + d))^*
       
   260 \end{aligned}
       
   261 \end{equation}
       
   262 Using the same method, solutions for $X_1$ and $X_2$ can be obtained as well and the
       
   263 regular expressoin for $\Lang$ is just $X_1 + X_2$. The formalization of this procedure
       
   264 consititues the first direction of the {\em regular expression} verion of
       
   265 \mht. Detailed explaination are given in {\bf paper.pdf} and more details and comments
       
   266 can be found in the formal scripts.
       
   267 *}
       
   268 
       
   269 section {* Direction @{text "finite partition \<Rightarrow> regular language"} *}
       
   270 
       
   271 text {*
       
   272   It is well known in the theory of regular languages that
       
   273   the existence of finite  language partition amounts to the existence of 
       
   274   a minimal automaton, i.e. the $M_\Lang$ constructed in section \ref{sec_prelim}, which recoginzes the 
       
   275   same language $\Lang$. The standard way to prove the existence of finite language partition 
       
   276   is to construct a automaton out of the regular expression which recoginzes the same language, from
       
   277   which the existence of finite language partition follows immediately. As discussed in the introducton of 
       
   278   {\bf paper.pdf} as well as in [5], the problem for this approach happens when automata 
       
   279   of sub regular expressions are combined to form the automaton of the mother regular expression, 
       
   280   no matter what kind of representation is used, the formalization is cubersome, as said 
       
   281   by Nipkow in [5]: `{\em a more abstract mathod is clearly desirable}'. In this section, 
       
   282   an {\em intrinsically abstract} method is given, which completely avoid the mentioning of automata,
       
   283   let along any particular representations.
       
   284   *}
       
   285 
       
   286 text {*
       
   287   The main proof structure is a structural induction on regular expressions,
       
   288   where base cases (cases for @{const "NULL"}, @{const "EMPTY"}, @{const "CHAR"}) are quite straightforward to
       
   289   proof. Real difficulty lies in inductive cases.  By inductive hypothesis, languages defined by
       
   290   sub-expressions induce finite partitiions. Under such hypothsis, we need to prove that the language 
       
   291   defined by the composite regular expression gives rise to finite partion.  
       
   292   The basic idea is to attach a tag @{text "tag(x)"} to every string 
       
   293   @{text "x"}. The tagging fuction @{text "tag"} is carefully devised, which returns tags 
       
   294   made of equivalent classes of the partitions induced by subexpressoins, and therefore has a finite
       
   295   range. Let @{text "Lang"} be the composite language, it is proved that:
       
   296   \begin{quote}
       
   297   If strings with the same tag are equivalent with respect to @{text "Lang"}, expressed as:
       
   298   \[
       
   299   @{text "tag(x) = tag(y) \<Longrightarrow> x \<approx>Lang y"}
       
   300   \]
       
   301   then the partition induced by @{text "Lang"} must be finite.
       
   302   \end{quote}
       
   303   There are two arguments for this. The first goes as the following:
       
   304   \begin{enumerate}
       
   305     \item First, the tagging function @{text "tag"} induces an equivalent relation @{text "(=tag=)"} 
       
   306           (defiintion of @{text "f_eq_rel"} and lemma @{text "equiv_f_eq_rel"}).
       
   307     \item It is shown that: if the range of @{text "tag"} (denoted @{text "range(tag)"}) is finite, 
       
   308            the partition given rise by @{text "(=tag=)"} is finite (lemma @{text "finite_eq_f_rel"}).
       
   309            Since tags are made from equivalent classes from component partitions, and the inductive
       
   310            hypothesis ensures the finiteness of these partitions, it is not difficult to prove
       
   311            the finiteness of @{text "range(tag)"}.
       
   312     \item It is proved that if equivalent relation @{text "R1"} is more refined than @{text "R2"}
       
   313            (expressed as @{text "R1 \<subseteq> R2"}),
       
   314            and the partition induced by @{text "R1"} is finite, then the partition induced by @{text "R2"}
       
   315            is finite as well (lemma @{text "refined_partition_finite"}).
       
   316     \item The injectivity assumption @{text "tag(x) = tag(y) \<Longrightarrow> x \<approx>Lang y"} implies that
       
   317             @{text "(=tag=)"} is more refined than @{text "(\<approx>Lang)"}.
       
   318     \item Combining the points above, we have: the partition induced by language @{text "Lang"}
       
   319           is finite (lemma @{text "tag_finite_imageD"}).
       
   320   \end{enumerate}
       
   321 
       
   322 We could have followed another approach given in  appendix II of Brzozowski's paper [?], where
       
   323 the set of derivatives of any regular expression can be proved to be finite. 
       
   324 Since it is easy to show that strings with same derivative are equivalent with respect to the 
       
   325 language, then the second direction follows. We believe that our
       
   326 apporoach is easy to formalize, with no need to do complicated derivation calculations
       
   327 and countings as in [???].
       
   328 *}
       
   329 
       
   330 
       
   331 end