regexp: Myhill.thy@79de7de104c8


theory Myhill
  imports Myhill_2
begin

section {* Preliminaries *}

subsection {* Finite automata and \mht  \label{sec_fa_mh} *}

text {*

A {\em determinisitc finite automata (DFA)} $M$ is a 5-tuple 
$(Q, \Sigma, \delta, s, F)$, where:
\begin{enumerate}
  \item $Q$ is a finite set of {\em states}, also denoted $Q_M$.
  \item $\Sigma$ is a finite set of {\em alphabets}, also denoted $\Sigma_M$.
  \item $\delta$ is a {\em transition function} of type @{text "Q \<times> \<Sigma> \<Rightarrow> Q"} (a total function),
           also denoted $\delta_M$.
  \item @{text "s \<in> Q"} is a state called {\em initial state}, also denoted $s_M$.
  \item @{text "F \<subseteq> Q"} is a set of states named {\em accepting states}, also denoted $F_M$.
\end{enumerate}
Therefore, we have $M = (Q_M, \Sigma_M, \delta_M, s_M, F_M)$. Every DFA $M$ can be interpreted as 
a function assigning states to strings, denoted $\dfa{M}$, the  definition of which is as the following:
\begin{equation}
\begin{aligned}
  \dfa{M}([]) &\equiv s_M \\
   \dfa{M}(xa) &\equiv  \delta_M(\dfa{M}(x), a)
\end{aligned}
\end{equation}
A string @{text "x"} is said to be {\em accepted} (or {\em recognized}) by a DFA $M$ if 
$\dfa{M}(x) \in F_M$. The language recoginzed by DFA $M$, denoted
$L(M)$, is defined as:
\begin{equation}
  L(M) \equiv \{x~|~\dfa{M}(x) \in F_M\}
\end{equation}
The standard way of specifying a laugage $\Lang$ as {\em regular} is by stipulating that:
$\Lang = L(M)$ for some DFA $M$.

For any DFA $M$, the DFA obtained by changing initial state to another $p \in Q_M$ is denoted $M_p$, 
which is defined as:
\begin{equation}
    M_p \ \equiv\ (Q_M, \Sigma_M, \delta_M, p, F_M) 
\end{equation}
Two states $p, q \in Q_M$ are said to be {\em equivalent}, denoted $p \approx_M q$, iff.
\begin{equation}\label{m_eq_def}
  L(M_p) = L(M_q)
\end{equation}
It is obvious that $\approx_M$ is an equivalent relation over $Q_M$. and 
the partition induced by $\approx_M$ has $|Q_M|$ equivalent classes.
By overloading $\approx_M$,  an equivalent relation over strings can be defined:
\begin{equation}
  x \approx_M y ~ ~ \equiv ~ ~ \dfa{M}(x) \approx_M \dfa{M}(y)
\end{equation}
It can be proved that the the partition induced by $\approx_M$ also has $|Q_M|$ equivalent classes.
It is also easy to show that: if $x \approx_M y$, then $x \approx_{L(M)} y$, and this means
$\approx_M$ is a more refined equivalent relation than $\approx_{L(M)}$. Since partition induced by
$\approx_M$ is finite, the one induced by $\approx_{L(M)}$ must also be finite, and this is 
one of the two directions of \mht:
\begin{Lem}[\mht , Direction two]
  If a language $\Lang$ is regular (i.e. $\Lang = L(M)$ for some DFA $M$), then 
  the partition induced by $\approx_\Lang$ is finite.
\end{Lem}

The other direction is:
\begin{Lem}[\mht , Direction one]\label{auto_mh_d1}
  If  the partition induced by $\approx_\Lang$ is finite, then
  $\Lang$ is regular (i.e. $\Lang = L(M)$ for some DFA $M$).
\end{Lem}
The $M$ we are seeking when prove lemma \ref{auto_mh_d2} can be constructed out of $\approx_\Lang$,
denoted $M_\Lang$ and defined as the following:
\begin{subequations}
\begin{eqnarray}
  Q_{M_\Lang} ~ & \equiv & ~ \{ \cls{x}{\approx_\Lang}~|~ x \in \Sigma^* \}\\
  \Sigma_{M_\Lang} ~ & \equiv & ~ \Sigma_M \\
  \delta_{M_\Lang} ~ & \equiv & ~ \left (\lambda (\cls{x}{\approx_\Lang}, a).  \cls{xa}{\approx_\Lang} \right) \\
  s_{M_\Lang} ~ & \equiv & ~ \cls{[]}{\approx_\Lang} \\
  F_{M_\Lang} ~ & \equiv & ~ \{ \cls{x}{\approx_\Lang}~|~ x \in \Lang \}
\end{eqnarray}
\end{subequations}
It can be proved that $Q_{M_\Lang}$ is indeed finite and $\Lang = L(M_\Lang)$, so lemma \ref{auto_mh_d1} holds.
It can also be proved that $M_\Lang$ is the minimal DFA (therefore unique) which recoginzes $\Lang$.



*}

subsection {* The objective and the underlying intuition *}

text {*
  It is now obvious from section \ref{sec_fa_mh} that \mht\ can be established easily when
  {\em reglar languages} are defined as ones recognized by finite automata. 
  Under the context where the use of finite automata is forbiden, the situation is quite different.
  The theorem now has to be expressed as:
  \begin{Thm}[\mht , Regular expression version]
      A language $\Lang$ is regular (i.e. $\Lang = L(\re)$ for some {\em regular expression} $\re$)
      iff. the partition induced by $\approx_\Lang$ is finite.
  \end{Thm}
  The proof of this version consists of two directions (if the use of automata are not allowed):
  \begin{description}
    \item [Direction one:] 
      generating a regular expression $\re$ out of the finite partition induced by $\approx_\Lang$,
      such that $\Lang = L(\re)$.
    \item [Direction two:]
          showing the finiteness of the partition induced by $\approx_\Lang$, under the assmption
          that $\Lang$ is recognized by some regular expression $\re$ (i.e. $\Lang = L(\re)$).
  \end{description}
  The development of these two directions consititutes the body of this paper.

*}

section {* Direction @{text "regular language \<Rightarrow>finite partition"} *}

text {*
  Although not used explicitly, the notion of finite autotmata and its relationship with 
  language partition, as outlined in section \ref{sec_fa_mh}, still servers as important intuitive 
  guides in the development of this paper.
  For example, {\em Direction one} follows the {\em Brzozowski algebraic method}
  used to convert finite autotmata to regular expressions, under the intuition that every 
  partition member $\cls{x}{\approx_\Lang}$ is a state in the DFA $M_\Lang$  constructed to prove 
  lemma \ref{auto_mh_d1} of section \ref{sec_fa_mh}.

  The basic idea of Brzozowski method is to set aside an unknown for every DFA state and 
  describe the state-trasition relationship by charateristic equations. 
  By solving the equational system such obtained, regular expressions 
  characterizing DFA states are obtained. There are choices of 
  how DFA states can be characterized.  The first is to characterize a DFA state by the set of 
  striings leading from the state in question into accepting states. 
  The other choice is to characterize a DFA state by the set of strings leading from initial state 
  into the state in question. For the first choice, the lauguage recognized by a DFA can 
  be characterized by the regular expression characterizing initial state, while 
  in the second choice, the languaged of the DFA can be characterized by the summation of
  regular expressions of all accepting states.
*}

text {*
\begin{figure}[h!]
\centering
\begin{minipage}{0.5\textwidth}
\scalebox{0.8}{
\begin{tikzpicture}[ultra thick,>=latex]
  \node[draw,circle,initial] (start) {$X_0$};
  \node[draw,circle,accepting] at ($(start) + (3.5cm,2cm)$) (ac1) {$X_1$};
  \node[draw,circle,accepting] at ($(start) + (3.5cm,-2cm)$) (ac2) {$X_2$};
  \node[draw,circle] at ($(start) + (6.5cm,0cm)$) (ab) {$X_3$};

  \path[->] (start) edge [bend left] node [midway, above] {$a$} (ac1);
  \path[->] (start) edge [bend right] node [midway, below] {$b$} (ac2);
  \path[->] (ac1) edge [loop above] node [midway, above] {$b$} (ac1);
  \path[->] (ac2) edge [loop below] node [midway, below] {$a$} (ac2);
  \path[->] (ac1) edge [bend right] node [midway, left] {$c$} (ac2);
  \path[->] (ac2) edge [bend right] node [midway, right] {$c$} (ac1);
  \path[->] (ac1) edge node [midway, sloped, above] {$d$} (start);
  \path[->] (ac2) edge node [midway, sloped, above] {$d$} (start);

  \path [draw, rounded corners,->,dashed] (start) -- ($(start) + (0cm, 3.7cm)$)
         -- ($(ab) + (0cm, 3.7cm)$)  node[midway,above,sloped]{$\Sigma - \{a, b\}$} -- (ab);
  \path[->,dashed] (ac1) edge node [midway, above, sloped] {$\Sigma - \{b,c,d\}$} (ab);
  \path[->,dashed] (ac2) edge node [midway, below, sloped] {$\Sigma - \{a,c,d\}$} (ab);
  \path[->,dashed] (ab) edge [loop right] node [midway, right] {$\Sigma$} (ab);
\end{tikzpicture}}
\end{minipage}
\caption{The relationship between automata and finite partition}\label{fig_auto_part_rel}
\end{figure}

  *}


end
author	zhang
	Mon, 07 Feb 2011 13:08:09 +0000
changeset 73	79de7de104c8
parent 69	ecf6c61a4541
child 78	77583805123d
permissions	-rw-r--r--