regexp: changeset 69:ecf6c61a4541

--- a/Myhill.thy	Sat Feb 05 13:56:50 2011 +0000
+++ b/Myhill.thy	Sun Feb 06 10:28:29 2011 +0000
@@ -6,67 +6,83 @@
 
 text {*
 
-The intuition behind our treatment is still automata. Taking the automaton in 
-Fig.\ref{fig_origin_auto} as an example, like any automaton, it
-represents the vehicle used to recognize a certain regular language.
-For any given string, the process starts from the left most and proceed 
-character by character to the right, driving the state transtion of automaton 
-starting from the initial state (the one marked by an short arrow). There could
-be three outcomes:
-  \begin{enumerate}
-   \item The process finally reaches the end of the string and the automaton is at an accepting
-        state, in which case the string is considered a member of the language.
-         For the automaton in Fig.\ref{fig_origin_auto}, @{text "a, ab, abb, abc, abbcc, b, baa"}
-         are such kind of strings.
-   \item The process finally reaches the end of the string but the automaton is at an non-accepting
-         state, in which case, the string is considered a non-member of the language. For the automaton 
-         in Fig.\ref{fig_origin_auto}, @{text "ad, abbd, adbd, bdabbccd"}
-         are such kind of strings. \label{case_end_reject}
-   \item \label{case_stuck}
-     The process get stuck at the middle of the string, in which case, the string is considered
-           a non-member of the lauguage.  For the automaton in Fig.\ref{fig_origin_auto}, 
-           @{text "c, acb, bbacd, aaccd"} 
-           are such kind of strings.
-  \end{enumerate}
-To avoid the situation \ref{case_stuck} above, we can augment a normal automaton with a ``absorbing state'',
-as the state $X_3$ in Fig.\ref{fig_extended_auto}. In an auguments automaton, the process of strings never
-get stuck: whenever a string is recognized as not belonging to the language, the augmented automaton
-is transfered into the ``absorbing state'' and kept there until the process reaches the end of the string, 
-in which case, the string is rejected by situation \ref{case_end_reject} above.
+A {\em determinisitc finite automata (DFA)} $M$ is a 5-tuple 
+$(Q, \Sigma, \delta, s, F)$, where:
+\begin{enumerate}
+  \item $Q$ is a finite set of {\em states}, also denoted $Q_M$.
+  \item $\Sigma$ is a finite set of {\em alphabets}, also denoted $\Sigma_M$.
+  \item $\delta$ is a {\em transition function} of type @{text "Q \<times> \<Sigma> \<Rightarrow> Q"} (a total function),
+           also denoted $\delta_M$.
+  \item @{text "s \<in> Q"} is a state called {\em initial state}, also denoted $s_M$.
+  \item @{text "F \<subseteq> Q"} is a set of states named {\em accepting states}, also denoted $F_M$.
+\end{enumerate}
+Therefore, we have $M = (Q_M, \Sigma_M, \delta_M, s_M, F_M)$. Every DFA $M$ can be interpreted as 
+a function assigning states to strings, denoted $\dfa{M}$, the  definition of which is as the following:
+\begin{equation}
+\begin{aligned}
+  \dfa{M}([]) &\equiv s_M \\
+   \dfa{M}(xa) &\equiv  \delta_M(\dfa{M}(x), a)
+\end{aligned}
+\end{equation}
+A string @{text "x"} is said to be {\em accepted} (or {\em recognized}) by a DFA $M$ if 
+$\dfa{M}(x) \in F_M$. The language recoginzed by DFA $M$, denoted
+$L(M)$, is defined as:
+\begin{equation}
+  L(M) \equiv \{x~|~\dfa{M}(x) \in F_M\}
+\end{equation}
+The standard way of specifying a laugage $\Lang$ as {\em regular} is by stipulating that:
+$\Lang = L(M)$ for some DFA $M$.
 
-Given a language @{text "Lang"} and a string @{text "x"}, the equivalent class @{text "\<approx>Lang `` {x}"}
-corresponds to the state reached by processing @{text "x"} with the augmented automaton. Since  
-@{text "\<approx>Lang `` {x}"} is defined for every @{text "x"}, it corresponds to the fact that 
-the processing of @{text "x"} will never get stuck.
+For any DFA $M$, the DFA obtained by changing initial state to another $p \in Q_M$ is denoted $M_p$, 
+which is defined as:
+\begin{equation}
+    M_p \ \equiv\ (Q_M, \Sigma_M, \delta_M, p, F_M) 
+\end{equation}
+Two states $p, q \in Q_M$ are said to be {\em equivalent}, denoted $p \approx_M q$, iff.
+\begin{equation}\label{m_eq_def}
+  L(M_p) = L(M_q)
+\end{equation}
+It is obvious that $\approx_M$ is an equivalent relation over $Q_M$. and 
+the partition induced by $\approx_M$ has $|Q_M|$ equivalent classes.
+By overloading $\approx_M$,  an equivalent relation over strings can be defined:
+\begin{equation}
+  x \approx_M y ~ ~ \equiv ~ ~ \dfa{M}(x) \approx_M \dfa{M}(y)
+\end{equation}
+It can be proved that the the partition induced by $\approx_M$ also has $|Q_M|$ equivalent classes.
+It is also easy to show that: if $x \approx_M y$, then $x \approx_{L(M)} y$, and this means
+$\approx_M$ is a more refined equivalent relation than $\approx_{L(M)}$. Since partition induced by
+$\approx_M$ is finite, the one induced by $\approx_{L(M)}$ must also be finite. Now, we get one direction
+of \mht:
+\begin{Lem}[\mht , Direction one]
+  If a language $\Lang$ is regular (i.e. $\Lang = L(M)$ for some DFA $M$), then 
+  the partition induced by $\approx_\Lang$ is finite.
+\end{Lem}
 
-The most acquainted way to define a regular language @{text "Lang"} is by giving an automaton which
-recorgizes every string in @{text "Lang"}. Fig.\ref{fig_origin_auto} gives such a automaton, which 
-can be viewed as a way of assigning status to strings: for any given string @{text "x"}:
+The other direction is:
+\begin{Lem}[\mht , Direction two]\label{auto_mh_d2}
+  If  the partition induced by $\approx_\Lang$ is finite, then
+  $\Lang$ is regular (i.e. $\Lang = L(M)$ for some DFA $M$).
+\end{Lem}
+To prove this lemma, a DFA $M_\Lang$ is constructed out of $\approx_\Lang$ with:
+\begin{subequations}
+\begin{eqnarray}
+  Q_{M_\Lang} ~ & \equiv & ~ \{ \cls{x}{\approx_\Lang}~|~ x \in \Sigma^* \}\\
+  \Sigma_{M_\Lang} ~ & \equiv & ~ \Sigma_M \\
+  \delta_{M_\Lang} ~ & \equiv & ~ \left (\lambda (\cls{x}{\approx_\Lang}, a).  \cls{xa}{\approx_\Lang} \right) \\
+  s_{M_\Lang} ~ & \equiv & ~ \cls{[]}{\approx_\Lang} \\
+  F_{M_\Lang} ~ & \equiv & ~ \{ \cls{x}{\approx_\Lang}~|~ x \in \Lang \}
+\end{eqnarray}
+\end{subequations}
+From the assumption of lemma \ref{auto_mh_d2}, we have that $\{ \cls{x}{\approx_\Lang}~|~ x \in \Sigma^* \}$
+is finite
+It can be proved that $\Lang = L(M_\Lang)$.
 
+*}
 
+text {*
 
 \begin{figure}[h!]
 \centering
-\subfigure[Original automaton]{\label{fig_origin_auto}
-\begin{minipage}{0.4\textwidth}
-\scalebox{0.8}{
-\begin{tikzpicture}[ultra thick,>=latex]
-  \node[draw,circle,initial] (start) {$X_0$};
-  \node[draw,circle,accepting] at ($(start) + (3.5cm,2cm)$) (ac1) {$X_1$};
-  \node[draw,circle,accepting] at ($(start) + (3.5cm,-2cm)$) (ac2) {$X_2$};
-
-  \path[->] (start) edge [bend left] node [midway, above] {$a$} (ac1);
-  \path[->] (start) edge [bend right] node [midway, below] {$b$} (ac2);
-  \path[->] (ac1) edge [loop above] node [midway, above] {$b$} (ac1);
-  \path[->] (ac2) edge [loop below] node [midway, below] {$a$} (ac2);
-  \path[->] (ac1) edge [bend right] node [midway, left] {$c$} (ac2);
-  \path[->] (ac2) edge [bend right] node [midway, right] {$c$} (ac1);
-  \path[->] (ac1) edge node [midway, sloped, above] {$d$} (start);
-  \path[->] (ac2) edge node [midway, sloped, above] {$d$} (start);
-\end{tikzpicture}}
-\end{minipage}
-}
-\subfigure[Extended automaton]{\label{fig_extended_auto}
 \begin{minipage}{0.5\textwidth}
 \scalebox{0.8}{
 \begin{tikzpicture}[ultra thick,>=latex]
@@ -91,7 +107,6 @@
   \path[->,dashed] (ab) edge [loop right] node [midway, right] {$\Sigma$} (ab);
 \end{tikzpicture}}
 \end{minipage}
-}
 \caption{The relationship between automata and finite partition}\label{fig_auto_part_rel}
 \end{figure}

--- a/tphols-2011/document/root.tex	Sat Feb 05 13:56:50 2011 +0000
+++ b/tphols-2011/document/root.tex	Sun Feb 06 10:28:29 2011 +0000
@@ -1,6 +1,7 @@
 \documentclass[11pt,a4paper]{article}
 \usepackage{isabelle,isabellesym}
 \usepackage{amsmath}
+\usepackage{amsthm}
 \usepackage{tikz}
 \usetikzlibrary{arrows,automata,decorations,fit,calc}
 \usetikzlibrary{shapes,shapes.arrows,snakes,positioning}
@@ -26,7 +27,7 @@
   %for \<onesuperior>, \<onequarter>, \<twosuperior>, \<onehalf>,
   %\<threesuperior>, \<threequarters>, \<degree>
 
-%\usepackage[only,bigsqcap]{stmaryrd}
+\usepackage{stmaryrd}
   %for \<Sqinter>
 
 %\usepackage{eufrak}
@@ -45,6 +46,13 @@
 % for uniform font size
 %\renewcommand{\isastyle}{\isastyleminor}
 
+{\theoremstyle{break} \newtheorem{Lem}{Lemma}}
+
+\newcommand{\dfa}[1]{\hat{\delta}_{#1}}
+\newcommand{\Lang}{\mathcal{L}}
+\newcommand{\mht}{Myhill-Nerode Theorem}
+\newcommand{\qnt}[2]{{#1}//{#2}}
+\newcommand{\cls}[2]{\llbracket{#1}\rrbracket_{#2}}
 
 \begin{document}

Binary file tphols-2011/myhill.pdf has changed

author	zhang
	Sun, 06 Feb 2011 10:28:29 +0000
changeset 69	ecf6c61a4541
parent 68	dceb84acdee8
child 70	8ab3a06577cf

Myhill.thy		file \| annotate \| diff \| comparison \| revisions
tphols-2011/document/root.tex		file \| annotate \| diff \| comparison \| revisions
tphols-2011/myhill.pdf		file \| annotate \| diff \| comparison \| revisions