Journal/Paper.thy
changeset 176 6969de1eb96b
parent 175 edc642266a82
child 177 50cc1a39c990
equal deleted inserted replaced
175:edc642266a82 176:6969de1eb96b
   367   Isabelle/HOL as @{term "UNIV::string set"}. The concatenation of two languages 
   367   Isabelle/HOL as @{term "UNIV::string set"}. The concatenation of two languages 
   368   is written @{term "A \<cdot> B"} and a language raised to the power @{text n} is written 
   368   is written @{term "A \<cdot> B"} and a language raised to the power @{text n} is written 
   369   @{term "A \<up> n"}. They are defined as usual
   369   @{term "A \<up> n"}. They are defined as usual
   370 
   370 
   371   \begin{center}
   371   \begin{center}
       
   372   
   372   @{thm conc_def'[THEN eq_reflection, where A1="A" and B1="B"]}
   373   @{thm conc_def'[THEN eq_reflection, where A1="A" and B1="B"]}
   373   \hspace{7mm}
   374   \hspace{7mm}
   374   @{thm lang_pow.simps(1)[THEN eq_reflection, where A1="A"]}
   375   @{thm lang_pow.simps(1)[THEN eq_reflection, where A1="A"]}
   375   \hspace{7mm}
   376   \hspace{7mm}
   376   @{thm lang_pow.simps(2)[THEN eq_reflection, where A1="A" and n1="n"]}
   377   @{thm lang_pow.simps(2)[THEN eq_reflection, where A1="A" and n1="n"]}
   401   @{text "\<lbrakk>x\<rbrakk>\<^isub>\<approx>"} for the equivalence class defined 
   402   @{text "\<lbrakk>x\<rbrakk>\<^isub>\<approx>"} for the equivalence class defined 
   402   as \mbox{@{text "{y | y \<approx> x}"}}.
   403   as \mbox{@{text "{y | y \<approx> x}"}}.
   403 
   404 
   404 
   405 
   405   Central to our proof will be the solution of equational systems
   406   Central to our proof will be the solution of equational systems
   406   involving equivalence classes of languages. For this we will use Arden's Lemma \cite{Brzozowski64},
   407   involving equivalence classes of languages. For this we will use Arden's Lemma 
       
   408   (see \cite[Page 100]{Sakarovitch09}),
   407   which solves equations of the form @{term "X = A \<cdot> X \<union> B"} provided
   409   which solves equations of the form @{term "X = A \<cdot> X \<union> B"} provided
   408   @{term "[] \<notin> A"}. However we will need the following `reverse' 
   410   @{term "[] \<notin> A"}. However we will need the following `reverse' 
   409   version of Arden's Lemma (`reverse' in the sense of changing the order of @{term "A \<cdot> X"} to
   411   version of Arden's Lemma (`reverse' in the sense of changing the order of @{term "A \<cdot> X"} to
   410   \mbox{@{term "X \<cdot> A"}}).
   412   \mbox{@{term "X \<cdot> A"}}).
   411 
   413 
   447 
   449 
   448   \noindent
   450   \noindent
   449   Regular expressions are defined as the inductive datatype
   451   Regular expressions are defined as the inductive datatype
   450 
   452 
   451   \begin{center}
   453   \begin{center}
   452   @{text r} @{text "::="}
   454   \begin{tabular}{rcl}
   453   @{term ZERO}\hspace{1.5mm}@{text"|"}\hspace{1.5mm} 
   455   @{text r} & @{text "::="} & @{term ZERO}\\
   454   @{term ONE}\hspace{1.5mm}@{text"|"}\hspace{1.5mm} 
   456    & @{text"|"} & @{term ONE}\\ 
   455   @{term "ATOM c"}\hspace{1.5mm}@{text"|"}\hspace{1.5mm} 
   457    & @{text"|"} & @{term "ATOM c"}\\
   456   @{term "TIMES r r"}\hspace{1.5mm}@{text"|"}\hspace{1.5mm} 
   458    & @{text"|"} & @{term "TIMES r r"}\\
   457   @{term "PLUS r r"}\hspace{1.5mm}@{text"|"}\hspace{1.5mm} 
   459    & @{text"|"} & @{term "PLUS r r"}\\
   458   @{term "STAR r"}
   460    & @{text"|"} & @{term "STAR r"}
       
   461   \end{tabular}
   459   \end{center}
   462   \end{center}
   460 
   463 
   461   \noindent
   464   \noindent
   462   and the language matched by a regular expression is defined as
   465   and the language matched by a regular expression is defined as
   463 
   466 
   464   \begin{center}
   467   \begin{center}
   465   \begin{tabular}{c@ {\hspace{10mm}}c}
   468   \begin{tabular}{r@ {\hspace{2mm}}c@ {\hspace{2mm}}l}
   466   \begin{tabular}{rcl}
       
   467   @{thm (lhs) lang.simps(1)} & @{text "\<equiv>"} & @{thm (rhs) lang.simps(1)}\\
   469   @{thm (lhs) lang.simps(1)} & @{text "\<equiv>"} & @{thm (rhs) lang.simps(1)}\\
   468   @{thm (lhs) lang.simps(2)} & @{text "\<equiv>"} & @{thm (rhs) lang.simps(2)}\\
   470   @{thm (lhs) lang.simps(2)} & @{text "\<equiv>"} & @{thm (rhs) lang.simps(2)}\\
   469   @{thm (lhs) lang.simps(3)[where a="c"]} & @{text "\<equiv>"} & @{thm (rhs) lang.simps(3)[where a="c"]}\\
   471   @{thm (lhs) lang.simps(3)[where a="c"]} & @{text "\<equiv>"} & @{thm (rhs) lang.simps(3)[where a="c"]}\\
   470   \end{tabular}
       
   471   &
       
   472   \begin{tabular}{rcl}
       
   473   @{thm (lhs) lang.simps(4)[where ?r="r\<^isub>1" and ?s="r\<^isub>2"]} & @{text "\<equiv>"} &
   472   @{thm (lhs) lang.simps(4)[where ?r="r\<^isub>1" and ?s="r\<^isub>2"]} & @{text "\<equiv>"} &
   474        @{thm (rhs) lang.simps(4)[where ?r="r\<^isub>1" and ?s="r\<^isub>2"]}\\
   473        @{thm (rhs) lang.simps(4)[where ?r="r\<^isub>1" and ?s="r\<^isub>2"]}\\
   475   @{thm (lhs) lang.simps(5)[where ?r="r\<^isub>1" and ?s="r\<^isub>2"]} & @{text "\<equiv>"} &
   474   @{thm (lhs) lang.simps(5)[where ?r="r\<^isub>1" and ?s="r\<^isub>2"]} & @{text "\<equiv>"} &
   476        @{thm (rhs) lang.simps(5)[where ?r="r\<^isub>1" and ?s="r\<^isub>2"]}\\
   475        @{thm (rhs) lang.simps(5)[where ?r="r\<^isub>1" and ?s="r\<^isub>2"]}\\
   477   @{thm (lhs) lang.simps(6)[where r="r"]} & @{text "\<equiv>"} &
   476   @{thm (lhs) lang.simps(6)[where r="r"]} & @{text "\<equiv>"} &
   478       @{thm (rhs) lang.simps(6)[where r="r"]}\\
   477       @{thm (rhs) lang.simps(6)[where r="r"]}\\
   479   \end{tabular}
       
   480   \end{tabular}
   478   \end{tabular}
   481   \end{center}
   479   \end{center}
   482 
   480 
   483   Given a finite set of regular expressions @{text rs}, we will make use of the operation of generating 
   481   Given a finite set of regular expressions @{text rs}, we will make use of the operation of generating 
   484   a regular expression that matches the union of all languages of @{text rs}. We only need to know the 
   482   a regular expression that matches the union of all languages of @{text rs}. We only need to know the 
   524   the string @{text "[c]"}. The relation @{term "\<approx>({[c]})"} partitions @{text UNIV}
   522   the string @{text "[c]"}. The relation @{term "\<approx>({[c]})"} partitions @{text UNIV}
   525   into three equivalence classes @{text "X\<^isub>1"}, @{text "X\<^isub>2"} and  @{text "X\<^isub>3"}
   523   into three equivalence classes @{text "X\<^isub>1"}, @{text "X\<^isub>2"} and  @{text "X\<^isub>3"}
   526   as follows
   524   as follows
   527   
   525   
   528   \begin{center}
   526   \begin{center}
   529   @{text "X\<^isub>1 = {[]}"}\hspace{5mm}
   527   \begin{tabular}{l}
   530   @{text "X\<^isub>2 = {[c]}"}\hspace{5mm}
   528   @{text "X\<^isub>1 = {[]}"}\\
       
   529   @{text "X\<^isub>2 = {[c]}"}\\
   531   @{text "X\<^isub>3 = UNIV - {[], [c]}"}
   530   @{text "X\<^isub>3 = UNIV - {[], [c]}"}
       
   531   \end{tabular}
   532   \end{center}
   532   \end{center}
   533 
   533 
   534   One direction of the Myhill-Nerode theorem establishes 
   534   One direction of the Myhill-Nerode theorem establishes 
   535   that if there are finitely many equivalence classes, like in the example above, then 
   535   that if there are finitely many equivalence classes, like in the example above, then 
   536   the language is regular. In our setting we therefore have to show:
   536   the language is regular. In our setting we therefore have to show:
   982   this is equal to \mbox{@{text "\<Union>\<calL> ` (Arden X rhs)"}} using the properties of the 
   982   this is equal to \mbox{@{text "\<Union>\<calL> ` (Arden X rhs)"}} using the properties of the 
   983   invariant and Lem.~\ref{ardenable}. Using the validity property for the equation @{text "X = rhs"},
   983   invariant and Lem.~\ref{ardenable}. Using the validity property for the equation @{text "X = rhs"},
   984   we can infer that @{term "rhss rhs \<subseteq> {X}"} and because the @{text Arden} operation
   984   we can infer that @{term "rhss rhs \<subseteq> {X}"} and because the @{text Arden} operation
   985   removes that @{text X} from @{text rhs}, that @{term "rhss (Arden X rhs) = {}"}.
   985   removes that @{text X} from @{text rhs}, that @{term "rhss (Arden X rhs) = {}"}.
   986   This means the right-hand side @{term "Arden X rhs"} can only consist of terms of the form @{term "Lam r"}.
   986   This means the right-hand side @{term "Arden X rhs"} can only consist of terms of the form @{term "Lam r"}.
   987   So we can collect those (finitely many) regular expressions @{text rs} and have @{term "X = L (\<Uplus>rs)"}.
   987   So we can collect those (finitely many) regular expressions @{text rs} and have @{term "X = lang (\<Uplus>rs)"}.
   988   With this we can conclude the proof.
   988   With this we can conclude the proof.
   989   \end{proof}
   989   \end{proof}
   990 
   990 
   991   \noindent
   991   \noindent
   992   Lem.~\ref{every_eqcl_has_reg} allows us to finally give a proof for the first direction
   992   Lem.~\ref{every_eqcl_has_reg} allows us to finally give a proof for the first direction
   997   every equivalence class in @{term "UNIV // \<approx>A"}. Since @{text "finals A"} is
   997   every equivalence class in @{term "UNIV // \<approx>A"}. Since @{text "finals A"} is
   998   a subset of  @{term "UNIV // \<approx>A"}, we also know that for every equivalence class
   998   a subset of  @{term "UNIV // \<approx>A"}, we also know that for every equivalence class
   999   in @{term "finals A"} there exists a regular expression. Moreover by assumption 
   999   in @{term "finals A"} there exists a regular expression. Moreover by assumption 
  1000   we know that @{term "finals A"} must be finite, and therefore there must be a finite
  1000   we know that @{term "finals A"} must be finite, and therefore there must be a finite
  1001   set of regular expressions @{text "rs"} such that
  1001   set of regular expressions @{text "rs"} such that
  1002   @{term "\<Union>(finals A) = L (\<Uplus>rs)"}.
  1002   @{term "\<Union>(finals A) = lang (\<Uplus>rs)"}.
  1003   Since the left-hand side is equal to @{text A}, we can use @{term "\<Uplus>rs"} 
  1003   Since the left-hand side is equal to @{text A}, we can use @{term "\<Uplus>rs"} 
  1004   as the regular expression that is needed in the theorem.
  1004   as the regular expression that is needed in the theorem.
  1005   \end{proof}
  1005   \end{proof}
  1006 *}
  1006 *}
  1007 
  1007