# HG changeset patch # User urbanc # Date 1297188294 0 # Node ID 14b12b5de6d36f3ce37dfa8bea90d3112c6e6d44 # Parent dc879cb59c9c5d5ea54ef34ac1773e36347ebb86 added coments about functions diff -r dc879cb59c9c -r 14b12b5de6d3 Paper/Paper.thy --- a/Paper/Paper.thy Tue Feb 08 16:49:18 2011 +0000 +++ b/Paper/Paper.thy Tue Feb 08 18:04:54 2011 +0000 @@ -22,7 +22,7 @@ quotient ("_ \<^raw:\ensuremath{\!\sslash\!}> _" [90, 90] 90) and REL ("\") and UPLUS ("_ \<^raw:\ensuremath{\uplus}> _" [90, 90] 90) and - L ("L'(_')" [0] 101) and + L ("\<^raw:\ensuremath{\cal{L}}>'(_')" [0] 101) and Lam ("\'(_')" [100] 100) and Trn ("_, _" [100, 100] 100) and EClass ("\_\\<^bsub>_\<^esub>" [100, 100] 100) and @@ -50,18 +50,15 @@ obtain an automaton for the complement language. The problem, however, lies with formalising such reasoning in a HOL-based theorem prover, in our case Isabelle/HOL. Automata are build up from states and transitions that - need to be represented as graphs or matrices, neither - of which can be defined as inductive datatype.\footnote{In some works - functions are used to represent state transitions, but also they are not - inductive datatypes.} This means we have to build our own reasoning - infrastructure for them, as neither Isabelle/HOL nor HOL4 nor HOLlight support - them with libraries. + need to be represented as graphs, matrices or functions, none + of which can be defined as inductive datatype. - Even worse, reasoning about graphs and matrices can be a real hassle in HOL-based - theorem provers. Consider for example the operation of sequencing - two automata, say $A_1$ and $A_2$, by connecting the - accepting states of $A_1$ to the initial state of $A_2$: - + In case of graphs and matrices, this means we have to build our own + reasoning infrastructure for them, as neither Isabelle/HOL nor HOL4 nor + HOLlight support them with libraries. Even worse, reasoning about graphs and + matrices can be a real hassle in HOL-based theorem provers. Consider for + example the operation of sequencing two automata, say $A_1$ and $A_2$, by + connecting the accepting states of $A_1$ to the initial state of $A_2$: \begin{center} \begin{tabular}{ccc} @@ -121,10 +118,10 @@ On ``paper'' we can define the corresponding graph in terms of the disjoint union of the state nodes. Unfortunately in HOL, the definition for disjoint union, namely - - \begin{center} + % + \begin{equation}\label{disjointunion} @{term "UPLUS A\<^isub>1 A\<^isub>2 \ {(1, x) | x. x \ A\<^isub>1} \ {(2, y) | y. y \ A\<^isub>2}"} - \end{center} + \end{equation} \noindent changes the type---the disjoint union is not a set, but a set of pairs. @@ -139,18 +136,37 @@ connecting two automata represented as matrices results in very adhoc constructions, which are not pleasant to reason about. + Functions are much better supported in Isabelle/HOL, but they still lead to similar + problems as with graphs. Composing two non-deterministic automata in parallel + poses still the problem of how to implement disjoint unions. Nipkow \cite{Nipkow98} + dismisses the option using identities, because it leads to messy proofs. He + opts for a variant of \eqref{disjointunion}, but writes + + \begin{quote} + \it ``If the reader finds the above treatment in terms of bit lists revoltingly + concrete, I cannot disagree.'' + \end{quote} + + \noindent + Moreover, it is not so clear how to conveniently impose a finiteness condition + upon functions in order to represent \emph{finite} automata. The best is + probably to resort to more advanced reasoning frameworks, such as \emph{locales}. + Because of these problems to do with representing automata, there seems to be no substantial formalisation of automata theory and regular languages - carried out in a HOL-based theorem prover. We are only aware of the - large formalisation of automata theory in Nuprl \cite{Constable00} and - some smaller formalisations in Coq (for example \cite{Filliatre97}). + carried out in a HOL-based theorem prover. Nipkow establishes in + \cite{Nipkow98} the link between regular expressions and automata in + the context of lexing. The only larger formalisations of automata theory + are carried out in Nuprl \cite{Constable00} and in Coq (for example + \cite{Filliatre97}). - In this paper, we will not attempt to formalise automata theory, but take a completely - different approach to regular languages. Instead of defining a regular language as one - where there exists an automaton that recognises all strings of the language, we define - a regular language as: + In this paper, we will not attempt to formalise automata theory in + Isabelle/HOL, but take a completely different approach to regular + languages. Instead of defining a regular language as one where there exists + an automaton that recognises all strings of the language, we define a + regular language as: - \begin{definition}[A Regular Language] + \begin{definition} A language @{text A} is \emph{regular}, provided there is a regular expression that matches all strings of @{text "A"}. \end{definition} @@ -271,7 +287,7 @@ \end{center} \noindent - The language matched by a regular expression is defined as usual: + and the language matched by a regular expression is defined as: \begin{center} \begin{tabular}{c@ {\hspace{10mm}}c} @@ -292,6 +308,8 @@ \end{tabular} \end{center} + + *} section {* Finite Partitions Imply Regularity of a Language *} @@ -365,8 +383,8 @@ \end{center} \noindent - where the pairs @{text "(Y\<^isub>i\<^isub>j, r\<^isub>i\<^isub>j)"} stand for all transitions - @{term "Y\<^isub>i\<^isub>j \r\<^isub>i\<^isub>j\ X\<^isub>i"}. The term @{text "\(EMPTY)"} acts as a marker for the equivalence + where the pairs @{text "(Y\<^isub>i\<^isub>j, CHAR c\<^isub>i\<^isub>j)"} stand for all transitions + @{term "Y\<^isub>i\<^isub>j \(CHAR c\<^isub>i\<^isub>j)\ X\<^isub>i"}. The term @{text "\(EMPTY)"} acts as a marker for the equivalence class containing @{text "[]"}. (Note that we mark, roughly speaking, the single ``initial'' state in the equational system, which is different from the method by Brzozowski \cite{Brzozowski64}, since for his purposes he needs to mark diff -r dc879cb59c9c -r 14b12b5de6d3 Paper/document/root.bib --- a/Paper/document/root.bib Tue Feb 08 16:49:18 2011 +0000 +++ b/Paper/document/root.bib Tue Feb 08 18:04:54 2011 +0000 @@ -49,3 +49,13 @@ pages = {481--494}, publisher = {ACM} } + +@inproceedings{Nipkow98, + author={T.~Nipkow}, + title={{V}erified {L}exical {A}nalysis}, + booktitle={Theorem Proving in Higher Order Logics}, + series=LNCS, + volume=1479, + pages={1--15}, + year=1998 +} \ No newline at end of file diff -r dc879cb59c9c -r 14b12b5de6d3 Paper/document/root.tex --- a/Paper/document/root.tex Tue Feb 08 16:49:18 2011 +0000 +++ b/Paper/document/root.tex Tue Feb 08 18:04:54 2011 +0000 @@ -36,8 +36,8 @@ introduce the subject by describing finite automata and only mentioning on the side a connection with regular expressions. Unfortunately, automata are a hassle for formalisations in HOL-based -theorem provers. The reason is that they need to be represented as graphs -or matrices, neither of which can be defined as inductive datatype. Also +theorem provers. The reason is that they need to be represented as graphs, +matrices or functions, none of which are inductive datatypes. Also operations, such as disjoint unions of graphs, are not easily formalisiable in HOL. In contrast, regular expressions can be defined conveniently as datatype and a corresponding reasoning infrastructure comes for