theory Slides8
\LARGE Verifying a Regular Expression\\[-1mm]
\LARGE Matcher and Formal Language\\[-1mm]
\LARGE Theory\\[5mm]
Christian Urban\\
\small Technical University of Munich, Germany
\small joint work with Chunhan Wu and Xingyuan Zhang from the PLA
University of Science and Technology in Nanjing
\frametitle{This Talk: 4 Points}
\item It is easy to make mistakes.\medskip
\item Theorem provers can prevent mistakes, {\bf if} the problem
is formulated so that it is suitable for theorem provers.\medskip
\item This re-formulation can be done, even in domains where
we least expect it.\medskip
\item Where theorem provers are superior to the {\color{gray}{(best)}} human reasoners. ;o)
{\footnotesize Bob Harper}\\[-2.5mm]
{\footnotesize (CMU)}
{\footnotesize Frank Pfenning}\\[-2.5mm]
{\footnotesize (CMU)}
\color{gray}{published a proof in\\ {\bf ACM Transactions on Computational Logic} (2005),
{\footnotesize Andrew Appel}\\[-2.5mm]
{\footnotesize (Princeton)}
\color{gray}{relied on their proof in a\\ {\bf security} critical application}
\frametitle{Proof-Carrying Code}
{\small\begin{tabular}{@ {}p{1.9cm}@ {}}\centering user: untrusted code\end{tabular}}
{\small\begin{tabular}{@ {}p{1.9cm}@ {}}\centering developer ---\\ web server\end{tabular}}
{\small\begin{tabular}{@ {}p{1.9cm}@ {}}\bf\centering proof- checker\end{tabular}}
\node at (3.8,3.0) [single arrow, fill=red,text=white, minimum height=3cm]{\bf code};
\node at (3.8,1.3) [single arrow, fill=red,text=white, minimum height=3cm]{\bf certificate};
\node at (3.8,1.9) {\small\color{gray}{\mbox{}\hspace{-1mm}a proof in LF}};
\node (def1) [node1] {\large\hspace{1mm}Spec\hspace{1mm}\mbox{}};
\node (proof1) [node1] {\large Proof};
\node (alg1) [node1] {\large\hspace{1mm}Alg\hspace{1mm}\mbox{}};
\onslide<4->{\node {\begin{tabular}{c}\small 1st\\[-2.5mm] \footnotesize solution\end{tabular}};
\onslide<4->{\node (def2) [node2] {\large Spec$^\text{+ex}$};
\onslide<4->{\node (proof2) [node1] {\large Proof};
\onslide<4->{\node (alg2) [node1] {\large\hspace{1mm}Alg\hspace{1mm}\mbox{}};
\onslide<5->{\node {\begin{tabular}{c}\small 2nd\\[-2.5mm] \footnotesize solution\end{tabular}};
\onslide<5->{\node (def3) [node1] {\large\hspace{1mm}Spec\hspace{1mm}\mbox{}};
\onslide<5->{\node (proof3) [node1] {\large Proof};
\onslide<5->{\node (alg3) [node2] {\large Alg$^\text{-ex}$};
\onslide<6->{\node {\begin{tabular}{c}\small 3rd\\[-2.5mm] \footnotesize solution\end{tabular}};
\onslide<6->{\node (def4) [node1] {\large\hspace{1mm}Spec\hspace{1mm}\mbox{}};
\onslide<6->{\node (proof4) [node2] {\large\hspace{1mm}Proof\hspace{1mm}};
\onslide<6->{\node (alg4) [node1] {\large\hspace{1mm}Alg\hspace{1mm}\mbox{}};
(* Declare the atom type "name"; it supplies the variable names of the
   lambda-terms defined next. *)
atom_decl name
(* Lambda-terms with three constructors: variables (Var), applications (App)
   and abstractions (Lam).  The argument of Lam is the type built from
   "name" and "lam" with the guillemot brackets, and its mixfix annotation
   lets an abstraction be written as  Lam [x].t  . *)
nominal_datatype lam =
Var "name"
| App "lam" "lam"
| Lam "\<guillemotleft>name\<guillemotright>lam" ("Lam [_]._" [100,100] 100)
(* Substitution on lambda-terms, written  t[y::=s] , given by one equation
   per constructor of lam.  The Lam equation is conditional: it only applies
   when the bound name x is fresh for both y and s.
   NOTE(review): the defining command keyword, the separator before the
   equations and the end of the apply-script are not visible in this
   excerpt -- confirm against the full theory before changing anything. *)
subst :: "lam \<Rightarrow> name \<Rightarrow> lam \<Rightarrow> lam" ("_[_::=_]")
"(Var x)[y::=s] = (if x=y then s else (Var x))"
| "(App t\<^isub>1 t\<^isub>2)[y::=s] = App (t\<^isub>1[y::=s]) (t\<^isub>2[y::=s])"
| "x\<sharp>(y,s) \<Longrightarrow> (Lam [x].t)[y::=s] = Lam [x].(t[y::=s])"
(* Apply-steps for the goals left open by the definition above. *)
apply(rule TrueI)+
apply(simp add: abs_fresh)
(* Substitution commutes with permutations: applying pi to  t1[x::=t2]
   gives the same term as substituting the permuted t2 for the permuted x
   in the permuted t1.  The lemma carries the [eqvt] attribute.
   Proof: strong induction on t1 with x and t2 in the avoiding clause,
   finished by auto with perm_bij, fresh_atm and fresh_bij. *)
lemma subst_eqvt[eqvt]:
fixes pi::"name prm"
shows "pi\<bullet>(t1[x::=t2]) = (pi\<bullet>t1)[(pi\<bullet>x)::=(pi\<bullet>t2)]"
by (nominal_induct t1 avoiding: x t2 rule: lam.strong_induct)
(auto simp add: perm_bij fresh_atm fresh_bij)
(* Freshness is preserved by substitution: if z is fresh for s, and z is
   either the substituted variable y itself or fresh for t, then z is fresh
   for  t[y::=s] .
   Proof: strong induction on t with z, y and s in the avoiding clause. *)
lemma fresh_fact:
fixes z::"name"
shows "\<lbrakk>z\<sharp>s; (z=y \<or> z\<sharp>t)\<rbrakk> \<Longrightarrow> z\<sharp>t[y::=s]"
by (nominal_induct t avoiding: z y s rule: lam.strong_induct)
(auto simp add: abs_fresh fresh_prod fresh_atm)
(* Substituting for a variable that is fresh for L leaves L unchanged.
   Proof: strong induction on L with x and P in the avoiding clause; the
   freshness assumption asm is chained into the induction. *)
lemma forget:
assumes asm: "x\<sharp>L"
shows "L[x::=P] = L"
using asm
by (nominal_induct L avoiding: x P rule: lam.strong_induct)
(auto simp add: abs_fresh fresh_atm)
(* The substitution lemma, proved the hard way: by plain structural
   induction (lam.induct) instead of strong induction.  In the Lam case the
   bound name z is arbitrary, so the proof has to
     1. pick a name z' fresh for everything in sight (fc),
     2. rename the binder from z to z' (eq),
     3. transport the induction hypothesis along the swapping [(z',z)]
        to obtain it for the renamed body (ih'),
     4. push both substitutions under the renamed binder and back again.
   The Var and App cases are closed by the final  qed (auto simp add: forget). *)
lemma substitution_lemma_not_to_be_tried_at_home:
assumes asm: "x\<noteq>y" "x\<sharp>L"
shows "M[x::=N][y::=L] = M[y::=L][x::=N[y::=L]]"
using asm
proof (induct M arbitrary: x y N L rule: lam.induct)
case (Lam z M1)
(* Induction hypothesis for the body M1 and the two premises of this case. *)
have ih: "\<And>x y N L. \<lbrakk>x\<noteq>y; x\<sharp>L\<rbrakk> \<Longrightarrow> M1[x::=N][y::=L] = M1[y::=L][x::=N[y::=L]]" by fact
have "x\<noteq>y" by fact
have "x\<sharp>L" by fact
(* Step 1: a name z' that is fresh for all names and terms involved. *)
obtain z'::"name" where fc: "z'\<sharp>(x,y,z,M1,N,L)" by (rule exists_fresh) (auto simp add: fs_name1)
(* Step 2: renaming the binder z to z' does not change the abstraction. *)
have eq: "Lam [z'].([(z',z)]\<bullet>M1) = Lam [z].M1" using fc
by (auto simp add: lam.inject alpha fresh_prod fresh_atm)
have fc': "z'\<sharp>N[y::=L]" using fc by (simp add: fresh_fact fresh_prod)
(* Step 3: the premises of ih, stated for the permuted x, y and L ... *)
have "([(z',z)]\<bullet>x) \<noteq> ([(z',z)]\<bullet>y)" using `x\<noteq>y` by (auto simp add: calc_atm)
have "([(z',z)]\<bullet>x)\<sharp>([(z',z)]\<bullet>L)" using `x\<sharp>L` by (simp add: fresh_bij)
(* ... the induction hypothesis instantiated with the permuted arguments ... *)
have "M1[([(z',z)]\<bullet>x)::=([(z',z)]\<bullet>N)][([(z',z)]\<bullet>y)::=([(z',z)]\<bullet>L)]
= M1[([(z',z)]\<bullet>y)::=([(z',z)]\<bullet>L)][([(z',z)]\<bullet>x)::=([(z',z)]\<bullet>N)[([(z',z)]\<bullet>y)::=([(z',z)]\<bullet>L)]]"
using ih by simp
(* ... the swapping applied to that whole equation ... *)
then have "[(z',z)]\<bullet>(M1[([(z',z)]\<bullet>x)::=([(z',z)]\<bullet>N)][([(z',z)]\<bullet>y)::=([(z',z)]\<bullet>L)]
= M1[([(z',z)]\<bullet>y)::=([(z',z)]\<bullet>L)][([(z',z)]\<bullet>x)::=([(z',z)]\<bullet>N)[([(z',z)]\<bullet>y)::=([(z',z)]\<bullet>L)]])"
by (simp add: perm_bool)
(* ... and the swapping pushed inside, which yields the induction hypothesis
   for the renamed body [(z',z)] o M1 with the original x, y, N and L. *)
then have ih': "([(z',z)]\<bullet>M1)[x::=N][y::=L] = ([(z',z)]\<bullet>M1)[y::=L][x::=N[y::=L]]"
by (simp add: eqvts perm_swap)
(* Step 4: the calculation under the renamed binder. *)
show "(Lam [z].M1)[x::=N][y::=L] = (Lam [z].M1)[y::=L][x::=N[y::=L]]" (is "?LHS=?RHS")
proof -
have "?LHS = (Lam [z'].([(z',z)]\<bullet>M1))[x::=N][y::=L]" using eq by simp
also have "\<dots> = Lam [z'].(([(z',z)]\<bullet>M1)[x::=N][y::=L])" using fc by (simp add: fresh_prod)
(* This step rewrites the body with the permuted hypothesis ih'; the
   unpermuted ih speaks about M1 and does not apply to the renamed body. *)
also from ih' have "\<dots> = Lam [z'].(([(z',z)]\<bullet>M1)[y::=L][x::=N[y::=L]])" by simp
also have "\<dots> = (Lam [z'].([(z',z)]\<bullet>M1))[y::=L][x::=N[y::=L]]" using fc fc' by (simp add: fresh_prod)
also have "\<dots> = ?RHS" using eq by simp
finally show "?LHS = ?RHS" .
(* Closes the nested  proof -  of the show statement. *)
qed
qed (auto simp add: forget)
(* The substitution lemma once more, this time proved by strong induction
   on M with x, y, N and L in the avoiding clause.  With that induction
   principle the whole proof is a single auto call that only needs the
   lemmas forget and fresh_fact. *)
lemma substitution_lemma\<iota>:
assumes asm: "x \<noteq> y" "x \<sharp> L"
shows "M[x::=N][y::=L] = M[y::=L][x::=N[y::=L]]"
using asm
by (nominal_induct M avoiding: x y N L rule: lam.strong_induct)
(auto simp add: forget fresh_fact)
\frametitle{Lesson Learned}
Theorem provers can keep large proofs and definitions consistent and
make them modifiable.
In most papers/books:
``\ldots this necessary hygienic discipline is somewhat swept under the carpet via
the so-called `{\bf variable convention}' \ldots
The {\color{black}{\bf belief}} that this is {\bf sound} came from the calculus
with nameless binders in de Bruijn''
\frametitle{Regular Expressions}
\begin{tabular}{@ {}rrl}
\bl{r} & \bl{$::=$} & \bl{$\varnothing$}\\
& \bl{$\mid$} & \bl{[]}\\
& \bl{$\mid$} & \bl{c}\\
& \bl{$\mid$} & \bl{r$_1$ + r$_2$}\\
& \bl{$\mid$} & \bl{r$_1$ $\cdot$ r$_2$}\\
& \bl{$\mid$} & \bl{r$^*$}\\
\footnotesize Isabelle:
\bl{matches r s $\;\Longrightarrow\;$ true $\vee$ false}\\[3.5mm]
\begin{tabular}{r@ {\hspace{0.5mm}}r@ {\hspace{1.5mm}}c@ {\hspace{1.5mm}}l}
\multicolumn{4}{c}{rexp $\Rightarrow$ set of strings}\bigskip\\
&\bl{\LL ($\varnothing$)} & \bl{$\dn$} & \bl{$\varnothing$}\\
&\bl{\LL ([])} & \bl{$\dn$} & \bl{\{[]\}}\\
&\bl{\LL (c)} & \bl{$\dn$} & \bl{\{c\}}\\
&\bl{\LL (r$_1$ + r$_2$)} & \bl{$\dn$} & \bl{\LL (r$_1$) $\cup$ \LL (r$_2$)}\\
\rd{$\Rightarrow$} &\bl{\LL (r$_1$ $\cdot$ r$_2$)} & \bl{$\dn$} & \bl{\LL (r$_1$) ;; \LL (r$_2$)}\\
\rd{$\Rightarrow$} &\bl{\LL (r$^*$)} & \bl{$\dn$} & \bl{(\LL (r))$^\star$}\\
\frametitle{Version 1}
\begin{tabular}{@ {\hspace{-5mm}}l@ {\hspace{2.5mm}}c@ {\hspace{2.5mm}}l@ {}}
\bl{match [] []} & \bl{$=$} & \bl{true}\\
\bl{match [] (c::s)} & \bl{$=$} & \bl{false}\\
\bl{match ($\varnothing$::rs) s} & \bl{$=$} & \bl{false}\\
\bl{match ([]::rs) s} & \bl{$=$} & \bl{match rs s}\\
\bl{match (c::rs) []} & \bl{$=$} & \bl{false}\\
\bl{match (c::rs) (d::s)} & \bl{$=$} & \bl{if c = d then match rs s else false}\\
\bl{match (r$_1$ + r$_2$::rs) s} & \bl{$=$} & \bl{match (r$_1$::rs) s $\vee$ match (r$_2$::rs) s}\\
\bl{match (r$_1$ $\cdot$ r$_2$::rs) s} & \bl{$=$} & \bl{match (r$_1$::r$_2$::rs) s}\\
\bl{match (r$^*$::rs) s} & \bl{$=$} & \bl{match rs s $\vee$ match (r::r$^*$::rs) s}\\
\bl{matches$_1$ r s $\;=\;$ match [r] s}
Every good programmer should do thorough tests:
\begin{tabular}{@ {\hspace{-20mm}}lcl}
\bl{matches$_1$ (a$\cdot$b)$^*\;$ []} & \bl{$\mapsto$} & \bl{true}\\
\bl{matches$_1$ (a$\cdot$b)$^*\;$ ab} & \bl{$\mapsto$} & \bl{true}\\
\bl{matches$_1$ (a$\cdot$b)$^*\;$ aba} & \bl{$\mapsto$} & \bl{false}\\
\bl{matches$_1$ (a$\cdot$b)$^*\;$ abab} & \bl{$\mapsto$} & \bl{true}\\
\bl{matches$_1$ (a$\cdot$b)$^*\;$ abaa} & \bl{$\mapsto$} & \bl{false}\medskip\\
\onslide<2->{\bl{matches$_1$ x$\cdot$(0$|$1)$^*\;$ x} & \bl{$\mapsto$} & \bl{true}}\\
\onslide<2->{\bl{matches$_1$ x$\cdot$(0$|$1)$^*\;$ x0} & \bl{$\mapsto$} & \bl{true}}\\
\onslide<2->{\bl{matches$_1$ x$\cdot$(0$|$1)$^*\;$ x3} & \bl{$\mapsto$} & \bl{false}}
{Looks OK \ldots let's ship it to customers\hspace{5mm}
\frametitle{Version 1}
\only<1->{Several hours later\ldots}\pause
\begin{tabular}{@ {\hspace{0mm}}lcl}
\bl{matches$_1$ []$^*$ s} & \bl{$\mapsto$} & loops\\
\onslide<4->{\bl{matches$_1$ ([] + \ldots)$^*$ s} & \bl{$\mapsto$} & loops\\}
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
\bl{match ([]::rs) s} & \bl{$=$} & \bl{match rs s}\\
\bl{match (r$^*$::rs) s} & \bl{$=$} & \bl{match rs s $\vee$ match (r::r$^*$::rs) s}\\
\item We can only test a {\bf finite} number of examples:\bigskip
``Testing can only show the presence of errors, never their
absence.'' (Edsger W.~Dijkstra)
\item In a theorem prover we can establish properties that apply to
{\bf all} input and {\bf all} output.
\frametitle{Version 2}
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}ll@ {}}
\bl{nullable ($\varnothing$)} & \bl{$=$} & \bl{false} &\\
\bl{nullable ([])} & \bl{$=$} & \bl{true} &\\
\bl{nullable (c)} & \bl{$=$} & \bl{false} &\\
\bl{nullable (r$_1$ + r$_2$)} & \bl{$=$} & \bl{nullable r$_1$ $\vee$ nullable r$_2$} & \\
\bl{nullable (r$_1$ $\cdot$ r$_2$)} & \bl{$=$} & \bl{nullable r$_1$ $\wedge$ nullable r$_2$} & \\
\bl{nullable (r$^*$)} & \bl{$=$} & \bl{true} & \\
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}}
\bl{der c ($\varnothing$)} & \bl{$=$} & \bl{$\varnothing$} & \\
\bl{der c ([])} & \bl{$=$} & \bl{$\varnothing$} & \\
\bl{der c (d)} & \bl{$=$} & \bl{if c = d then [] else $\varnothing$} & \\
\bl{der c (r$_1$ + r$_2$)} & \bl{$=$} & \bl{(der c r$_1$) + (der c r$_2$)} & \\
\bl{der c (r$_1$ $\cdot$ r$_2$)} & \bl{$=$} & \bl{((der c r$_1$) $\cdot$ r$_2$)} & \\
& & \bl{\;\;\;\;+ (if nullable r$_1$ then der c r$_2$ else $\varnothing$)}\\
\bl{der c (r$^*$)} & \bl{$=$} & \bl{(der c r) $\cdot$ r$^*$} &\smallskip\\
\bl{derivative r []} & \bl{$=$} & \bl{r} & \\
\bl{derivative r (c::s)} & \bl{$=$} & \bl{derivative (der c r) s} & \\
\bl{matches$_2$ r s $=$ nullable (derivative r s)}
\color{gray}``if r matches []''
\color{gray}``derivative w.r.t.~a char''
\color{gray}``deriv.~w.r.t.~a string''
\frametitle{Is the Matcher Error-Free?}
We expect that
\bl{matches$_2$ r s = true} & \only<1>{\rd{$\Longrightarrow\,\,$}}\only<2>{\rd{$\Longleftarrow\,\,$}}%
\only<3->{\rd{$\Longleftrightarrow$}} & \bl{s $\in$ \LL(r)}\\
\bl{matches$_2$ r s = false} & \only<1>{\rd{$\Longrightarrow\,\,$}}\only<2>{\rd{$\Longleftarrow\,\,$}}%
\only<3->{\rd{$\Longleftrightarrow$}} & \bl{s $\notin$ \LL(r)}\\
By \alert<4->{induction}, we can {\bf prove} these properties.\bigskip
Lemmas: & \bl{nullable (r)} & \bl{$\Longleftrightarrow$} & \bl{[] $\in$ \LL (r)}\\
& \bl{s $\in$ \LL (der c r)} & \bl{$\Longleftrightarrow$} & \bl{(c::s) $\in$ \LL (r)}\\
\rd{\huge$\forall$\large{}r s.}
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}ll@ {}}
\bl{nullable (NULL)} & \bl{$=$} & \bl{false} &\\
\bl{nullable (EMPTY)} & \bl{$=$} & \bl{true} &\\
\bl{nullable (CHR c)} & \bl{$=$} & \bl{false} &\\
\bl{nullable (ALT r$_1$ r$_2$)} & \bl{$=$} & \bl{(nullable r$_1$) orelse (nullable r$_2$)} & \\
\bl{nullable (SEQ r$_1$ r$_2$)} & \bl{$=$} & \bl{(nullable r$_1$) andalso (nullable r$_2$)} & \\
\bl{nullable (STAR r)} & \bl{$=$} & \bl{true} & \\
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}}
\bl{der c (NULL)} & \bl{$=$} & \bl{NULL} & \\
\bl{der c (EMPTY)} & \bl{$=$} & \bl{NULL} & \\
\bl{der c (CHR d)} & \bl{$=$} & \bl{if c=d then EMPTY else NULL} & \\
\bl{der c (ALT r$_1$ r$_2$)} & \bl{$=$} & \bl{ALT (der c r$_1$) (der c r$_2$)} & \\
\bl{der c (SEQ r$_1$ r$_2$)} & \bl{$=$} & \bl{ALT (SEQ (der c r$_1$) r$_2$)} & \\
& & \bl{\phantom{ALT} (if nullable r$_1$ then der c r$_2$ else NULL)}\\
\bl{der c (STAR r)} & \bl{$=$} & \bl{SEQ (der c r) (STAR r)} &\smallskip\\
\bl{derivative r []} & \bl{$=$} & \bl{r} & \\
\bl{derivative r (c::s)} & \bl{$=$} & \bl{derivative (der c r) s} & \\
\bl{matches r s $=$ nullable (derivative r s)}
\frametitle{No Automata?}
You might be wondering why I did not use any automata?
\item {\bf Def.:} A \alert{regular language} is one where there is a DFA that
recognises it.\bigskip\pause
There are many reasons why this is a good definition:\medskip
\item pumping lemma
\item closure properties of regular languages\\ (e.g.~closure under complement)
\frametitle{Really Bad News!}
DFAs are bad news for formalisations in theorem provers. They might
be represented as:
\item graphs
\item matrices
\item partial functions
All constructions are messy to reason about.\bigskip\bigskip
Constable et al.\ needed (on and off) 18 months for a 3-person team
to formalise automata theory in Nuprl including Myhill-Nerode. There is
only very little other formalised work on regular languages I know of
in Coq, Isabelle and HOL.}
\only<3>{Typical textbook reasoning goes like: ``\ldots if \smath{M} and \smath{N} are any two
automata with no inaccessible states \ldots''
My point:\bigskip\\
The theory about regular languages can be reformulated
to be more\\ suitable for theorem proving.
\frametitle{\LARGE The Myhill-Nerode Theorem}
\item provides necessary and suf\!ficient conditions for a language
being regular (pumping lemma only necessary)\medskip
\item will help with closure properties of regular languages\bigskip\pause
\item key is the equivalence relation:\smallskip
\smath{x \approx_{L} y \,\dn\, \forall z.\; x @ z \in L \Leftrightarrow y @ z \in L}
\frametitle{\LARGE The Myhill-Nerode Theorem}
\item \smath{\text{finite}\, (U\!N\!IV /\!/ \approx_L) \;\Leftrightarrow\; L\; \text{is regular}}
\frametitle{\LARGE Equivalence Classes}
\item \smath{L = []}
\smath{\Big\{\{[]\},\; U\!N\!IV - \{[]\}\Big\}}
\item \smath{L = [c]}
\smath{\Big\{\{[]\},\; \{[c]\},\; U\!N\!IV - \{[], [c]\}\Big\}}
\item \smath{L = \varnothing}
\frametitle{\LARGE Regular Languages}
\item \smath{L} is regular \smath{\dn} if there is an automaton \smath{M}
such that \smath{\mathbb{L}(M) = L}\\[1.5cm]
\item Myhill-Nerode:
finite $\Rightarrow$ regular\\
\;\;\;\smath{\text{finite}\,(U\!N\!IV /\!/ \approx_L) \Rightarrow \exists r.\; L = \mathbb{L}(r)}\\[3mm]
regular $\Rightarrow$ finite\\
\;\;\;\smath{\text{finite}\, (U\!N\!IV /\!/ \approx_{\mathbb{L}(r)})}
\frametitle{\LARGE Final Equiv.~Classes}
\item \smath{\text{finals}\,L \dn
\{{\lbrack\mkern-2mu\lbrack{s}\rbrack\mkern-2mu\rbrack}_\approx\;|\; s \in L\}}\\
\item we can prove: \smath{L = \bigcup (\text{finals}\,L)}
\frametitle{\LARGE Transitions between ECs}
\smath{L = \{[c]\}}
\node[state,initial] (q_0) {$R_1$};
\node[state,accepting] (q_1) [above right of=q_0] {$R_2$};
\node[state] (q_2) [below right of=q_0] {$R_3$};
\path[->] (q_0) edge node {c} (q_1)
edge node [swap] {$\Sigma-\{c\}$} (q_2)
(q_2) edge [loop below] node {$\Sigma$} ()
(q_1) edge node {$\Sigma$} (q_2);
\multicolumn{2}{l}{\smath{U\!N\!IV /\!/\approx_L} produces}\\[4mm]
\smath{R_1}: & \smath{\{[]\}}\\
\smath{R_2}: & \smath{\{[c]\}}\\
\smath{R_3}: & \smath{U\!N\!IV - \{[], [c]\}}\\[6mm]
\multicolumn{2}{l}{\onslide<2->{\smath{X \stackrel{c}{\longrightarrow} Y \dn X ;; [c] \subseteq Y}}}
\frametitle{\LARGE Systems of Equations}
Inspired by a method of Brzozowski\;'64, we can build an equational system
characterising the equivalence classes:
\node[state,initial] (p_0) {$R_1$};
\node[state,accepting] (p_1) [right of=p_0] {$R_2$};
\path[->] (p_0) edge [bend left] node {a} (p_1)
edge [loop above] node {b} ()
(p_1) edge [loop above] node {a} ()
edge [bend left] node {b} (p_0);
\begin{tabular}{@ {\hspace{-6mm}}ll@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
& \smath{R_1} & \smath{\equiv} & \smath{R_1;b + R_2;b \onslide<2->{\alert<2>{+ \lambda;[]}}}\\
& \smath{R_2} & \smath{\equiv} & \smath{R_1;a + R_2;a}\medskip\\
\onslide<3->{we can prove}
& \onslide<3->{\smath{R_1}} & \onslide<3->{\smath{=}}
& \onslide<3->{\smath{R_1;; \mathbb{L}(b) \,\cup\, R_2;;\mathbb{L}(b) \,\cup\, \{[]\}}}\\
& \onslide<3->{\smath{R_2}} & \onslide<3->{\smath{=}}
& \onslide<3->{\smath{R_1;; \mathbb{L}(a) \,\cup\, R_2;;\mathbb{L}(a)}}\\
\begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}ll}
\onslide<1->{\smath{R_1}} & \onslide<1->{\smath{=}}
& \onslide<1->{\smath{R_1; b + R_2; b + \lambda;[]}}\\
\onslide<1->{\smath{R_2}} & \onslide<1->{\smath{=}}
& \onslide<1->{\smath{R_1; a + R_2; a}}\\
& & & \onslide<2->{by Arden}\\
\onslide<2->{\smath{R_1}} & \onslide<2->{\smath{=}}
& \onslide<2->{\smath{R_1; b + R_2; b + \lambda;[]}}\\
\onslide<2->{\smath{R_2}} & \onslide<2->{\smath{=}}
& \only<2>{\smath{R_1; a + R_2; a}}%
\only<3->{\smath{R_1; a\cdot a^\star}}\\
& & & \onslide<4->{by Arden}\\
\onslide<4->{\smath{R_1}} & \onslide<4->{\smath{=}}
& \onslide<4->{\smath{R_2; b \cdot b^\star+ \lambda;b^\star}}\\
\onslide<4->{\smath{R_2}} & \onslide<4->{\smath{=}}
& \onslide<4->{\smath{R_1; a\cdot a^\star}}\\
& & & \onslide<5->{by substitution}\\
\onslide<5->{\smath{R_1}} & \onslide<5->{\smath{=}}
& \onslide<5->{\smath{R_1; a\cdot a^\star \cdot b \cdot b^\star+ \lambda;b^\star}}\\
\onslide<5->{\smath{R_2}} & \onslide<5->{\smath{=}}
& \onslide<5->{\smath{R_1; a\cdot a^\star}}\\
& & & \onslide<6->{by Arden}\\
\onslide<6->{\smath{R_1}} & \onslide<6->{\smath{=}}
& \onslide<6->{\smath{\lambda;b^\star\cdot (a\cdot a^\star \cdot b \cdot b^\star)^\star}}\\
\onslide<6->{\smath{R_2}} & \onslide<6->{\smath{=}}
& \onslide<6->{\smath{R_1; a\cdot a^\star}}\\
& & & \onslide<7->{by substitution}\\
\onslide<7->{\smath{R_1}} & \onslide<7->{\smath{=}}
& \onslide<7->{\smath{\lambda;b^\star\cdot (a\cdot a^\star \cdot b \cdot b^\star)^\star}}\\
\onslide<7->{\smath{R_2}} & \onslide<7->{\smath{=}}
& \onslide<7->{\smath{\lambda; b^\star\cdot (a\cdot a^\star \cdot b \cdot b^\star)^\star
\cdot a\cdot a^\star}}\\
\frametitle{\LARGE A Variant of Arden's Lemma}
{\bf Arden's Lemma:}\smallskip
If \smath{[] \not\in A} then
\smath{X = X; A + \text{something}}
has the (unique) solution
\smath{X = \text{something} ; A^\star}
\begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}ll}
\onslide<1->{\smath{R_1}} & \onslide<1->{\smath{=}}
& \onslide<1->{\smath{R_1; b + R_2; b + \lambda;[]}}\\
\onslide<1->{\smath{R_2}} & \onslide<1->{\smath{=}}
& \onslide<1->{\smath{R_1; a + R_2; a}}\\
& & & \onslide<2->{by Arden}\\
\onslide<2->{\smath{R_1}} & \onslide<2->{\smath{=}}
& \onslide<2->{\smath{R_1; b + R_2; b + \lambda;[]}}\\
\onslide<2->{\smath{R_2}} & \onslide<2->{\smath{=}}
& \only<2>{\smath{R_1; a + R_2; a}}%
\only<3->{\smath{R_1; a\cdot a^\star}}\\
& & & \onslide<4->{by Arden}\\
\onslide<4->{\smath{R_1}} & \onslide<4->{\smath{=}}
& \onslide<4->{\smath{R_2; b \cdot b^\star+ \lambda;b^\star}}\\
\onslide<4->{\smath{R_2}} & \onslide<4->{\smath{=}}
& \onslide<4->{\smath{R_1; a\cdot a^\star}}\\
& & & \onslide<5->{by substitution}\\
\onslide<5->{\smath{R_1}} & \onslide<5->{\smath{=}}
& \onslide<5->{\smath{R_1; a\cdot a^\star \cdot b \cdot b^\star+ \lambda;b^\star}}\\
\onslide<5->{\smath{R_2}} & \onslide<5->{\smath{=}}
& \onslide<5->{\smath{R_1; a\cdot a^\star}}\\
& & & \onslide<6->{by Arden}\\
\onslide<6->{\smath{R_1}} & \onslide<6->{\smath{=}}
& \onslide<6->{\smath{\lambda;b^\star\cdot (a\cdot a^\star \cdot b \cdot b^\star)^\star}}\\
\onslide<6->{\smath{R_2}} & \onslide<6->{\smath{=}}
& \onslide<6->{\smath{R_1; a\cdot a^\star}}\\
& & & \onslide<7->{by substitution}\\
\onslide<7->{\smath{R_1}} & \onslide<7->{\smath{=}}
& \onslide<7->{\smath{\lambda;b^\star\cdot (a\cdot a^\star \cdot b \cdot b^\star)^\star}}\\
\onslide<7->{\smath{R_2}} & \onslide<7->{\smath{=}}
& \onslide<7->{\smath{\lambda; b^\star\cdot (a\cdot a^\star \cdot b \cdot b^\star)^\star
\cdot a\cdot a^\star}}\\
\node[state,initial] (p_0) {$R_1$};
\node[state,accepting] (p_1) [right of=p_0] {$R_2$};
\path[->] (p_0) edge [bend left] node {a} (p_1)
edge [loop above] node {b} ()
(p_1) edge [loop above] node {a} ()
edge [bend left] node {b} (p_0);
\frametitle{\LARGE The Equ's Solving Algorithm}
\item The algorithm must terminate: Arden makes one equation smaller;
substitution deletes one variable from the right-hand sides.\bigskip
\item We need to maintain the invariant that Arden is applicable
(if \smath{[] \not\in A} then \ldots):\medskip
\begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}ll}
\smath{R_1} & \smath{=} & \smath{R_1; b + R_2; b + \lambda;[]}\\
\smath{R_2} & \smath{=} & \smath{R_1; a + R_2; a}\\
& & & by Arden\\
\smath{R_1} & \smath{=} & \smath{R_1; b + R_2; b + \lambda;[]}\\
\smath{R_2} & \smath{=} & \smath{R_1; a\cdot a^\star}\\
\frametitle{\LARGE Other Direction}
One has to prove
\smath{\text{finite} (U\!N\!IV /\!/ \approx_{\mathbb{L}(r)})}
by induction on \smath{r}. Not trivial, but after a bit
of thinking, one can prove that if
\smath{\text{finite} (U\!N\!IV /\!/ \approx_{\mathbb{L}(r_1)})}\hspace{5mm}
\smath{\text{finite} (U\!N\!IV /\!/ \approx_{\mathbb{L}(r_2)})}
\smath{\text{finite} (U\!N\!IV /\!/ \approx_{\mathbb{L}(r_1) \,\cup\, \mathbb{L}(r_2)})}
\frametitle{\LARGE What Have We Achieved?}
\item \smath{\text{finite}\, (U\!N\!IV /\!/ \approx_L) \;\Leftrightarrow\; L\; \text{is regular}}
\item regular languages are closed under complementation; this is now easy\medskip
\smath{U\!N\!IV /\!/ \approx_L \;\;=\;\; U\!N\!IV /\!/ \approx_{-L}}
\frametitle{\LARGE Examples}
\item \smath{L \equiv \Sigma^\star 0 \Sigma} is regular
\smath{A_1} & \smath{=} & \smath{\Sigma^\star 00}\\
\smath{A_2} & \smath{=} & \smath{\Sigma^\star 01}\\
\smath{A_3} & \smath{=} & \smath{\Sigma^\star 10 \cup \{0\}}\\
\smath{A_4} & \smath{=} & \smath{\Sigma^\star 11 \cup \{1\} \cup \{[]\}}\\
\item \smath{L \equiv \{ 0^n 1^n \,|\, n \ge 0\}} is not regular
\smath{B_0} & \smath{=} & \smath{\{0^n 1^n \,|\, n \ge 0\}}\\
\smath{B_1} & \smath{=} & \smath{\{0^n 1^{(n-1)} \,|\, n \ge 1\}}\\
\smath{B_2} & \smath{=} & \smath{\{0^n 1^{(n-2)} \,|\, n \ge 2\}}\\
\smath{B_3} & \smath{=} & \smath{\{0^n 1^{(n-3)} \,|\, n \ge 3\}}\\
& \smath{\vdots} &\\
\frametitle{\LARGE What We Have Not Achieved}
\item regular expressions are not good if you look for a minimal
one for a language (DFAs have this notion)\pause\bigskip
\item Is there anything to be said about context-free languages?\medskip
A context-free language is one where every string can be recognised by
a pushdown automaton.\bigskip
\textcolor{gray}{\footnotesize Yes. Derivatives also work for c-f grammars. Ongoing work.}
\frametitle{\LARGE Conclusion}
\item We formalised the Myhill-Nerode theorem based on
regular expressions only (DFAs are difficult to deal with in a theorem prover).\smallskip
\item Seems to be a common theme: algorithms need to be reformulated
to better suit formal treatment.\smallskip
\item The most interesting aspect is that we are able to
implement the matcher directly inside the theorem prover
(ongoing work).\smallskip
\item Parsing is a vast field which seems to offer new results.
\alert{\LARGE Thank you very much!}\\
\alert{\Large Questions?}