Paper.thy
changeset 48 559e5c6e5113
parent 47 251e192339b7
child 49 b388dceee892
equal deleted inserted replaced
47:251e192339b7 48:559e5c6e5113
     1 (*<*)
       
     2 theory Paper
       
     3 imports UTM
       
     4 begin
       
     5 
       
     6 hide_const (open) s 
       
     7 
       
     8 abbreviation 
       
     9   "update p a == new_tape a p"
       
    10 
       
    11 lemma fetch_def2: 
       
    12   shows "fetch p 0 b == (Nop, 0)"
       
    13   and "fetch p (Suc s) Bk == 
       
    14      (case nth_of p (2 * s) of
       
    15         Some i \<Rightarrow> i
       
    16       | None \<Rightarrow> (Nop, 0))"
       
    17   and "fetch p (Suc s) Oc == 
       
    18      (case nth_of p ((2 * s) + 1) of
       
    19          Some i \<Rightarrow> i
       
    20        | None \<Rightarrow> (Nop, 0))"
       
    21 apply -
       
    22 apply(rule_tac [!] eq_reflection)
       
    23 by (auto split: block.splits simp add: fetch.simps)
       
    24 
       
    25 lemma new_tape_def2: 
       
    26   shows "new_tape W0 (l, r) == (l, Bk#(tl r))" 
       
    27   and "new_tape W1 (l, r) == (l, Oc#(tl r))"
       
    28   and "new_tape L (l, r) == 
       
    29      (if l = [] then ([], Bk#r) else (tl l, (hd l) # r))" 
       
    30   and "new_tape R (l, r) ==
       
    31      (if r = [] then (Bk#l,[]) else ((hd r)#l, tl r))" 
       
    32   and "new_tape Nop (l, r) == (l, r)"
       
    33 apply -
       
    34 apply(rule_tac [!] eq_reflection)
       
    35 apply(auto split: taction.splits simp add: new_tape.simps)
       
    36 done
       
    37 
       
    38 
       
    39 abbreviation 
       
    40   "read r == if (r = []) then Bk else hd r"
       
    41 
       
    42 lemma tstep_def2:
       
    43   shows "tstep (s, l, r) p == (let (a, s') = fetch p s (read r) in (s', new_tape a (l, r)))"
       
    44 apply -
       
    45 apply(rule_tac [!] eq_reflection)
       
    46 by (auto split: if_splits prod.split list.split simp add: tstep.simps)
       
    47 
       
    48 abbreviation
       
    49  "run p inp out == \<exists>n. steps (1, inp) p n = (0, out)"
       
    50 
       
    51 lemma haltP_def2:
       
    52   "haltP p n = (\<exists>k l m. 
       
    53      run p ([], exponent Oc n) (exponent Bk k, exponent Oc l @ exponent Bk m))"
       
    54 unfolding haltP_def 
       
    55 apply(auto)
       
    56 done
       
    57 
       
    58 lemma tape_of_nat_list_def2:
       
    59   shows "tape_of_nat_list [] = []" 
       
    60   and "tape_of_nat_list [n] = exponent Oc (n+1)"
       
    61   and "ns\<noteq> [] ==> tape_of_nat_list (n#ns) = (exponent Oc (n+1)) @ [Bk] @ (tape_of_nat_list ns)"
       
    62 apply(auto simp add: tape_of_nat_list.simps)
       
    63 apply(case_tac ns)
       
    64 apply(auto simp add: tape_of_nat_list.simps)
       
    65 done
       
    66 
       
    67 lemma tshift_def2:
       
    68   fixes p::"tprog"
       
    69   shows "tshift p n == (map (\<lambda> (a, s). (a, (if s = 0 then 0 else s + n))) p)"
       
    70 apply(rule eq_reflection)
       
    71 apply(auto simp add: tshift.simps)
       
    72 done
       
    73 
       
    74 lemma change_termi_state_def2:
       
    75  "change_termi_state p  == 
       
    76   (map (\<lambda> (a, s). (a, if s = 0 then ((length p) div 2) + 1 else s)) p)"
       
    77 apply(rule eq_reflection)
       
    78 apply(auto simp add: change_termi_state.simps)
       
    79 done
       
    80 
       
    81 
       
    82 
       
    83 consts DUMMY::'a
       
    84 
       
    85 notation (latex output)
       
    86   Cons ("_::_" [78,77] 73) and
       
    87   set ("") and
       
    88   W0 ("W\<^bsub>\<^raw:\hspace{-2pt}>Bk\<^esub>") and
       
    89   W1 ("W\<^bsub>\<^raw:\hspace{-2pt}>Oc\<^esub>") and
       
    90   t_correct ("twf") and 
       
    91   tstep ("step") and
       
    92   steps ("nsteps") and
       
    93   abc_lm_v ("lookup") and
       
    94   abc_lm_s ("set") and
       
    95   haltP ("stdhalt") and 
       
    96   tshift ("shift") and 
       
    97   tcopy ("copy") and 
       
    98   change_termi_state ("adjust") and
       
    99   tape_of_nat_list ("\<ulcorner>_\<urcorner>") and 
       
   100   t_add ("_ \<oplus> _") and 
       
   101   DUMMY  ("\<^raw:\mbox{$\_$}>")
       
   102 
       
   103 declare [[show_question_marks = false]]
       
   104 (*>*)
       
   105 
       
   106 section {* Introduction *}
       
   107 
       
   108 text {*
       
   109 
       
   110 \noindent
       
   111 We formalised in earlier work the correctness proofs for two
       
   112 algorithms in Isabelle/HOL---one about type-checking in
       
   113 LF~\cite{UrbanCheneyBerghofer11} and another about deciding requests
       
   114 in access control~\cite{WuZhangUrban12}.  The formalisations
       
   115 uncovered a gap in the informal correctness proof of the former and
       
   116 made us realise that important details were left out in the informal
       
   117 model for the latter. However, in both cases we were unable to
       
   118 formalise in Isabelle/HOL computability arguments about the
       
   119 algorithms. The reason is that both algorithms are formulated in terms
       
   120 of inductive predicates. Suppose @{text "P"} stands for one such
       
   121 predicate.  Decidability of @{text P} usually amounts to showing
       
   122 whether \mbox{@{term "P \<or> \<not>P"}} holds. But this does \emph{not} work
       
   123 in Isabelle/HOL, since it is a theorem prover based on classical logic
       
   124 where the law of excluded middle ensures that \mbox{@{term "P \<or> \<not>P"}}
       
   125 is always provable no matter whether @{text P} is constructed by
       
   126 computable means. The same problem would arise if we had formulated
       
   127 the algorithms as recursive functions, because internally in
       
   128 Isabelle/HOL, like in all HOL-based theorem provers, functions are
       
   129 represented as inductively defined predicates too.
       
   130 
       
   131 The only satisfying way out of this problem in a theorem prover based on classical
       
   132 logic is to formalise a theory of computability. Norrish provided such
       
   133 a formalisation for the HOL4 theorem prover. He choose the
       
   134 $\lambda$-calculus as the starting point for his formalisation
       
   135 of computability theory,
       
   136 because of its ``simplicity'' \cite[Page 297]{Norrish11}.  Part of his
       
   137 formalisation is a clever infrastructure for reducing
       
   138 $\lambda$-terms. He also established the computational equivalence
       
   139 between the $\lambda$-calculus and recursive functions.  Nevertheless he
       
   140 concluded that it would be ``appealing'' to have formalisations for more
       
   141 operational models of computations, such as Turing machines or register
       
   142 machines.  One reason is that many proofs in the literature use 
       
   143 them.  He noted however that in the context of theorem provers
       
   144 \cite[Page 310]{Norrish11}:
       
   145 
       
   146 \begin{quote}
       
   147 \it``If register machines are unappealing because of their 
       
   148 general fiddliness, Turing machines are an even more 
       
   149 daunting prospect.''
       
   150 \end{quote}
       
   151 
       
   152 \noindent
       
   153 In this paper we take on this daunting prospect and provide a
       
   154 formalisation of Turing machines, as well as abacus machines (a kind
       
   155 of register machines) and recursive functions. To see the difficulties
       
   156 involved with this work, one has to understand that interactive
       
   157 theorem provers, like Isabelle/HOL, are at their best when the
       
   158 data-structures at hand are ``structurally'' defined, like lists,
       
   159 natural numbers, regular expressions, etc. Such data-structures come
       
   160 with convenient reasoning infrastructures (for example induction
       
   161 principles, recursion combinators and so on).  But this is \emph{not}
       
   162 the case with Turing machines (and also not with register machines):
       
   163 underlying their definitions are sets of states together with 
       
   164 transition functions, all of which are not structurally defined.  This
       
   165 means we have to implement our own reasoning infrastructure in order
       
   166 to prove properties about them. This leads to annoyingly fiddly
       
   167 formalisations.  We noticed first the difference between both,
       
   168 structural and non-structural, ``worlds'' when formalising the
       
   169 Myhill-Nerode theorem, where regular expressions fared much better
       
   170 than automata \cite{WuZhangUrban11}.  However, with Turing machines
       
   171 there seems to be no alternative if one wants to formalise the great
       
   172 many proofs from the literature that use them.  We will analyse one
       
   173 example---undecidability of Wang's tiling problem---in Section~\ref{Wang}. The
       
   174 standard proof of this property uses the notion of universal
       
   175 Turing machines.
       
   176 
       
   177 We are not the first who formalised Turing machines in a theorem
       
   178 prover: we are aware of the preliminary work by Asperti and Ricciotti
       
   179 \cite{AspertiRicciotti12}. They describe a complete formalisation of
       
   180 Turing machines in the Matita theorem prover, including a universal
       
   181 Turing machine. They report that the informal proofs from which they
       
   182 started are \emph{not} ``sufficiently accurate to be directly usable as a
       
   183 guideline for formalization'' \cite[Page 2]{AspertiRicciotti12}. For
       
   184 our formalisation we followed mainly the proofs from the textbook
       
   185 \cite{Boolos87} and found that the description there is quite
       
   186 detailed. Some details are left out however: for example, it is only
       
   187 shown how the universal Turing machine is constructed for Turing
       
   188 machines computing unary functions. We had to figure out a way to
       
   189 generalise this result to $n$-ary functions. Similarly, when compiling
       
   190 recursive functions to abacus machines, the textbook again only shows
       
   191 how it can be done for 2- and 3-ary functions, but in the
       
   192 formalisation we need arbitrary functions. But the general ideas for
       
   193 how to do this are clear enough in \cite{Boolos87}. However, one
       
   194 aspect that is completely left out from the informal description in
       
   195 \cite{Boolos87}, and similar ones we are aware of, is arguments why certain Turing
       
   196 machines are correct. We will introduce Hoare-style proof rules
       
   197 which help us with such correctness arguments of Turing machines.
       
   198 
       
   199 The main difference between our formalisation and the one by Asperti
       
   200 and Ricciotti is that their universal Turing machine uses a different
       
   201 alphabet than the machines it simulates. They write \cite[Page
       
   202 23]{AspertiRicciotti12}:
       
   203 
       
   204 \begin{quote}\it
       
   205 ``In particular, the fact that the universal machine operates with a
       
   206 different alphabet with respect to the machines it simulates is
       
   207 annoying.'' 
       
   208 \end{quote}
       
   209 
       
   210 \noindent
       
   211 In this paper we follow the approach by Boolos et al \cite{Boolos87},
       
   212 which goes back to Post \cite{Post36}, where all Turing machines
       
   213 operate on tapes that contain only \emph{blank} or \emph{occupied} cells
       
   214 (represented by @{term Bk} and @{term Oc}, respectively, in our
       
   215 formalisation). Traditionally the content of a cell can be any
       
   216 character from a finite alphabet. Although computationally equivalent,
       
   217 the more restrictive notion of Turing machines in \cite{Boolos87} makes
       
   218 the reasoning more uniform. In addition some proofs \emph{about} Turing
       
   219 machines are simpler.  The reason is that one often needs to encode
       
   220 Turing machines---consequently if the Turing machines are simpler, then the coding
       
   221 functions are simpler too. Unfortunately, the restrictiveness also makes
       
   222 it harder to design programs for these Turing machines. In order
       
   223 to construct a universal Turing machine we therefore do not follow 
       
   224 \cite{AspertiRicciotti12}, instead follow the proof in
       
   225 \cite{Boolos87} by relating abacus machines to Turing machines and in
       
   226 turn recursive functions to abacus machines. The universal Turing
       
   227 machine can then be constructed as a recursive function.
       
   228 
       
   229 \smallskip
       
   230 \noindent
       
   231 {\bf Contributions:} We formalised in Isabelle/HOL Turing machines following the
       
   232 description of Boolos et al \cite{Boolos87} where tapes only have blank or
       
   233 occupied cells. We mechanise the undecidability of the halting problem and
       
   234 prove the correctness of concrete Turing machines that are needed
       
   235 in this proof; such correctness proofs are left out in the informal literature.  
       
   236 We construct the universal Turing machine from \cite{Boolos87} by
       
   237 relating recursive functions to abacus machines and abacus machines to
       
   238 Turing machines. Since we have set up in Isabelle/HOL a very general computability
       
   239 model and undecidability result, we are able to formalise the
       
   240 undecidability of Wang's tiling problem. We are not aware of any other
       
   241 formalisation of a substantial undecidability problem.
       
   242 *}
       
   243 
       
   244 section {* Turing Machines *}
       
   245 
       
   246 text {* \noindent
       
   247   Turing machines can be thought of as having a read-write-unit, also
       
   248   referred to as \emph{head},
       
   249   ``gliding'' over a potentially infinite tape. Boolos et
       
   250   al~\cite{Boolos87} only consider tapes with cells being either blank
       
   251   or occupied, which we represent by a datatype having two
       
   252   constructors, namely @{text Bk} and @{text Oc}.  One way to
       
   253   represent such tapes is to use a pair of lists, written @{term "(l,
       
   254   r)"}, where @{term l} stands for the tape on the left-hand side of the
       
   255   head and @{term r} for the tape on the right-hand side. We have the
       
   256   convention that the head, abbreviated @{term hd}, of the right-list is
       
   257   the cell on which the head of the Turing machine currently operates. This can
       
   258   be pictured as follows:
       
   259 
       
   260   \begin{center}
       
   261   \begin{tikzpicture}
       
   262   \draw[very thick] (-3.0,0)   -- ( 3.0,0);
       
   263   \draw[very thick] (-3.0,0.5) -- ( 3.0,0.5);
       
   264   \draw[very thick] (-0.25,0)   -- (-0.25,0.5);
       
   265   \draw[very thick] ( 0.25,0)   -- ( 0.25,0.5);
       
   266   \draw[very thick] (-0.75,0)   -- (-0.75,0.5);
       
   267   \draw[very thick] ( 0.75,0)   -- ( 0.75,0.5);
       
   268   \draw[very thick] (-1.25,0)   -- (-1.25,0.5);
       
   269   \draw[very thick] ( 1.25,0)   -- ( 1.25,0.5);
       
   270   \draw[very thick] (-1.75,0)   -- (-1.75,0.5);
       
   271   \draw[very thick] ( 1.75,0)   -- ( 1.75,0.5);
       
   272   \draw[rounded corners=1mm] (-0.35,-0.1) rectangle (0.35,0.6);
       
   273   \draw[fill]     (1.35,0.1) rectangle (1.65,0.4);
       
   274   \draw[fill]     (0.85,0.1) rectangle (1.15,0.4);
       
   275   \draw[fill]     (-0.35,0.1) rectangle (-0.65,0.4);
       
   276   \draw (-0.25,0.8) -- (-0.25,-0.8);
       
   277   \draw[<->] (-1.25,-0.7) -- (0.75,-0.7);
       
   278   \node [anchor=base] at (-0.8,-0.5) {\small left list};
       
   279   \node [anchor=base] at (0.35,-0.5) {\small right list};
       
   280   \node [anchor=base] at (0.1,0.7) {\small head};
       
   281   \node [anchor=base] at (-2.2,0.2) {\ldots};
       
   282   \node [anchor=base] at ( 2.3,0.2) {\ldots};
       
   283   \end{tikzpicture}
       
   284   \end{center}
       
   285   
       
   286   \noindent
       
   287   Note that by using lists each side of the tape is only finite. The
       
   288   potential infinity is achieved by adding an appropriate blank or occupied cell 
       
   289   whenever the head goes over the ``edge'' of the tape. To 
       
   290   make this formal we define five possible \emph{actions} 
       
   291   the Turing machine can perform:
       
   292 
       
   293   \begin{center}
       
   294   \begin{tabular}{rcll}
       
   295   @{text "a"} & $::=$  & @{term "W0"} & write blank (@{term Bk})\\
       
   296   & $\mid$ & @{term "W1"} & write occupied (@{term Oc})\\
       
   297   & $\mid$ & @{term L} & move left\\
       
   298   & $\mid$ & @{term R} & move right\\
       
   299   & $\mid$ & @{term Nop} & do-nothing operation\\
       
   300   \end{tabular}
       
   301   \end{center}
       
   302 
       
   303   \noindent
       
   304   We slightly deviate
       
   305   from the presentation in \cite{Boolos87} by using the @{term Nop} operation; however its use
       
   306   will become important when we formalise halting computations and also universal Turing 
       
   307   machines. Given a tape and an action, we can define the
       
   308   following tape updating function:
       
   309 
       
   310   \begin{center}
       
   311   \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
       
   312   @{thm (lhs) new_tape_def2(1)} & @{text "\<equiv>"} & @{thm (rhs) new_tape_def2(1)}\\
       
   313   @{thm (lhs) new_tape_def2(2)} & @{text "\<equiv>"} & @{thm (rhs) new_tape_def2(2)}\\
       
   314   @{thm (lhs) new_tape_def2(3)} & @{text "\<equiv>"} & \\
       
   315   \multicolumn{3}{l}{\hspace{1cm}@{thm (rhs) new_tape_def2(3)}}\\
       
   316   @{thm (lhs) new_tape_def2(4)} & @{text "\<equiv>"} & \\
       
   317   \multicolumn{3}{l}{\hspace{1cm}@{thm (rhs) new_tape_def2(4)}}\\
       
   318   @{thm (lhs) new_tape_def2(5)} & @{text "\<equiv>"} & @{thm (rhs) new_tape_def2(5)}\\
       
   319   \end{tabular}
       
   320   \end{center}
       
   321 
       
   322   \noindent
       
   323   The first two clauses replace the head of the right-list
       
   324   with a new @{term Bk} or @{term Oc}, respectively. To see that
       
   325   these two clauses make sense in case where @{text r} is the empty
       
   326   list, one has to know that the tail function, @{term tl}, is defined in
       
   327   Isabelle/HOL
       
   328   such that @{term "tl [] == []"} holds. The third clause 
       
   329   implements the move of the head one step to the left: we need
       
   330   to test if the left-list @{term l} is empty; if yes, then we just prepend a 
       
   331   blank cell to the right-list; otherwise we have to remove the
       
   332   head from the left-list and prepend it to the right-list. Similarly
       
   333   in the fourth clause for a right move action. The @{term Nop} operation
       
   334   leaves the the tape unchanged (last clause).
       
   335 
       
   336   Note that our treatment of the tape is rather ``unsymmetric''---we
       
   337   have the convention that the head of the right-list is where the
       
   338   head is currently positioned. Asperti and Ricciotti
       
   339   \cite{AspertiRicciotti12} also considered such a representation, but
       
   340   dismiss it as it complicates their definition for \emph{tape
       
   341   equality}. The reason is that moving the head one step to
       
   342   the left and then back to the right might change the tape (in case
       
   343   of going over the ``edge''). Therefore they distinguish four types
       
   344   of tapes: one where the tape is empty; another where the head
       
   345   is on the left edge, respectively right edge, and in the middle
       
   346   of the tape. The reading, writing and moving of the tape is then
       
   347   defined in terms of these four cases.  In this way they can keep the
       
   348   tape in a ``normalised'' form, and thus making a left-move followed
       
   349   by a right-move being the identity on tapes. Since we are not using
       
   350   the notion of tape equality, we can get away with the unsymmetric
       
   351   definition above, and by using the @{term update} function
       
   352   cover uniformly all cases including corner cases.
       
   353 
       
   354   Next we need to define the \emph{states} of a Turing machine.  Given
       
   355   how little is usually said about how to represent them in informal
       
   356   presentations, it might be surprising that in a theorem prover we
       
   357   have to select carefully a representation. If we use the naive
       
   358   representation where a Turing machine consists of a finite set of
       
   359   states, then we will have difficulties composing two Turing
       
   360   machines: we would need to combine two finite sets of states,
       
   361   possibly renaming states apart whenever both machines share
       
   362   states.\footnote{The usual disjoint union operation in Isabelle/HOL
       
   363   cannot be used as it does not preserve types.} This renaming can be
       
   364   quite cumbersome to reason about. Therefore we made the choice of
       
   365   representing a state by a natural number and the states of a Turing
       
   366   machine will always consist of the initial segment of natural
       
   367   numbers starting from @{text 0} up to the number of states of the
       
   368   machine. In doing so we can compose two Turing machine by
       
   369   shifting the states of one by an appropriate amount to a higher
       
   370   segment and adjusting some ``next states'' in the other.
       
   371 
       
   372   An \emph{instruction} @{term i} of a Turing machine is a pair consisting of 
       
   373   an action and a natural number (the next state). A \emph{program} @{term p} of a Turing
       
   374   machine is then a list of such pairs. Using as an example the following Turing machine
       
   375   program, which consists of four instructions
       
   376 
       
   377   \begin{equation}
       
   378   \begin{tikzpicture}
       
   379   \node [anchor=base] at (0,0) {@{thm dither_def}};
       
   380   \node [anchor=west] at (-1.5,-0.42) {$\underbrace{\hspace{21mm}}_{\text{1st state}}$};
       
   381   \node [anchor=west] at ( 1.1,-0.42) {$\underbrace{\hspace{17mm}}_{\text{2nd state}}$};
       
   382   \node [anchor=west] at (-1.5,0.65) {$\overbrace{\hspace{10mm}}^{\text{@{term Bk}-case}}$};
       
   383   \node [anchor=west] at (-0.1,0.65) {$\overbrace{\hspace{6mm}}^{\text{@{term Oc}-case}}$};
       
   384   \end{tikzpicture}
       
   385   \label{dither}
       
   386   \end{equation}
       
   387 
       
   388   \noindent
       
   389   the reader can see we have organised our Turing machine programs so
       
   390   that segments of two belong to a state. The first component of the
       
   391   segment determines what action should be taken and which next state
       
   392   should be transitioned to in case the head reads a @{term Bk};
       
   393   similarly the second component determines what should be done in
       
   394   case of reading @{term Oc}. We have the convention that the first
       
   395   state is always the \emph{starting state} of the Turing machine.
       
   396   The zeroth state is special in that it will be used as the
       
   397   ``halting state''.  There are no instructions for the @{text
       
   398   0}-state, but it will always perform a @{term Nop}-operation and
       
   399   remain in the @{text 0}-state.  Unlike Asperti and Riccioti
       
   400   \cite{AspertiRicciotti12}, we have chosen a very concrete
       
   401   representation for programs, because when constructing a universal
       
   402   Turing machine, we need to define a coding function for programs.
       
   403   This can be easily done for our programs-as-lists, but is more
       
   404   difficult for the functions used by Asperti and Ricciotti.
       
   405 
       
   406   Given a program @{term p}, a state
       
   407   and the cell being read by the head, we need to fetch
       
   408   the corresponding instruction from the program. For this we define 
       
   409   the function @{term fetch}
       
   410  
       
   411   \begin{center}
       
   412   \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
       
   413   \multicolumn{3}{l}{@{thm fetch_def2(1)[where b=DUMMY]}}\\
       
   414   @{thm (lhs) fetch_def2(2)} & @{text "\<equiv>"} & \\
       
   415   \multicolumn{3}{@ {\hspace{1cm}}l}{@{text "case nth_of p (2 * s) of"}}\\
       
   416   \multicolumn{3}{@ {\hspace{1.4cm}}l}{@{text "None \<Rightarrow> (Nop, 0) |"}}\\
       
   417   \multicolumn{3}{@ {\hspace{1.4cm}}l}{@{text "Some i \<Rightarrow> i"}}\\
       
   418   @{thm (lhs) fetch_def2(3)} & @{text "\<equiv>"} & \\
       
   419   \multicolumn{3}{@ {\hspace{1cm}}l}{@{text "case nth_of p (2 * s + 1) of"}}\\
       
   420   \multicolumn{3}{@ {\hspace{1.4cm}}l}{@{text "None \<Rightarrow> (Nop, 0) |"}}\\
       
   421   \multicolumn{3}{@ {\hspace{1.4cm}}l}{@{text "Some i \<Rightarrow> i"}}
       
   422   \end{tabular}
       
   423   \end{center}
       
   424 
       
   425   \noindent
       
   426   In this definition the function @{term nth_of} returns the @{text n}th element
       
   427   from a list, provided it exists (@{term Some}-case), or if it does not, it
       
   428   returns the default action @{term Nop} and the default state @{text 0} 
       
   429   (@{term None}-case). In doing so we slightly deviate from the description
       
   430   in \cite{Boolos87}: if their Turing machines transition to a non-existing
       
   431   state, then the computation is halted. We will transition in such cases
       
   432   to the @{text 0}-state. However, with introducing the
       
   433   notion of \emph{well-formed} Turing machine programs we will later exclude such
       
   434   cases and make the  @{text 0}-state the only ``halting state''. A program 
       
   435   @{term p} is said to be well-formed if it satisfies
       
   436   the following three properties:
       
   437 
       
   438   \begin{center}
       
   439   \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
       
   440   @{term "t_correct p"} & @{text "\<equiv>"} & @{term "2 <= length p"}\\
       
   441                         & @{text "\<and>"} & @{term "iseven (length p)"}\\
       
   442                         & @{text "\<and>"} & @{term "\<forall> (a, s) \<in> set p. s <= length p div 2"}
       
   443   \end{tabular}
       
   444   \end{center}
       
   445 
       
   446   \noindent
       
   447   The first says that @{text p} must have at least an instruction for the starting 
       
   448   state; the second that @{text p} has a @{term Bk} and @{term Oc} instruction for every 
       
   449   state, and the third that every next-state is one of the states mentioned in
       
   450   the program or being the @{text 0}-state.
       
   451 
       
   452   A \emph{configuration} @{term c} of a Turing machine is a state together with 
       
   453   a tape. This is written as @{text "(s, (l, r))"}. If we have a 
       
   454   configuration and a program, we can calculate
       
   455   what the next configuration is by fetching the appropriate action and next state
       
   456   from the program, and by updating the state and tape accordingly. 
       
   457   This single step of execution is defined as the function @{term tstep}
       
   458 
       
   459   \begin{center}
       
   460   \begin{tabular}{l}
       
   461   @{text "step (s, (l, r)) p"} @{text "\<equiv>"}\\
       
   462   \hspace{10mm}@{text "let (a, s) = fetch p s (read r)"}\\
       
   463   \hspace{10mm}@{text "in (s', update (l, r) a)"}
       
   464   \end{tabular}
       
   465   \end{center}
       
   466 
       
   467   \noindent
       
   468   where @{term "read r"} returns the head of the list @{text r}, or if @{text r} is
       
   469   empty it returns @{term Bk}.
       
   470   It is impossible in Isabelle/HOL to lift the @{term step}-function realising
       
   471   a general evaluation function for Turing machines. The reason is that functions in HOL-based
       
   472   provers need to be terminating, and clearly there are Turing machine 
       
   473   programs that are not. We can however define an evaluation
       
   474   function so that it performs exactly @{text n} steps:
       
   475 
       
   476   \begin{center}
       
   477   \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
       
   478   @{thm (lhs) steps.simps(1)} & @{text "\<equiv>"} & @{thm (rhs) steps.simps(1)}\\
       
   479   @{thm (lhs) steps.simps(2)} & @{text "\<equiv>"} & @{thm (rhs) steps.simps(2)}\\
       
   480   \end{tabular}
       
   481   \end{center}
       
   482 
       
   483   \noindent
       
   484   Recall our definition of @{term fetch} with the default value for
       
   485   the @{text 0}-state. In case a Turing program takes in \cite{Boolos87} less 
       
   486   then @{text n} steps before it halts, then in our setting the @{term steps}-evaluation
       
   487   does not actually halt, but rather transitions to the @{text 0}-state and 
       
   488   remains there performing @{text Nop}-actions until @{text n} is reached. 
       
   489 
       
   490   Given some input tape @{text "(l\<^isub>i,r\<^isub>i)"}, we can define when a program 
       
   491   @{term p} generates a specific  output tape @{text "(l\<^isub>o,r\<^isub>o)"}
       
   492 
       
   493   \begin{center}
       
   494   \begin{tabular}{l}
       
   495   @{term "runs p (l\<^isub>i, r\<^isub>i) (l\<^isub>o,r\<^isub>o)"} @{text "\<equiv>"}\\
       
   496   \hspace{6mm}@{text "\<exists>n. nsteps (1, (l\<^isub>i,r\<^isub>i)) p n = (0, (l\<^isub>o,r\<^isub>o))"}
       
   497   \end{tabular}
       
   498   \end{center}
       
   499 
       
   500   \noindent
       
   501   where @{text 1} stands for the starting state and @{text 0} for our final state.
       
   502   A program @{text p} with input tape @{term "(l\<^isub>i, r\<^isub>i)"} \emph{halts} iff
       
   503 
       
   504   \begin{center}
       
   505   @{term "halts p (l\<^isub>i, r\<^isub>i) \<equiv>
       
   506   \<exists>l\<^isub>o r\<^isub>o. runs p (l\<^isub>i, r\<^isub>i) (l\<^isub>o,r\<^isub>o)"}
       
   507   \end{center}
       
   508 
       
   509   \noindent
       
   510   Later on we need to consider specific Turing machines that 
       
   511   start with a tape in standard form and halt the computation
       
   512   in standard form. To define a tape in standard form, it is
       
   513   useful to have an operation @{term "tape_of_nat_list DUMMY"} that 
       
   514   translates 
       
   515   lists of natural numbers into tapes. 
       
   516 
       
   517   \begin{center}
       
   518   \begin{tabular}{l@ {\hspace{1mm}}c@ {\hspace{1mm}}l}
       
   519   @{thm (lhs) tape_of_nat_list_def2(1)} & @{text "\<equiv>"} & @{thm (rhs) tape_of_nat_list_def2(1)}\\
       
   520   @{thm (lhs) tape_of_nat_list_def2(2)} & @{text "\<equiv>"} & @{thm (rhs) tape_of_nat_list_def2(2)}\\
       
   521   @{thm (lhs) tape_of_nat_list_def2(3)} & @{text "\<equiv>"} & @{thm (rhs) tape_of_nat_list_def2(3)}\\
       
   522   \end{tabular}
       
   523   \end{center}
       
   524 
       
   525   
       
   526 
       
   527 
       
   528   By this we mean
       
   529 
       
   530   \begin{center}
       
   531   @{thm haltP_def2[where p="p" and n="n", THEN eq_reflection]}
       
   532   \end{center}
       
   533 
       
   534   \noindent
       
   535   This means the Turing machine starts with a tape containg @{text n} @{term Oc}s
       
   536   and the head pointing to the first one; the Turing machine
       
   537   halts with a tape consisting of some @{term Bk}s, followed by a 
       
   538   ``cluster'' of @{term Oc}s and after that by some @{term Bk}s.
       
   539   The head in the output is pointing again at the first @{term Oc}.
       
   540   The intuitive meaning of this definition is to start the Turing machine with a
       
   541   tape corresponding to a value @{term n} and producing
       
   542   a new tape corresponding to the value @{term l} (the number of @{term Oc}s
       
   543   clustered on the output tape).
       
   544 
       
   545   Before we can prove the undecidability of the halting problem for Turing machines, 
       
   546   we have to define how to compose two Turing machines. Given our setup, this is 
       
   547   relatively straightforward, if slightly fiddly. We use the following two
       
   548   auxiliary functions:
       
   549 
       
   550   \begin{center}
       
   551   \begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1mm}}l@ {}}
       
   552   @{thm (lhs) tshift_def2} @{text "\<equiv>"}\\
       
   553   \hspace{4mm}@{thm (rhs) tshift_def2}\\
       
   554   @{thm (lhs) change_termi_state_def2} @{text "\<equiv>"}\\
       
   555   \hspace{4mm}@{text "map (\<lambda> (a, s)."}\\
       
   556   \hspace{14mm}@{text "(a, if s = 0 then length p div 2 + 1 else s)) p"}\\
       
   557   \end{tabular}
       
   558   \end{center}
       
   559 
       
   560   \noindent
       
   561   The first adds @{text n} to all states, exept the @{text 0}-state,
       
   562   thus moving all ``regular'' states to the segment starting at @{text
       
   563   n}; the second adds @{term "length p div 2 + 1"} to the @{text
       
   564   0}-state, thus ridirecting all references to the ``halting state''
       
   565   to the first state after the program @{text p}.  With these two
       
   566   functions in place, we can define the \emph{sequential composition}
       
   567   of two Turing machine programs @{text "p\<^isub>1"} and @{text "p\<^isub>2"}
       
   568 
       
   569   \begin{center}
       
   570   @{thm t_add.simps[where ?t1.0="p\<^isub>1" and ?t2.0="p\<^isub>2", THEN eq_reflection]}
       
   571   \end{center}
       
   572 
       
   573   \noindent
       
   574   This means @{text "p\<^isub>1"} is executed first. Whenever it originally
       
   575   transitioned to the @{text 0}-state, it will in the composed program transition to the starting
       
   576   state of @{text "p\<^isub>2"} instead. All the states of @{text "p\<^isub>2"}
       
   577   have been shifted in order to make sure that the states of the composed 
       
   578   program @{text "p\<^isub>1 \<oplus> p\<^isub>2"} still only ``occupy'' 
       
   579   an initial segment of the natural numbers.
       
   580 
       
   581   \begin{center}
       
   582   \begin{tabular}{@ {}l@ {\hspace{1mm}}c@ {\hspace{1mm}}p{6.9cm}@ {}}
       
   583   @{thm (lhs) tcopy_def} & @{text "\<equiv>"} & @{thm (rhs) tcopy_def}
       
   584   \end{tabular}
       
   585   \end{center}
       
   586 
       
   587 
       
   588   assertion holds for all tapes
       
   589 
       
   590   Hoare rule for composition
       
   591 
       
   592   For showing the undecidability of the halting problem, we need to consider
       
   593   two specific Turing machines. copying TM and dithering TM
       
   594 
       
   595   correctness of the copying TM
       
   596 
       
   597   measure for the copying TM, which we however omit.
       
   598 
       
   599   halting problem
       
   600 *}
       
   601 
       
   602 section {* Abacus Machines *}
       
   603 
       
   604 text {*
       
   605   \noindent
       
   606   Boolos et al \cite{Boolos87} use abacus machines as a 
       
   607   stepping stone for making it less laborious to write
       
   608   programs for Turing machines. Abacus machines operate
       
   609   over an unlimited number of registers $R_0$, $R_1$, \ldots
       
   610   each being able to hold an arbitrary large natural number.
       
   611   We use natural numbers to refer to registers, but also
       
   612   to refer to \emph{opcodes} of abacus 
       
   613   machines. Obcodes are given by the datatype
       
   614 
       
   615   \begin{center}
       
   616   \begin{tabular}{rcll}
       
   617   @{text "o"} & $::=$  & @{term "Inc R\<iota>"} & increment register $R$ by one\\
       
   618   & $\mid$ & @{term "Dec R\<iota> o\<iota>"} & if content of $R$ is non-zero,\\
       
   619   & & & then decrement it by one\\
       
   620   & & & otherwise jump to opcode $o$\\
       
   621   & $\mid$ & @{term "Goto o\<iota>"} & jump to opcode $o$
       
   622   \end{tabular}
       
   623   \end{center}
       
   624 
       
   625   \noindent
       
   626   A \emph{program} of an abacus machine is a list of such
       
   627   obcodes. For example the program clearing the register
       
   628   $R$ (setting it to 0) can be defined as follows:
       
   629 
       
   630   \begin{center}
       
   631   @{thm clear.simps[where n="R\<iota>" and e="o\<iota>", THEN eq_reflection]}
       
   632   \end{center}
       
   633 
       
   634   \noindent
       
   635   The second opcode @{term "Goto 0"} in this programm means we 
       
   636   jump back to the first opcode, namely @{text "Dec R o"}.
       
   637   The \emph{memory} $m$ of an abacus machine holding the values
       
   638   of the registers is represented as a list of natural numbers.
       
   639   We have a lookup function for this memory, written @{term "abc_lm_v m R\<iota>"},
       
   640   which looks up the content of register $R$; if $R$
       
   641   is not in this list, then we return 0. Similarly we
       
   642   have a setting function, written @{term "abc_lm_s m R\<iota> n"}, which
       
   643   sets the value of $R$ to $n$, and if $R$ was not yet in $m$
       
   644   it pads it approriately with 0s.
       
   645 
       
   646 
       
   647   Abacus machine halts when it jumps out of range.
       
   648 *}
       
   649 
       
   650 
       
   651 section {* Recursive Functions *}
       
   652 
       
   653 section {* Wang Tiles\label{Wang} *}
       
   654 
       
   655 text {*
       
   656   Used in texture mapings - graphics
       
   657 *}
       
   658 
       
   659 
       
   660 section {* Related Work *}
       
   661 
       
   662 text {*
       
   663   The most closely related work is by Norrish \cite{Norrish11}, and Asperti and 
       
   664   Ricciotti \cite{AspertiRicciotti12}. Norrish bases his approach on 
       
   665   lambda-terms. For this he introduced a clever rewriting technology
       
   666   based on combinators and de-Bruijn indices for
       
   667   rewriting modulo $\beta$-equivalence (to keep it manageable)
       
   668 *}
       
   669 
       
   670 
       
   671 (*
       
   672 Questions:
       
   673 
       
   674 Can this be done: Ackerman function is not primitive 
       
   675 recursive (Nora Szasz)
       
   676 
       
   677 Tape is represented as two lists (finite - usually infinite tape)?
       
   678 
       
   679 *)
       
   680 
       
   681 
       
   682 (*<*)
       
   683 end
       
   684 (*>*)