diff -r a0b280dd4bc7 -r 520127b708e6 ProgTutorial/First_Steps.thy --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ProgTutorial/First_Steps.thy Wed Jul 28 19:09:49 2010 +0200 @@ -0,0 +1,1434 @@ +theory First_Steps +imports Base +begin + +(*<*) +setup{* +open_file_with_prelude + "First_Steps_Code.thy" + ["theory First_Steps", "imports Main", "begin"] +*} +(*>*) + +chapter {* First Steps\label{chp:firststeps} *} + +text {* + \begin{flushright} + {\em + ``We will most likely never realize the full importance of painting the Tower,\\ + that it is the essential element in the conservation of metal works and the\\ + more meticulous the paint job, the longer the tower shall endure.''} \\[1ex] + Gustave Eiffel, In his book {\em The 300-Meter Tower}.\footnote{The Eiffel Tower has been + re-painted 18 times since its initial construction, an average of once every + seven years. It takes more than one year for a team of 25 painters to paint the tower + from top to bottom.} + \end{flushright} + + \medskip + Isabelle programming is done in ML. Just like lemmas and proofs, ML-code for + Isabelle must be part of a theory. If you want to follow the code given in + this chapter, we assume you are working inside the theory starting with + + \begin{quote} + \begin{tabular}{@ {}l} + \isacommand{theory} First\_Steps\\ + \isacommand{imports} Main\\ + \isacommand{begin}\\ + \ldots + \end{tabular} + \end{quote} + + We also generally assume you are working with the logic HOL. The examples + that will be given might need to be adapted if you work in a different logic. +*} + +section {* Including ML-Code\label{sec:include} *} + +text {* + The easiest and quickest way to include code in a theory is by using the + \isacommand{ML}-command. For example: + + \begin{isabelle} + \begin{graybox} + \isacommand{ML}~@{text "\"}\isanewline + \hspace{5mm}@{ML "3 + 4"}\isanewline + @{text "\"}\isanewline + @{text "> 7"}\smallskip + \end{graybox} + \end{isabelle} + + Like normal Isabelle scripts, \isacommand{ML}-commands can be evaluated by + using the advance and undo buttons of your Isabelle environment. The code + inside the \isacommand{ML}-command can also contain value and function + bindings, for example +*} + +ML %gray {* + val r = Unsynchronized.ref 0 + fun f n = n + 1 +*} + +text {* + and even those can be undone when the proof script is retracted. As + mentioned in the Introduction, we will drop the \isacommand{ML}~@{text + "\ \ \"} scaffolding whenever we show code. The lines + prefixed with @{text [quotes] ">"} are not part of the code, rather they + indicate what the response is when the code is evaluated. There are also + the commands \isacommand{ML\_val} and \isacommand{ML\_prf} for including + ML-code. The first evaluates the given code, but any effect on the theory, + in which the code is embedded, is suppressed. The second needs to be used if + ML-code is defined inside a proof. For example + + \begin{quote} + \begin{isabelle} + \isacommand{lemma}~@{text "test:"}\isanewline + \isacommand{shows}~@{text [quotes] "True"}\isanewline + \isacommand{ML\_prf}~@{text "\"}~@{ML "writeln \"Trivial!\""}~@{text "\"}\isanewline + \isacommand{oops} + \end{isabelle} + \end{quote} + + However, both commands will only play minor roles in this tutorial (we will + always arrange that the ML-code is defined outside proofs). + + + + + Once a portion of code is relatively stable, you usually want to export it + to a separate ML-file. Such files can then be included somewhere inside a + theory by using the command \isacommand{use}. For example + + \begin{quote} + \begin{tabular}{@ {}l} + \isacommand{theory} First\_Steps\\ + \isacommand{imports} Main\\ + \isacommand{uses}~@{text "(\"file_to_be_included.ML\")"} @{text "\"}\\ + \isacommand{begin}\\ + \ldots\\ + \isacommand{use}~@{text "\"file_to_be_included.ML\""}\\ + \ldots + \end{tabular} + \end{quote} + + The \isacommand{uses}-command in the header of the theory is needed in order + to indicate the dependency of the theory on the ML-file. Alternatively, the + file can be included by just writing in the header + + \begin{quote} + \begin{tabular}{@ {}l} + \isacommand{theory} First\_Steps\\ + \isacommand{imports} Main\\ + \isacommand{uses} @{text "\"file_to_be_included.ML\""} @{text "\"}\\ + \isacommand{begin}\\ + \ldots + \end{tabular} + \end{quote} + + Note that no parentheses are given in this case. Note also that the included + ML-file should not contain any \isacommand{use} itself. Otherwise Isabelle + is unable to record all file dependencies, which is a nuisance if you have + to track down errors. +*} + +section {* Printing and Debugging\label{sec:printing} *} + +text {* + During development you might find it necessary to inspect data in your + code. This can be done in a ``quick-and-dirty'' fashion using the function + @{ML_ind writeln in Output}. For example + + @{ML_response_fake [display,gray] "writeln \"any string\"" "\"any string\""} + + will print out @{text [quotes] "any string"} inside the response buffer of + Isabelle. This function expects a string as argument. If you develop under + PolyML, then there is a convenient, though again ``quick-and-dirty'', method + for converting values into strings, namely the antiquotation + @{text "@{make_string}"}: + + @{ML_response_fake [display,gray] "writeln (@{make_string} 1)" "\"1\""} + + However, @{text "@{makes_tring}"} only works if the type of what + is converted is monomorphic and not a function. + + The function @{ML "writeln"} should only be used for testing purposes, + because any output this function generates will be overwritten as soon as an + error is raised. For printing anything more serious and elaborate, the + function @{ML_ind tracing in Output} is more appropriate. This function writes all + output into a separate tracing buffer. For example: + + @{ML_response_fake [display,gray] "tracing \"foo\"" "\"foo\""} + + It is also possible to redirect the ``channel'' where the string @{text + "foo"} is printed to a separate file, e.g., in order to prevent ProofGeneral from + choking on massive amounts of trace output. This redirection can be achieved + with the code: +*} + +ML{*val strip_specials = +let + fun strip ("\^A" :: _ :: cs) = strip cs + | strip (c :: cs) = c :: strip cs + | strip [] = []; +in + implode o strip o explode +end + +fun redirect_tracing stream = + Output.tracing_fn := (fn s => + (TextIO.output (stream, (strip_specials s)); + TextIO.output (stream, "\n"); + TextIO.flushOut stream)) *} + +text {* + Calling now + + @{ML [display,gray] "redirect_tracing (TextIO.openOut \"foo.bar\")"} + + will cause that all tracing information is printed into the file @{text "foo.bar"}. + + You can print out error messages with the function @{ML_ind error in Library}; for + example: + + @{ML_response_fake [display,gray] + "if 0=1 then true else (error \"foo\")" +"Exception- ERROR \"foo\" raised +At command \"ML\"."} + + This function raises the exception @{text ERROR}, which will then + be displayed by the infrastructure. Note that this exception is meant + for ``user-level'' error messages seen by the ``end-user''. + + For messages where you want to indicate a genuine program error, then + use the exception @{text Fail}. Here the infrastructure indicates that this + is a low-level exception, and also prints the source position of the ML + raise statement. + + + \footnote{\bf FIXME Mention how to work with @{ML_ind debug in Toplevel} and + @{ML_ind profiling in Toplevel}.} + +*} + +(* FIXME*) +(* +ML {* reset Toplevel.debug *} + +ML {* fun dodgy_fun () = (raise TYPE ("",[],[]); 1) *} + +ML {* fun innocent () = dodgy_fun () *} +ML {* exception_trace (fn () => cterm_of @{theory} (Bound 0)) *} +ML {* exception_trace (fn () => innocent ()) *} + +ML {* Toplevel.program (fn () => cterm_of @{theory} (Bound 0)) *} + +ML {* Toplevel.program (fn () => innocent ()) *} +*) + +text {* + %Kernel exceptions TYPE, TERM, THM also have their place in situations + %where kernel-like operations are involved. The printing is similar as for + %Fail, although there is some special treatment when Toplevel.debug is + %enabled. + + Most often you want to inspect data of Isabelle's basic data structures, + namely @{ML_type term}, @{ML_type typ}, @{ML_type cterm}, @{ML_type ctyp} + and @{ML_type thm}. Isabelle contains elaborate pretty-printing functions, + which we will explain in more detail in Section \ref{sec:pretty}. For now + we just use the functions @{ML_ind writeln in Pretty} from the structure + @{ML_struct Pretty} and @{ML_ind pretty_term in Syntax} from the structure + @{ML_struct Syntax}. For more convenience, we bind them to the toplevel. +*} + +ML{*val pretty_term = Syntax.pretty_term +val pwriteln = Pretty.writeln*} + +text {* + They can now be used as follows + + @{ML_response_fake [display,gray] + "pwriteln (pretty_term @{context} @{term \"1::nat\"})" + "\"1\""} + + If there is more than one term to be printed, you can use the + function @{ML_ind enum in Pretty} and commas to separate them. +*} + +ML{*fun pretty_terms ctxt ts = + Pretty.enum "," "" "" (map (pretty_term ctxt) ts)*} + +text {* + You can also print out terms together with their typing information. + For this you need to set the reference @{ML_ind show_types in Syntax} + to @{ML true}. +*} + +ML{*show_types := true*} + +text {* + Now @{ML pretty_term} prints out + + @{ML_response_fake [display, gray] + "pwriteln (pretty_term @{context} @{term \"(1::nat, x)\"})" + "(1::nat, x::'a)"} + + where @{text 1} and @{text x} are displayed with their inferred type. + Even more type information can be printed by setting + the reference @{ML_ind show_all_types in Syntax} to @{ML true}. + In this case we obtain +*} +(*<*)ML %linenos {*show_all_types := true*} +(*>*) +text {* + @{ML_response_fake [display, gray] + "pwriteln (pretty_term @{context} @{term \"(1::nat, x)\"})" + "(Pair::nat \ 'a \ nat \ 'a) (1::nat) (x::'a)"} + + where @{term Pair} is the term-constructor for products. + Other references that influence printing of terms are + @{ML_ind show_brackets in Syntax} and @{ML_ind show_sorts in Syntax}. +*} +(*<*)ML %linenos {*show_types := false; show_all_types := false*} +(*>*) +text {* + A @{ML_type cterm} can be printed with the following function. +*} + +ML{*fun pretty_cterm ctxt ct = + pretty_term ctxt (term_of ct)*} + +text {* + Here the function @{ML_ind term_of in Thm} extracts the @{ML_type + term} from a @{ML_type cterm}. More than one @{ML_type cterm}s can be + printed again with @{ML enum in Pretty}. +*} + +ML{*fun pretty_cterms ctxt cts = + Pretty.enum "," "" "" (map (pretty_cterm ctxt) cts)*} + +text {* + The easiest way to get the string of a theorem is to transform it + into a @{ML_type term} using the function @{ML_ind prop_of in Thm}. +*} + +ML{*fun pretty_thm ctxt thm = + pretty_term ctxt (prop_of thm)*} + +text {* + Theorems include schematic variables, such as @{text "?P"}, + @{text "?Q"} and so on. They are needed in Isabelle in order to able to + instantiate theorems when they are applied. For example the theorem + @{thm [source] conjI} shown below can be used for any (typable) + instantiation of @{text "?P"} and @{text "?Q"}. + + @{ML_response_fake [display, gray] + "pwriteln (pretty_thm @{context} @{thm conjI})" + "\?P; ?Q\ \ ?P \ ?Q"} + + However, in order to improve the readability when printing theorems, we + convert these schematic variables into free variables using the function + @{ML_ind import in Variable}. This is similar to statements like @{text + "conjI[no_vars]"} on Isabelle's user-level. +*} + +ML{*fun no_vars ctxt thm = +let + val ((_, [thm']), _) = Variable.import true [thm] ctxt +in + thm' +end + +fun pretty_thm_no_vars ctxt thm = + pretty_term ctxt (prop_of (no_vars ctxt thm))*} + + +text {* + With this function, theorem @{thm [source] conjI} is now printed as follows: + + @{ML_response_fake [display, gray] + "pwriteln (pretty_thm_no_vars @{context} @{thm conjI})" + "\P; Q\ \ P \ Q"} + + Again the function @{ML commas} helps with printing more than one theorem. +*} + +ML{*fun pretty_thms ctxt thms = + Pretty.enum "," "" "" (map (pretty_thm ctxt) thms) + +fun pretty_thms_no_vars ctxt thms = + Pretty.enum "," "" "" (map (pretty_thm_no_vars ctxt) thms)*} + +text {* + The printing functions for types are +*} + +ML{*fun pretty_typ ctxt ty = Syntax.pretty_typ ctxt ty +fun pretty_typs ctxt tys = Pretty.commas (map (pretty_typ ctxt) tys)*} + +text {* + respectively ctypes +*} + +ML{*fun pretty_ctyp ctxt cty = pretty_typ ctxt (typ_of cty) +fun pretty_ctyps ctxt ctys = Pretty.commas (map (pretty_ctyp ctxt) ctys)*} + +text {* + \begin{readmore} + The simple conversion functions from Isabelle's main datatypes to + @{ML_type string}s are implemented in @{ML_file "Pure/Syntax/syntax.ML"}. + The references that change the printing information are declared in + @{ML_file "Pure/Syntax/printer.ML"}. + \end{readmore} + + Note that for printing out several ``parcels'' of information that belong + together, like a warning message consisting of a term and its type, you + should try to print these parcels together in a single string. Therefore do + \emph{not} print out information as + +@{ML_response_fake [display,gray] +"writeln \"First half,\"; +writeln \"and second half.\"" +"First half, +and second half."} + + but as a single string with appropriate formatting. For example + +@{ML_response_fake [display,gray] +"writeln (\"First half,\" ^ \"\\n\" ^ \"and second half.\")" +"First half, +and second half."} + + To ease this kind of string manipulations, there are a number + of library functions in Isabelle. For example, the function + @{ML_ind cat_lines in Library} concatenates a list of strings + and inserts newlines in between each element. + + @{ML_response_fake [display, gray] + "writeln (cat_lines [\"foo\", \"bar\"])" + "foo +bar"} + + Section \ref{sec:pretty} will explain the infrastructure that Isabelle + provides for more elaborate pretty printing. + + \begin{readmore} + Most of the basic string functions of Isabelle are defined in + @{ML_file "Pure/library.ML"}. + \end{readmore} +*} + + +section {* Combinators\label{sec:combinators} *} + +text {* + For beginners perhaps the most puzzling parts in the existing code of + Isabelle are the combinators. At first they seem to greatly obstruct the + comprehension of code, but after getting familiar with them and handled with + care, they actually ease the understanding and also the programming. + + The simplest combinator is @{ML_ind I in Library}, which is just the + identity function defined as +*} + +ML{*fun I x = x*} + +text {* + Another simple combinator is @{ML_ind K in Library}, defined as +*} + +ML{*fun K x = fn _ => x*} + +text {* + @{ML K} ``wraps'' a function around @{text "x"} that ignores its argument. As a + result, @{ML K} defines a constant function always returning @{text x}. + + The next combinator is reverse application, @{ML_ind "|>" in Basics}, defined as: +*} + +ML{*fun x |> f = f x*} + +text {* While just syntactic sugar for the usual function application, + the purpose of this combinator is to implement functions in a + ``waterfall fashion''. Consider for example the function *} + +ML %linenosgray{*fun inc_by_five x = + x |> (fn x => x + 1) + |> (fn x => (x, x)) + |> fst + |> (fn x => x + 4)*} + +text {* + which increments its argument @{text x} by 5. It does this by first + incrementing the argument by 1 (Line 2); then storing the result in a pair + (Line 3); taking the first component of the pair (Line 4) and finally + incrementing the first component by 4 (Line 5). This kind of cascading + manipulations of values is quite common when dealing with theories. The + reverse application allows you to read what happens in a top-down + manner. This kind of coding should be familiar, if you have been exposed to + Haskell's {\it do}-notation. Writing the function @{ML inc_by_five} using + the reverse application is much clearer than writing +*} + +ML{*fun inc_by_five x = fst ((fn x => (x, x)) (x + 1)) + 4*} + +text {* or *} + +ML{*fun inc_by_five x = + ((fn x => x + 4) o fst o (fn x => (x, x)) o (fn x => x + 1)) x*} + +text {* and typographically more economical than *} + +ML{*fun inc_by_five x = +let val y1 = x + 1 + val y2 = (y1, y1) + val y3 = fst y2 + val y4 = y3 + 4 +in y4 end*} + +text {* + Another reason why the let-bindings in the code above are better to be + avoided: it is more than easy to get the intermediate values wrong, not to + mention the nightmares the maintenance of this code causes! + + In Isabelle a ``real world'' example for a function written in + the waterfall fashion might be the following code: +*} + +ML %linenosgray{*fun apply_fresh_args f ctxt = + f |> fastype_of + |> binder_types + |> map (pair "z") + |> Variable.variant_frees ctxt [f] + |> map Free + |> curry list_comb f *} + +text {* + This function takes a term and a context as argument. If the term is of function + type, then @{ML "apply_fresh_args"} returns the term with distinct variables + applied to it. For example below three variables are applied to the term + @{term [show_types] "P::nat \ int \ unit \ bool"}: + + @{ML_response_fake [display,gray] +"let + val trm = @{term \"P::nat \ int \ unit \ bool\"} + val ctxt = @{context} +in + apply_fresh_args trm ctxt + |> pretty_term ctxt + |> pwriteln +end" + "P z za zb"} + + You can read off this behaviour from how @{ML apply_fresh_args} is coded: in + Line 2, the function @{ML_ind fastype_of in Term} calculates the type of the + term; @{ML_ind binder_types in Term} in the next line produces the list of + argument types (in the case above the list @{text "[nat, int, unit]"}); Line + 4 pairs up each type with the string @{text "z"}; the function @{ML_ind + variant_frees in Variable} generates for each @{text "z"} a unique name + avoiding the given @{text f}; the list of name-type pairs is turned into a + list of variable terms in Line 6, which in the last line is applied by the + function @{ML_ind list_comb in Term} to the original term. In this last step we have + to use the function @{ML_ind curry in Library}, because @{ML list_comb} + expects the function and the variables list as a pair. + + Functions like @{ML apply_fresh_args} are often needed when constructing + terms involving fresh variables. For this the infrastructure helps + tremendously to avoid any name clashes. Consider for example: + + @{ML_response_fake [display,gray] +"let + val trm = @{term \"za::'a \ 'b \ 'c\"} + val ctxt = @{context} +in + apply_fresh_args trm ctxt + |> pretty_term ctxt + |> pwriteln +end" + "za z zb"} + + where the @{text "za"} is correctly avoided. + + The combinator @{ML_ind "#>" in Basics} is the reverse function composition. + It can be used to define the following function +*} + +ML{*val inc_by_six = + (fn x => x + 1) #> + (fn x => x + 2) #> + (fn x => x + 3)*} + +text {* + which is the function composed of first the increment-by-one function and + then increment-by-two, followed by increment-by-three. Again, the reverse + function composition allows you to read the code top-down. This combinator + is often used for setup functions inside the + \isacommand{setup}-command. These functions have to be of type @{ML_type + "theory -> theory"}. More than one such setup function can be composed with + @{ML "#>"}. For example +*} + +setup %gray {* let + val (ival1, setup_ival1) = Attrib.config_int "ival1" (K 1) + val (ival2, setup_ival2) = Attrib.config_int "ival2" (K 2) +in + setup_ival1 #> + setup_ival2 +end *} + +text {* + after this the configuration values @{text ival1} and @{text ival2} are known + in the current theory and can be manipulated by the user (for more information + about configuration values see Section~\ref{sec:storing}, for more about setup + functions see Section~\ref{sec:theories}). + + The remaining combinators we describe in this section add convenience for the + ``waterfall method'' of writing functions. The combinator @{ML_ind tap in + Basics} allows you to get hold of an intermediate result (to do some + side-calculations for instance). The function + *} + +ML %linenosgray{*fun inc_by_three x = + x |> (fn x => x + 1) + |> tap (fn x => tracing (PolyML.makestring x)) + |> (fn x => x + 2)*} + +text {* + increments the argument first by @{text "1"} and then by @{text "2"}. In the + middle (Line 3), however, it uses @{ML tap} for printing the ``plus-one'' + intermediate result. The function @{ML tap} can only be used for + side-calculations, because any value that is computed cannot be merged back + into the ``main waterfall''. To do this, you can use the next combinator. + + The combinator @{ML_ind "`" in Basics} (a backtick) is similar to @{ML tap}, + but applies a function to the value and returns the result together with the + value (as a pair). It is defined as +*} + +ML{*fun `f = fn x => (f x, x)*} + +text {* + An example for this combinator is the function +*} + +ML{*fun inc_as_pair x = + x |> `(fn x => x + 1) + |> (fn (x, y) => (x, y + 1))*} + +text {* + which takes @{text x} as argument, and then increments @{text x}, but also keeps + @{text x}. The intermediate result is therefore the pair @{ML "(x + 1, x)" + for x}. After that, the function increments the right-hand component of the + pair. So finally the result will be @{ML "(x + 1, x + 1)" for x}. + + The combinators @{ML_ind "|>>" in Basics} and @{ML_ind "||>" in Basics} are + defined for functions manipulating pairs. The first applies the function to + the first component of the pair, defined as +*} + +ML{*fun (x, y) |>> f = (f x, y)*} + +text {* + and the second combinator to the second component, defined as +*} + +ML{*fun (x, y) ||> f = (x, f y)*} + +text {* + These two functions can, for example, be used to avoid explicit @{text "lets"} for + intermediate values in functions that return pairs. As an example, suppose you + want to separate a list of integers into two lists according to a + threshold. If the threshold is @{ML "5"}, the list @{ML "[1,6,2,5,3,4]"} + should be separated as @{ML "([1,2,3,4], [6,5])"}. Such a function can be + implemented as +*} + +ML{*fun separate i [] = ([], []) + | separate i (x::xs) = + let + val (los, grs) = separate i xs + in + if i <= x then (los, x::grs) else (x::los, grs) + end*} + +text {* + where the return value of the recursive call is bound explicitly to + the pair @{ML "(los, grs)" for los grs}. However, this function + can be implemented more concisely as +*} + +ML{*fun separate i [] = ([], []) + | separate i (x::xs) = + if i <= x + then separate i xs ||> cons x + else separate i xs |>> cons x*} + +text {* + avoiding the explicit @{text "let"}. While in this example the gain in + conciseness is only small, in more complicated situations the benefit of + avoiding @{text "lets"} can be substantial. + + With the combinator @{ML_ind "|->" in Basics} you can re-combine the + elements from a pair. This combinator is defined as +*} + +ML{*fun (x, y) |-> f = f x y*} + +text {* + and can be used to write the following roundabout version + of the @{text double} function: +*} + +ML{*fun double x = + x |> (fn x => (x, x)) + |-> (fn x => fn y => x + y)*} + +text {* + The combinator @{ML_ind ||>> in Basics} plays a central rôle whenever your + task is to update a theory and the update also produces a side-result (for + example a theorem). Functions for such tasks return a pair whose second + component is the theory and the fist component is the side-result. Using + @{ML ||>>}, you can do conveniently the update and also + accumulate the side-results. Consider the following simple function. +*} + +ML %linenosgray{*fun acc_incs x = + x |> (fn x => ("", x)) + ||>> (fn x => (x, x + 1)) + ||>> (fn x => (x, x + 1)) + ||>> (fn x => (x, x + 1))*} + +text {* + The purpose of Line 2 is to just pair up the argument with a dummy value (since + @{ML ||>>} operates on pairs). Each of the next three lines just increment + the value by one, but also nest the intermediate results to the left. For example + + @{ML_response [display,gray] + "acc_incs 1" + "((((\"\", 1), 2), 3), 4)"} + + You can continue this chain with: + + @{ML_response [display,gray] + "acc_incs 1 ||>> (fn x => (x, x + 2))" + "(((((\"\", 1), 2), 3), 4), 6)"} + + \footnote{\bf FIXME: maybe give a ``real world'' example for this combinator.} +*} + +text {* + Recall that @{ML "|>"} is the reverse function application. Recall also that + the related reverse function composition is @{ML "#>"}. In fact all the + combinators @{ML "|->"}, @{ML "|>>"} , @{ML "||>"} and @{ML "||>>"} + described above have related combinators for function composition, namely + @{ML_ind "#->" in Basics}, @{ML_ind "#>>" in Basics}, @{ML_ind "##>" in + Basics} and @{ML_ind "##>>" in Basics}. Using @{ML "#->"}, for example, the + function @{text double} can also be written as: +*} + +ML{*val double = + (fn x => (x, x)) + #-> (fn x => fn y => x + y)*} + + +text {* + When using combinators for writing functions in waterfall fashion, it is + sometimes necessary to do some ``plumbing'' in order to fit functions + together. We have already seen such plumbing in the function @{ML + apply_fresh_args}, where @{ML curry} is needed for making the function @{ML + list_comb}, which works over pairs, to fit with the combinator @{ML "|>"}. Such + plumbing is also needed in situations where a function operates over lists, + but one calculates only with a single element. An example is the function + @{ML_ind check_terms in Syntax}, whose purpose is to simultaneously type-check + a list of terms. Consider the code: + + @{ML_response_fake [display, gray] + "let + val ctxt = @{context} +in + map (Syntax.parse_term ctxt) [\"m + n\", \"m * n\", \"m - (n::nat)\"] + |> Syntax.check_terms ctxt + |> pretty_terms ctxt + |> pwriteln +end" + "m + n, m * n, m - n"} +*} + +text {* + In this example we obtain three terms (using the function + @{ML_ind parse_term in Syntax}) whose variables @{text m} and @{text n} + are of type @{typ "nat"}. If you have only a single term, then @{ML + check_terms in Syntax} needs plumbing. This can be done with the function + @{ML_ind singleton in Library}.\footnote{There is already a function @{ML check_term in + Syntax} in the file @{ML_file "Pure/Syntax/syntax.ML"} that is implemented + in terms of @{ML singleton} and @{ML check_terms in Syntax}.} For example + + @{ML_response_fake [display, gray, linenos] + "let + val ctxt = @{context} +in + Syntax.parse_term ctxt \"m - (n::nat)\" + |> singleton (Syntax.check_terms ctxt) + |> pretty_term ctxt + |> pwriteln +end" + "m - n"} + + where in Line 5, the function operating over lists fits with the + single term generated in Line 4. + + \begin{readmore} + The most frequently used combinators are defined in the files @{ML_file + "Pure/library.ML"} + and @{ML_file "Pure/General/basics.ML"}. Also \isccite{sec:ML-linear-trans} + contains further information about combinators. + \end{readmore} + + \footnote{\bf FIXME: find a good exercise for combinators} + \begin{exercise} + Find out what the combinator @{ML "K I"} does. + \end{exercise} + + + \footnote{\bf FIXME: say something about calling conventions} +*} + + +section {* ML-Antiquotations\label{sec:antiquote} *} + +text {* + Recall from Section \ref{sec:include} that code in Isabelle is always + embedded in a theory. The main advantage of this is that the code can + contain references to entities defined on the logical level of Isabelle. By + this we mean references to definitions, theorems, terms and so on. These + reference are realised in Isabelle with ML-antiquotations, often just called + antiquotations.\footnote{Note that there are two kinds of antiquotations in + Isabelle, which have very different purposes and infrastructures. The first + kind, described in this section, are \emph{\index*{ML-antiquotation}}. They + are used to refer to entities (like terms, types etc) from Isabelle's logic + layer inside ML-code. The other kind of antiquotations are + \emph{document}\index{document antiquotation} antiquotations. They are used + only in the text parts of Isabelle and their purpose is to print logical + entities inside \LaTeX-documents. Document antiquotations are part of the + user level and therefore we are not interested in them in this Tutorial, + except in Appendix \ref{rec:docantiquotations} where we show how to + implement your own document antiquotations.} Syntactically antiquotations + are indicated by the @{ML_text @}-sign followed by text wrapped in @{text + "{\}"}. For example, one can print out the name of the current theory with + the code + + @{ML_response [display,gray] "Context.theory_name @{theory}" "\"First_Steps\""} + + where @{text "@{theory}"} is an antiquotation that is substituted with the + current theory (remember that we assumed we are inside the theory + @{text First_Steps}). The name of this theory can be extracted using + the function @{ML_ind theory_name in Context}. + + Note, however, that antiquotations are statically linked, that is their value is + determined at ``compile-time'', not at ``run-time''. For example the function +*} + +ML{*fun not_current_thyname () = Context.theory_name @{theory} *} + +text {* + does \emph{not} return the name of the current theory, if it is run in a + different theory. Instead, the code above defines the constant function + that always returns the string @{text [quotes] "First_Steps"}, no matter where the + function is called. Operationally speaking, the antiquotation @{text "@{theory}"} is + \emph{not} replaced with code that will look up the current theory in + some data structure and return it. Instead, it is literally + replaced with the value representing the theory. + + Another important antiquotation is @{text "@{context}"}. (What the + difference between a theory and a context is will be described in Chapter + \ref{chp:advanced}.) A context is for example needed in order to use the + function @{ML print_abbrevs in ProofContext} that list of all currently + defined abbreviations. + + @{ML_response_fake [display, gray] + "ProofContext.print_abbrevs @{context}" +"Code_Evaluation.valtermify \ \x. (x, \u. Code_Evaluation.termify x) +INTER \ INFI +Inter \ Inf +\"} + + You can also use antiquotations to refer to proved theorems: + @{text "@{thm \}"} for a single theorem + + @{ML_response_fake [display,gray] "@{thm allI}" "(\x. ?P x) \ \x. ?P x"} + + and @{text "@{thms \}"} for more than one + +@{ML_response_fake [display,gray] + "@{thms conj_ac}" +"(?P \ ?Q) = (?Q \ ?P) +(?P \ ?Q \ ?R) = (?Q \ ?P \ ?R) +((?P \ ?Q) \ ?R) = (?P \ ?Q \ ?R)"} + + The thm-antiquotations can also be used for manipulating theorems. For + example, if you need the version of te theorem @{thm [source] refl} that + has a meta-equality instead of an equality, you can write + +@{ML_response_fake [display,gray] + "@{thm refl[THEN eq_reflection]}" + "?x \ ?x"} + + The point of these antiquotations is that referring to theorems in this way + makes your code independent from what theorems the user might have stored + under this name (this becomes especially important when you deal with + theorem lists; see Section \ref{sec:storing}). + + It is also possible to prove lemmas with the antiquotation @{text "@{lemma \ by \}"} + whose first argument is a statement (possibly many of them separated by @{text "and"}) + and the second is a proof. For example +*} + +ML{*val foo_thm = @{lemma "True" and "False \ P" by simp_all} *} + +text {* + The result can be printed out as follows. + + @{ML_response_fake [gray,display] +"foo_thm |> pretty_thms_no_vars @{context} + |> pwriteln" + "True, False \ P"} + + You can also refer to the current simpset via an antiquotation. To illustrate + this we implement the function that extracts the theorem names stored in a + simpset. +*} + +ML{*fun get_thm_names_from_ss simpset = +let + val {simps,...} = MetaSimplifier.dest_ss simpset +in + map #1 simps +end*} + +text {* + The function @{ML_ind dest_ss in MetaSimplifier} returns a record containing all + information stored in the simpset, but here we are only interested in the names of the + simp-rules. Now you can feed in the current simpset into this function. + The current simpset can be referred to using the antiquotation @{text "@{simpset}"}. + + @{ML_response_fake [display,gray] + "get_thm_names_from_ss @{simpset}" + "[\"Nat.of_nat_eq_id\", \"Int.of_int_eq_id\", \"Nat.One_nat_def\", \]"} + + Again, this way of referencing simpsets makes you independent from additions + of lemmas to the simpset by the user, which can potentially cause loops in your + code. + + It is also possible to define your own antiquotations. But you should + exercise care when introducing new ones, as they can also make your code + also difficult to read. In the next chapter we describe how to construct + terms with the (build in) antiquotation @{text "@{term \}"}. A restriction + of this antiquotation is that it does not allow you to use schematic + variables in terms. If you want to have an antiquotation that does not have + this restriction, you can implement your own using the function @{ML_ind + inline in ML_Antiquote} from the structure @{ML_struct ML_Antiquote}. The code + for the antiquotation @{text "term_pat"} is as follows. +*} + +ML %linenosgray{*let + val parser = Args.context -- Scan.lift Args.name_source + + fun term_pat (ctxt, str) = + str |> ProofContext.read_term_pattern ctxt + |> ML_Syntax.print_term + |> ML_Syntax.atomic +in + ML_Antiquote.inline "term_pat" (parser >> term_pat) +end*} + +text {* + The parser in Line 2 provides us with a context and a string; this string is + transformed into a term using the function @{ML_ind read_term_pattern in + ProofContext} (Line 5); the next two lines transform the term into a string + so that the ML-system can understand it. (All these functions will be explained + in more detail in later sections.) An example for this antiquotation is: + + @{ML_response_fake [display,gray] + "@{term_pat \"Suc (?x::nat)\"}" + "Const (\"Suc\", \"nat \ nat\") $ Var ((\"x\", 0), \"nat\")"} + + which shows the internal representation of the term @{text "Suc ?x"}. Similarly + we can write an antiquotation for type patterns. +*} + +ML{*let + val parser = Args.context -- Scan.lift Args.name_source + + fun typ_pat (ctxt, str) = + str |> Syntax.parse_typ ctxt + |> ML_Syntax.print_typ + |> ML_Syntax.atomic +in + ML_Antiquote.inline "typ_pat" (parser >> typ_pat) +end*} + +text {* + \begin{readmore} + The file @{ML_file "Pure/ML/ml_antiquote.ML"} contains the the definitions + for most antiquotations. Most of the basic operations on ML-syntax are implemented + in @{ML_file "Pure/ML/ml_syntax.ML"}. + \end{readmore} +*} + +section {* Storing Data in Isabelle\label{sec:storing} *} + +text {* + Isabelle provides mechanisms for storing (and retrieving) arbitrary + data. Before we delve into the details, let us digress a bit. Conventional + wisdom has it that the type-system of ML ensures that an + @{ML_type "'a list"}, say, can only hold elements of the same type, namely + @{ML_type "'a"}. Despite this wisdom, however, it is possible to implement a + universal type in ML, although by some arguably accidental features of ML. + This universal type can be used to store data of different type into a single list. + In fact, it allows one to inject and to project data of \emph{arbitrary} type. This is + in contrast to datatypes, which only allow injection and projection of data for + some \emph{fixed} collection of types. In light of the conventional wisdom cited + above it is important to keep in mind that the universal type does not + destroy type-safety of ML: storing and accessing the data can only be done + in a type-safe manner. + + \begin{readmore} + In Isabelle the universal type is implemented as the type @{ML_type + Universal.universal} in the file + @{ML_file "Pure/ML-Systems/universal.ML"}. + \end{readmore} + + We will show the usage of the universal type by storing an integer and + a boolean into a single list. Let us first define injection and projection + functions for booleans and integers into and from the type @{ML_type Universal.universal}. +*} + +ML{*local + val fn_int = Universal.tag () : int Universal.tag + val fn_bool = Universal.tag () : bool Universal.tag +in + val inject_int = Universal.tagInject fn_int; + val inject_bool = Universal.tagInject fn_bool; + val project_int = Universal.tagProject fn_int; + val project_bool = Universal.tagProject fn_bool +end*} + +text {* + Using the injection functions, we can inject the integer @{ML_text "13"} + and the boolean value @{ML_text "true"} into @{ML_type Universal.universal}, and + then store them in a @{ML_type "Universal.universal list"} as follows: +*} + +ML{*val foo_list = +let + val thirteen = inject_int 13 + val truth_val = inject_bool true +in + [thirteen, truth_val] +end*} + +text {* + The data can be retrieved with the projection functions defined above. + + @{ML_response_fake [display, gray] +"project_int (nth foo_list 0); +project_bool (nth foo_list 1)" +"13 +true"} + + Notice that we access the integer as an integer and the boolean as + a boolean. If we attempt to access the integer as a boolean, then we get + a runtime error. + + @{ML_response_fake [display, gray] +"project_bool (nth foo_list 0)" +"*** Exception- Match raised"} + + This runtime error is the reason why ML is still type-sound despite + containing a universal type. + + Now, Isabelle heavily uses this mechanism for storing all sorts of + data: theorem lists, simpsets, facts etc. Roughly speaking, there are two + places where data can be stored in Isabelle: in \emph{theories} and in \emph{proof + contexts}. Data such as simpsets are ``global'' and therefore need to be stored + in a theory (simpsets need to be maintained across proofs and even across + theories). On the other hand, data such as facts change inside a proof and + are only relevant to the proof at hand. Therefore such data needs to be + maintained inside a proof context, which represents ``local'' data. + + For theories and proof contexts there are, respectively, the functors + @{ML_funct_ind Theory_Data} and @{ML_funct_ind Proof_Data} that help + with the data storage. Below we show how to implement a table in which you + can store theorems and look them up according to a string key. The + intention in this example is to be able to look up introduction rules for logical + connectives. Such a table might be useful in an automatic proof procedure + and therefore it makes sense to store this data inside a theory. + Consequently we use the functor @{ML_funct Theory_Data}. + The code for the table is: +*} + +ML %linenosgray{*structure Data = Theory_Data + (type T = thm Symtab.table + val empty = Symtab.empty + val extend = I + val merge = Symtab.merge (K true))*} + +text {* + In order to store data in a theory, we have to specify the type of the data + (Line 2). In this case we specify the type @{ML_type "thm Symtab.table"}, + which stands for a table in which @{ML_type string}s can be looked up + producing an associated @{ML_type thm}. We also have to specify four + functions to use this functor: namely how to initialise the data storage + (Line 3), how to extend it (Line 4) and how two + tables should be merged (Line 5). These functions correspond roughly to the + operations performed on theories and we just give some sensible + defaults.\footnote{\bf FIXME: Say more about the + assumptions of these operations.} The result structure @{ML_text Data} + contains functions for accessing the table (@{ML Data.get}) and for updating + it (@{ML Data.map}). There is also the functions @{ML Data.put}, which however is + not relevant here. Below we define two + auxiliary functions, which help us with accessing the table. +*} + +ML{*val lookup = Symtab.lookup o Data.get +fun update k v = Data.map (Symtab.update (k, v))*} + +text {* + Since we want to store introduction rules associated with their + logical connective, we can fill the table as follows. +*} + +setup %gray {* + update "op &" @{thm conjI} #> + update "op -->" @{thm impI} #> + update "All" @{thm allI} +*} + +text {* + The use of the command \isacommand{setup} makes sure the table in the + \emph{current} theory is updated (this is explained further in + section~\ref{sec:theories}). The lookup can now be performed as follows. + + @{ML_response_fake [display, gray] +"lookup @{theory} \"op &\"" +"SOME \"\?P; ?Q\ \ ?P \ ?Q\""} + + An important point to note is that these tables (and data in general) + need to be treated in a purely functional fashion. Although + we can update the table as follows +*} + +setup %gray {* update "op &" @{thm TrueI} *} + +text {* + and accordingly, @{ML lookup} now produces the introduction rule for @{term "True"} + +@{ML_response_fake [display, gray] +"lookup @{theory} \"op &\"" +"SOME \"True\""} + + there are no references involved. This is one of the most fundamental + coding conventions for programming in Isabelle. References + interfere with the multithreaded execution model of Isabelle and also + defeat its undo-mechanism. To see the latter, consider the + following data container where we maintain a reference to a list of + integers. +*} + +ML{*structure WrongRefData = Theory_Data + (type T = (int list) Unsynchronized.ref + val empty = Unsynchronized.ref [] + val extend = I + val merge = fst)*} + +text {* + We initialise the reference with the empty list. Consequently a first + lookup produces @{ML "ref []" in Unsynchronized}. + + @{ML_response_fake [display,gray] + "WrongRefData.get @{theory}" + "ref []"} + + For updating the reference we use the following function +*} + +ML{*fun ref_update n = WrongRefData.map + (fn r => let val _ = r := n::(!r) in r end)*} + +text {* + which takes an integer and adds it to the content of the reference. + As before, we update the reference with the command + \isacommand{setup}. +*} + +setup %gray {* ref_update 1 *} + +text {* + A lookup in the current theory gives then the expected list + @{ML "ref [1]" in Unsynchronized}. + + @{ML_response_fake [display,gray] + "WrongRefData.get @{theory}" + "ref [1]"} + + So far everything is as expected. But, the trouble starts if we attempt to + backtrack to the ``point'' before the \isacommand{setup}-command. There, we + would expect that the list is empty again. But since it is stored in a + reference, Isabelle has no control over it. So it is not empty, but still + @{ML "ref [1]" in Unsynchronized}. Adding to the trouble, if we execute the + \isacommand{setup}-command again, we do not obtain @{ML "ref [1]" in + Unsynchronized}, but + + @{ML_response_fake [display,gray] + "WrongRefData.get @{theory}" + "ref [1, 1]"} + + Now imagine how often you go backwards and forwards in your proof scripts. + By using references in Isabelle code, you are bound to cause all + hell to break loose. Therefore observe the coding convention: + Do not use references for storing data! + + \begin{readmore} + The functors for data storage are defined in @{ML_file "Pure/context.ML"}. + Isabelle contains implementations of several container data structures, + including association lists in @{ML_file "Pure/General/alist.ML"}, + directed graphs in @{ML_file "Pure/General/graph.ML"}, and + tables and symtables in @{ML_file "Pure/General/table.ML"}. + \end{readmore} + + Storing data in a proof context is done in a similar fashion. As mentioned + before, the corresponding functor is @{ML_funct_ind Proof_Data}. With the + following code we can store a list of terms in a proof context. +*} + +ML{*structure Data = Proof_Data + (type T = term list + fun init _ = [])*} + +text {* + The init-function we have to specify must produce a list for when a context + is initialised (possibly taking the theory into account from which the + context is derived). We choose here to just return the empty list. Next + we define two auxiliary functions for updating the list with a given + term and printing the list. +*} + +ML{*fun update trm = Data.map (fn trms => trm::trms) + +fun print ctxt = + case (Data.get ctxt) of + [] => writeln "Empty!" + | trms => pwriteln (pretty_terms ctxt trms)*} + +text {* + Next we start with the context generated by the antiquotation + @{text "@{context}"} and update it in various ways. + + @{ML_response_fake [display,gray] +"let + val ctxt0 = @{context} + val ctxt1 = ctxt0 |> update @{term \"False\"} + |> update @{term \"True \ True\"} + val ctxt2 = ctxt0 |> update @{term \"1::nat\"} + val ctxt3 = ctxt2 |> update @{term \"2::nat\"} +in + print ctxt0; + print ctxt1; + print ctxt2; + print ctxt3 +end" +"Empty! +True \ True, False +1 +2, 1"} + + Many functions in Isabelle manage and update data in a similar + fashion. Consequently, such calculations with contexts occur frequently in + Isabelle code, although the ``context flow'' is usually only linear. + Note also that the calculation above has no effect on the underlying + theory. Once we throw away the contexts, we have no access to their + associated data. This is different for theories, where the command + \isacommand{setup} registers the data with the current and future + theories, and therefore one can access the data potentially + indefinitely. + + For convenience there is an abstract layer, namely the type @{ML_type Context.generic}, + for treating theories and proof contexts more uniformly. This type is defined as follows +*} + +ML_val{*datatype generic = + Theory of theory +| Proof of proof*} + +text {* + \footnote{\bf FIXME: say more about generic contexts.} + + There are two special instances of the data storage mechanism described + above. The first instance implements named theorem lists using the functor + @{ML_funct_ind Named_Thms}. This is because storing theorems in a list + is such a common task. To obtain a named theorem list, you just declare +*} + +ML{*structure FooRules = Named_Thms + (val name = "foo" + val description = "Theorems for foo") *} + +text {* + and set up the @{ML_struct FooRules} with the command +*} + +setup %gray {* FooRules.setup *} + +text {* + This code declares a data container where the theorems are stored, + an attribute @{text foo} (with the @{text add} and @{text del} options + for adding and deleting theorems) and an internal ML-interface for retrieving and + modifying the theorems. + Furthermore, the theorems are made available on the user-level under the name + @{text foo}. For example you can declare three lemmas to be a member of the + theorem list @{text foo} by: +*} + +lemma rule1[foo]: "A" sorry +lemma rule2[foo]: "B" sorry +lemma rule3[foo]: "C" sorry + +text {* and undeclare the first one by: *} + +declare rule1[foo del] + +text {* You can query the remaining ones with: + + \begin{isabelle} + \isacommand{thm}~@{text "foo"}\\ + @{text "> ?C"}\\ + @{text "> ?B"} + \end{isabelle} + + On the ML-level, we can add theorems to the list with @{ML FooRules.add_thm}: +*} + +setup %gray {* Context.theory_map (FooRules.add_thm @{thm TrueI}) *} + +text {* + The rules in the list can be retrieved using the function + @{ML FooRules.get}: + + @{ML_response_fake [display,gray] + "FooRules.get @{context}" + "[\"True\", \"?C\",\"?B\"]"} + + Note that this function takes a proof context as argument. This might be + confusing, since the theorem list is stored as theory data. It becomes clear by knowing + that the proof context contains the information about the current theory and so the function + can access the theorem list in the theory via the context. + + \begin{readmore} + For more information about named theorem lists see + @{ML_file "Pure/Tools/named_thms.ML"}. + \end{readmore} + + The second special instance of the data storage mechanism are configuration + values. They are used to enable users to configure tools without having to + resort to the ML-level (and also to avoid references). Assume you want the + user to control three values, say @{text bval} containing a boolean, @{text + ival} containing an integer and @{text sval} containing a string. These + values can be declared by +*} + +ML{*val (bval, setup_bval) = Attrib.config_bool "bval" (K false) +val (ival, setup_ival) = Attrib.config_int "ival" (K 0) +val (sval, setup_sval) = Attrib.config_string "sval" (K "some string") *} + +text {* + where each value needs to be given a default. To enable these values on the + user-level, they need to be set up with +*} + +setup %gray {* + setup_bval #> + setup_ival #> + setup_sval +*} + +text {* + The user can now manipulate the values from the user-level of Isabelle + with the command +*} + +declare [[bval = true, ival = 3]] + +text {* + On the ML-level these values can be retrieved using the + function @{ML_ind get in Config} from a proof context + + @{ML_response [display,gray] + "Config.get @{context} bval" + "true"} + + or directly from a theory using the function @{ML_ind get_global in Config} + + @{ML_response [display,gray] + "Config.get_global @{theory} bval" + "true"} + + It is also possible to manipulate the configuration values + from the ML-level with the functions @{ML_ind put in Config} + and @{ML_ind put_global in Config}. For example + + @{ML_response [display,gray] + "let + val ctxt = @{context} + val ctxt' = Config.put sval \"foo\" ctxt + val ctxt'' = Config.put sval \"bar\" ctxt' +in + (Config.get ctxt sval, + Config.get ctxt' sval, + Config.get ctxt'' sval) +end" + "(\"some string\", \"foo\", \"bar\")"} + + \begin{readmore} + For more information about configuration values see + the files @{ML_file "Pure/Isar/attrib.ML"} and + @{ML_file "Pure/config.ML"}. + \end{readmore} +*} + +section {* Summary *} + +text {* + This chapter describes the combinators that are used in Isabelle, as well + as a simple printing infrastructure for @{ML_type term}, @{ML_type cterm} + and @{ML_type thm}. The section on ML-antiquotations shows how to refer + statically to entities from the logic level of Isabelle. Isabelle also + contains mechanisms for storing arbitrary data in theory and proof + contexts. + + \begin{conventions} + \begin{itemize} + \item Print messages that belong together in a single string. + \item Do not use references in Isabelle code. + \end{itemize} + \end{conventions} + +*} + + +(**************************************************) +(* bak *) +(**************************************************) + +(* +section {* Do Not Try This At Home! *} + +ML {* val my_thms = ref ([]: thm list) *} + +attribute_setup my_thm_bad = + {* Scan.succeed (Thm.declaration_attribute (fn th => fn ctxt => + (my_thms := th :: ! my_thms; ctxt))) *} "bad attribute" + +declare sym [my_thm_bad] +declare refl [my_thm_bad] + +ML "!my_thms" +*) +end