ProgTutorial/Parsing.thy
changeset 517 d8c376662bb4
parent 514 7e25716c3744
child 519 cf471fa86091
equal deleted inserted replaced
516:fb6c29a90003 517:d8c376662bb4
     1 theory Parsing
     1 theory Parsing
     2 imports Base "Helper/Command/Command" "Package/Simple_Inductive_Package"
     2 imports Base "Helper/Command/Command" "Package/Simple_Inductive_Package"
     3 begin
     3 begin
     4 
       
     5 (*<*)
       
     6 setup {*
       
     7 open_file_with_prelude 
       
     8 "Parsing_Code.thy"
       
     9 ["theory Parsing", 
       
    10  "imports Base \"Package/Simple_Inductive_Package\"", 
       
    11  "begin"]
       
    12 *}
       
    13 (*>*)
       
    14 
     4 
    15 chapter {* Parsing\label{chp:parsing} *}
     5 chapter {* Parsing\label{chp:parsing} *}
    16 
     6 
    17 text {*
     7 text {*
    18   \begin{flushright}
     8   \begin{flushright}
   251   Let us now return to our example of parsing @{ML "(p -- q) || r" for p q
   241   Let us now return to our example of parsing @{ML "(p -- q) || r" for p q
   252   r}. If you want to generate the correct error message for failure
   242   r}. If you want to generate the correct error message for failure
   253   of parsing @{text "p"}-followed-by-@{text "q"}, then you have to write:
   243   of parsing @{text "p"}-followed-by-@{text "q"}, then you have to write:
   254 *}
   244 *}
   255 
   245 
   256 ML{*fun p_followed_by_q p q r =
   246 ML %grayML{*fun p_followed_by_q p q r =
   257 let 
   247 let 
   258   val err_msg = fn _ => p ^ " is not followed by " ^ q
   248   val err_msg = fn _ => p ^ " is not followed by " ^ q
   259 in
   249 in
   260   ($$ p -- (!! (fn _ => err_msg) ($$ q))) || ($$ r -- $$ r)
   250   ($$ p -- (!! (fn _ => err_msg) ($$ q))) || ($$ r -- $$ r)
   261 end *}
   251 end *}
   387   the tree corresponding to the string @{text [quotes] "(A, A), (A, A)"} where
   377   the tree corresponding to the string @{text [quotes] "(A, A), (A, A)"} where
   388   the @{text "A"}s will be the leaves.  We assume the trees are represented by the
   378   the @{text "A"}s will be the leaves.  We assume the trees are represented by the
   389   datatype:
   379   datatype:
   390 *}
   380 *}
   391 
   381 
   392 ML{*datatype tree = 
   382 ML %grayML{*datatype tree = 
   393     Lf of string 
   383     Lf of string 
   394   | Br of tree * tree*}
   384   | Br of tree * tree*}
   395 
   385 
   396 text {*
   386 text {*
   397   Since nested parentheses should be treated in a meaningful way---for example
   387   Since nested parentheses should be treated in a meaningful way---for example
   398   the string @{text [quotes] "((A))"} should be read into a single 
   388   the string @{text [quotes] "((A))"} should be read into a single 
   399   leaf---you might implement the following parser.
   389   leaf---you might implement the following parser.
   400 *}
   390 *}
   401 
   391 
   402 ML{*fun parse_basic s = 
   392 ML %grayML{*fun parse_basic s = 
   403   $$ s >> Lf || $$ "(" |-- parse_tree s --| $$ ")"  
   393   $$ s >> Lf || $$ "(" |-- parse_tree s --| $$ ")"  
   404 
   394 
   405 and parse_tree s = 
   395 and parse_tree s = 
   406   parse_basic s --| $$ "," -- parse_tree s >> Br || parse_basic s*}
   396   parse_basic s --| $$ "," -- parse_tree s >> Br || parse_basic s*}
   407 
   397 
   432   parser until an input is given. This can be done by adding the parsed
   422   parser until an input is given. This can be done by adding the parsed
   433   string as an explicit argument. So the parser above should be implemented
   423   string as an explicit argument. So the parser above should be implemented
   434   as follows.
   424   as follows.
   435 *}
   425 *}
   436 
   426 
   437 ML{*fun parse_basic s xs = 
   427 ML %grayML{*fun parse_basic s xs = 
   438   ($$ s >> Lf || $$ "(" |-- parse_tree s --| $$ ")") xs 
   428   ($$ s >> Lf || $$ "(" |-- parse_tree s --| $$ ")") xs 
   439 
   429 
   440 and parse_tree s xs = 
   430 and parse_tree s xs = 
   441   (parse_basic s --| $$ "," -- parse_tree s >> Br || parse_basic s) xs*}
   431   (parse_basic s --| $$ "," -- parse_tree s >> Br || parse_basic s) xs*}
   442 
   432 
   470 text {*
   460 text {*
   471   Most of the time, however, Isabelle developers have to deal with parsing
   461   Most of the time, however, Isabelle developers have to deal with parsing
   472   tokens, not strings.  These token parsers have the type:
   462   tokens, not strings.  These token parsers have the type:
   473 *}
   463 *}
   474   
   464   
   475 ML{*type 'a parser = Token.T list -> 'a * Token.T list*}
   465 ML %grayML{*type 'a parser = Token.T list -> 'a * Token.T list*}
   476 
   466 
   477 text {*
   467 text {*
   478   {\bf REDO!!}
   468   {\bf REDO!!}
   479 
   469 
   480 
   470 
   510 
   500 
   511   We can easily change what is recognised as a keyword with the function
   501   We can easily change what is recognised as a keyword with the function
   512   @{ML_ind define in Keyword}. For example calling it with 
   502   @{ML_ind define in Keyword}. For example calling it with 
   513 *}
   503 *}
   514 
   504 
   515 ML{*val _ = Keyword.define ("hello", NONE) *}
   505 ML %grayML{*val _ = Keyword.define ("hello", NONE) *}
   516 
   506 
   517 text {*
   507 text {*
   518   then lexing @{text [quotes] "hello world"} will produce
   508   then lexing @{text [quotes] "hello world"} will produce
   519 
   509 
   520   @{ML_response_fake [display,gray] "Outer_Syntax.scan Position.none \"hello world\"" 
   510   @{ML_response_fake [display,gray] "Outer_Syntax.scan Position.none \"hello world\"" 
   536 "[Token (\<dots>,(Ident, \"hello\"), \<dots>), Token (\<dots>,(Ident, \"world\"), \<dots>)]"}
   526 "[Token (\<dots>,(Ident, \"hello\"), \<dots>), Token (\<dots>,(Ident, \"world\"), \<dots>)]"}
   537 
   527 
   538   For convenience we define the function:
   528   For convenience we define the function:
   539 *}
   529 *}
   540 
   530 
   541 ML{*fun filtered_input str = 
   531 ML %grayML{*fun filtered_input str = 
   542   filter Token.is_proper (Outer_Syntax.scan Position.none str) *}
   532   filter Token.is_proper (Outer_Syntax.scan Position.none str) *}
   543 
   533 
   544 text {* 
   534 text {* 
   545   If you now parse
   535   If you now parse
   546 
   536 
   628 "([\"in\", \"in\", \"in\"], [])"}
   618 "([\"in\", \"in\", \"in\"], [])"}
   629 
   619 
   630   The following function will help to run examples.
   620   The following function will help to run examples.
   631 *}
   621 *}
   632 
   622 
   633 ML{*fun parse p input = Scan.finite Token.stopper (Scan.error p) input *}
   623 ML %grayML{*fun parse p input = Scan.finite Token.stopper (Scan.error p) input *}
   634 
   624 
   635 text {*
   625 text {*
   636   The function @{ML_ind "!!!" in Parse} can be used to force termination
   626   The function @{ML_ind "!!!" in Parse} can be used to force termination
   637   of the parser in case of a dead end, just like @{ML "Scan.!!"} (see previous
   627   of the parser in case of a dead end, just like @{ML "Scan.!!"} (see previous
   638   section). A difference, however, is that the error message of @{ML
   628   section). A difference, however, is that the error message of @{ML
   663   occurred. For this Isabelle can attach positional information to tokens
   653   occurred. For this Isabelle can attach positional information to tokens
   664   and then thread this information up the ``processing chain''. To see this,
   654   and then thread this information up the ``processing chain''. To see this,
   665   modify the function @{ML filtered_input}, described earlier, as follows 
   655   modify the function @{ML filtered_input}, described earlier, as follows 
   666 *}
   656 *}
   667 
   657 
   668 ML{*fun filtered_input' str = 
   658 ML %grayML{*fun filtered_input' str = 
   669        filter Token.is_proper (Outer_Syntax.scan (Position.line 7) str) *}
   659        filter Token.is_proper (Outer_Syntax.scan (Position.line 7) str) *}
   670 
   660 
   671 text {*
   661 text {*
   672   where we pretend the parsed string starts on line 7. An example is
   662   where we pretend the parsed string starts on line 7. An example is
   673 
   663 
   957   To keep things simple, let us start with a ``silly'' command that does nothing 
   947   To keep things simple, let us start with a ``silly'' command that does nothing 
   958   at all. We shall name this command \isacommand{foobar}. On the ML-level it can be 
   948   at all. We shall name this command \isacommand{foobar}. On the ML-level it can be 
   959   defined as:
   949   defined as:
   960 *}
   950 *}
   961 
   951 
   962 ML{*let
   952 ML %grayML{*let
   963   val do_nothing = Scan.succeed (Local_Theory.background_theory I)
   953   val do_nothing = Scan.succeed (Local_Theory.background_theory I)
   964   val kind = Keyword.thy_decl
   954   val kind = Keyword.thy_decl
   965 in
   955 in
   966   Outer_Syntax.local_theory ("foobar", kind) "description of foobar" do_nothing
   956   Outer_Syntax.local_theory ("foobar", kind) "description of foobar" do_nothing
   967 end *}
   957 end *}
  1102   to interact with ProofGeneral. A similar procedure has to be done with any 
  1092   to interact with ProofGeneral. A similar procedure has to be done with any 
  1103   other new command, and also any new keyword that is introduced with 
  1093   other new command, and also any new keyword that is introduced with 
  1104   the function @{ML_ind define in Keyword}. For example:
  1094   the function @{ML_ind define in Keyword}. For example:
  1105 *}
  1095 *}
  1106 
  1096 
  1107 ML{*val _ = Keyword.define ("blink", NONE) *}
  1097 ML %grayML{*val _ = Keyword.define ("blink", NONE) *}
  1108 
  1098 
  1109 text {*
  1099 text {*
  1110   At the moment the command \isacommand{foobar} is not very useful. Let us
  1100   At the moment the command \isacommand{foobar} is not very useful. Let us
  1111   refine it a bit next by letting it take a proposition as argument and
  1101   refine it a bit next by letting it take a proposition as argument and
  1112   printing this proposition inside the tracing buffer.
  1102   printing this proposition inside the tracing buffer.
  1119   parser @{ML Parse.prop}), then prints out the tracing
  1109   parser @{ML Parse.prop}), then prints out the tracing
  1120   information (using a new function @{text trace_prop}) and 
  1110   information (using a new function @{text trace_prop}) and 
  1121   finally does nothing. For this you can write:
  1111   finally does nothing. For this you can write:
  1122 *}
  1112 *}
  1123 
  1113 
  1124 ML{*let
  1114 ML %grayML{*let
  1125   fun trace_prop str = 
  1115   fun trace_prop str = 
  1126      Local_Theory.background_theory (fn ctxt => (tracing str; ctxt))
  1116      Local_Theory.background_theory (fn ctxt => (tracing str; ctxt))
  1127 
  1117 
  1128   val kind = Keyword.thy_decl
  1118   val kind = Keyword.thy_decl
  1129 in
  1119 in
  1212   fun init thy () = error "Result")
  1202   fun init thy () = error "Result")
  1213 
  1203 
  1214 val result_cookie = (Result.get, Result.put, "Result.put")
  1204 val result_cookie = (Result.get, Result.put, "Result.put")
  1215 *}
  1205 *}
  1216 
  1206 
  1217 ML{*let
  1207 ML %grayML{*let
  1218    fun after_qed thm_name thms lthy =
  1208    fun after_qed thm_name thms lthy =
  1219         Local_Theory.note (thm_name, (flat thms)) lthy |> snd
  1209         Local_Theory.note (thm_name, (flat thms)) lthy |> snd
  1220 
  1210 
  1221    fun setup_proof (thm_name, (txt, pos)) lthy =
  1211    fun setup_proof (thm_name, (txt, pos)) lthy =
  1222    let
  1212    let