CookBook/Parsing.thy
changeset 44 dee4b3e66dfe
parent 43 02f76f1b6e7b
child 47 4daf913fdbe1
equal deleted inserted replaced
43:02f76f1b6e7b 44:dee4b3e66dfe
   246 
   246 
   247 text {*
   247 text {*
   248   Most of the time, however, Isabelle developers have to deal with parsing tokens, not strings.
   248   Most of the time, however, Isabelle developers have to deal with parsing tokens, not strings.
   249   This is because the parsers for the theory syntax, as well as the parsers for the 
   249   This is because the parsers for the theory syntax, as well as the parsers for the 
   250   argument syntax of proof methods and attributes use the type 
   250   argument syntax of proof methods and attributes use the type 
   251   @{ML_type OuterParse.token} (which is identical to the type @{ML_type OuterLex.token}).
   251   @{ML_type OuterLex.token} (which is identical to the type @{ML_type OuterParse.token}).
   252 
   252 
   253   \begin{readmore}
   253   \begin{readmore}
   254   The parser functions for the theory syntax are contained in the structure
   254   The parser functions for the theory syntax are contained in the structure
   255   @{ML_struct OuterParse} defined in the file @{ML_file  "Pure/Isar/outer_parse.ML"}.
   255   @{ML_struct OuterParse} defined in the file @{ML_file  "Pure/Isar/outer_parse.ML"}.
   256   The definition for tokens is in the file @{ML_file "Pure/Isar/outer_lex.ML"}.
   256   The definition for tokens is in the file @{ML_file "Pure/Isar/outer_lex.ML"}.
   257   \end{readmore}
   257   \end{readmore}
   258 *}
   258 
   259 
   259   The structure @{ML_struct OuterLex} defines several kinds of token (for example 
   260 ML {* 
   260   @{ML "OuterLex.Ident"} for identifiers, @{ML "OuterLex.Keyword"} for keywords and
   261   let open OuterLex in 
   261   @{ML "OuterLex.Command"} for commands).
   262   OuterSyntax.scan Position.none "for" 
   262   We can generate a token list using the function @{ML "OuterSyntax.scan"}, which we give
   263   end;
   263   below @{ML "Position.none"} as argument since, at the moment, we are not interested in
   264 
   264   generating precise error messages. The following\footnote{There is something funny 
   265 *}
   265   going on with the pretty printing of the result token list.}
       
   266 
       
   267 @{ML_response [display] "OuterSyntax.scan Position.none \"hello world\"" 
       
   268 "[OuterLex.Token (\<dots>,(OuterLex.Ident, \"hello\"),\<dots>), 
       
   269  OuterLex.Token (\<dots>,(OuterLex.Space, \" \"),\<dots>), 
       
   270  OuterLex.Token (\<dots>,(OuterLex.Ident, \"world\"),\<dots>)]"}
       
   271 
       
   272   produces three token where the first and the last are identifiers, since 
       
   273   @{ML_text [quotes] "hello"} and @{ML_text [quotes] "world"} do not match 
       
   274   any other category. The second indicates a space. If we parse
       
   275 
       
   276 @{ML_response [display] "OuterSyntax.scan Position.none \"inductive|for\"" 
       
   277 "[OuterLex.Token (\<dots>,(OuterLex.Command, \"inductive\"),\<dots>), 
       
   278  OuterLex.Token (\<dots>,(OuterLex.Keyword, \"|\"),\<dots>), 
       
   279  OuterLex.Token (\<dots>,(OuterLex.Keyword, \"for\"),\<dots>)]"}
       
   280 
       
   281   we obtain a list of command/keyword token.
       
   282 
       
   283   Now the parser @{ML "OuterParse.$$$"} parses a single keyword. For example
       
   284  
       
   285 @{ML_response [display]
       
   286 "let 
       
   287   val input1 = OuterSyntax.scan Position.none \"where for\"
       
   288   val input2 = OuterSyntax.scan Position.none \"|in\"
       
   289 in 
       
   290   (OuterParse.$$$ \"where\" input1, OuterParse.$$$ \"|\" input2)
       
   291 end"
       
   292 "((\"where\",\<dots>),(\"|\",\<dots>))"}
       
   293 
       
   294   Like before, we can sequentially connect parsers with @{ML "(op --)"}. For example 
       
   295   follows
       
   296 
       
   297 @{ML_response [display]
       
   298 "let 
       
   299   val input = OuterSyntax.scan Position.none \"|in\"
       
   300 in 
       
   301   (OuterParse.$$$ \"|\" -- OuterParse.$$$ \"in\") input
       
   302 end"
       
   303 "((\"|\",\"in\"),[])"}
       
   304 
       
   305   The parser @{ML_open "OuterParse.enum s p" for s p} parses a possibly empty 
       
   306   list of items recognized by the parser @{ML_text p}, where the items are 
       
   307   separated by @{ML_text s}. For example
       
   308 
       
   309 @{ML_response [display]
       
   310 "let 
       
   311   val input = OuterSyntax.scan Position.none \"in|in|in\\n\"
       
   312 in 
       
   313   (OuterParse.enum \"|\" (OuterParse.$$$ \"in\")) input
       
   314 end" 
       
   315 "([\"in\",\"in\",\"in\"],[\<dots>])"}
       
   316 
       
   317  @{ML_open "OuterParse.enum1"} works similarly, except that the list must be non-empty.
       
   318 
       
   319 *}
       
   320 
       
   321 text {* FIXME explain @{ML "OuterParse.!!!"} *}
       
   322 
       
   323 section {* Parsing Inner Syntax *}
   266 
   324 
   267 ML {*
   325 ML {*
   268 
   326 let 
   269 fun my_scan lex pos str =
   327   val input = OuterSyntax.scan Position.none "0"
   270    Source.of_string str
   328 in 
   271    |> Symbol.source {do_recover = false}
   329   OuterParse.prop input
   272    |> OuterLex.source {do_recover = SOME false}
   330 end 
   273         (fn () => pairself (Scan.make_lexicon o map Symbol.explode) lex) pos
   331 
   274    |> Source.exhaust;
   332 *}
   275 
   333 
   276 *}
   334 text {* FIXME funny output for a proposition *}
   277 
   335 
   278 ML {*
   336 
   279 let val toks = my_scan (["hello"], []) Position.none "hello foo bar"
       
   280 in (OuterParse.$$$ "hello") toks end
       
   281 *}
       
   282 
       
   283 ML {*
       
   284 
       
   285   let val input = [OuterLex.mk_text "hello", OuterLex.mk_text "world"];
       
   286   in (Scan.one (fn _ => true)) input end
       
   287 
       
   288 *}
       
   289 
       
   290 text {*
       
   291   explain @{ML "OuterParse.enum1"}, @{ML "OuterParse.prop"} 
       
   292    @{ML "OuterParse.opt_target"}, @{ML "OuterParse.fixes"} 
       
   293    @{ML "OuterParse.!!!"}, @{ML "OuterParse.for_fixes"} 
       
   294 *}
       
   295 
   337 
   296 chapter {* Parsing *}
   338 chapter {* Parsing *}
   297 
   339 
   298 text {*
   340 text {*
   299 
   341