diff -r 02f76f1b6e7b -r dee4b3e66dfe CookBook/Parsing.thy --- a/CookBook/Parsing.thy Wed Oct 29 13:58:36 2008 +0100 +++ b/CookBook/Parsing.thy Wed Oct 29 21:46:33 2008 +0100 @@ -248,50 +248,92 @@ Most of the time, however, Isabelle developers have to deal with parsing tokens, not strings. This is because the parsers for the theory syntax, as well as the parsers for the argument syntax of proof methods and attributes use the type - @{ML_type OuterParse.token} (which is identical to the type @{ML_type OuterLex.token}). + @{ML_type OuterLex.token} (which is identical to the type @{ML_type OuterParse.token}). \begin{readmore} The parser functions for the theory syntax are contained in the structure @{ML_struct OuterParse} defined in the file @{ML_file "Pure/Isar/outer_parse.ML"}. The definition for tokens is in the file @{ML_file "Pure/Isar/outer_lex.ML"}. \end{readmore} -*} + + The structure @{ML_struct OuterLex} defines several kinds of token (for example + @{ML "OuterLex.Ident"} for identifiers, @{ML "OuterLex.Keyword"} for keywords and + @{ML "OuterLex.Command"} for commands). + We can generate a token list using the function @{ML "OuterSyntax.scan"}, which we give + below @{ML "Position.none"} as argument since, at the moment, we are not interested in + generating precise error messages. The following\footnote{There is something funny + going on with the pretty printing of the result token list.} + +@{ML_response [display] "OuterSyntax.scan Position.none \"hello world\"" +"[OuterLex.Token (\,(OuterLex.Ident, \"hello\"),\), + OuterLex.Token (\,(OuterLex.Space, \" \"),\), + OuterLex.Token (\,(OuterLex.Ident, \"world\"),\)]"} + + produces three token where the first and the last are identifiers, since + @{ML_text [quotes] "hello"} and @{ML_text [quotes] "world"} do not match + any other category. The second indicates a space. If we parse + +@{ML_response [display] "OuterSyntax.scan Position.none \"inductive|for\"" +"[OuterLex.Token (\,(OuterLex.Command, \"inductive\"),\), + OuterLex.Token (\,(OuterLex.Keyword, \"|\"),\), + OuterLex.Token (\,(OuterLex.Keyword, \"for\"),\)]"} + + we obtain a list of command/keyword token. -ML {* - let open OuterLex in - OuterSyntax.scan Position.none "for" - end; + Now the parser @{ML "OuterParse.$$$"} parses a single keyword. For example + +@{ML_response [display] +"let + val input1 = OuterSyntax.scan Position.none \"where for\" + val input2 = OuterSyntax.scan Position.none \"|in\" +in + (OuterParse.$$$ \"where\" input1, OuterParse.$$$ \"|\" input2) +end" +"((\"where\",\),(\"|\",\))"} + + Like before, we can sequentially connect parsers with @{ML "(op --)"}. For example + follows + +@{ML_response [display] +"let + val input = OuterSyntax.scan Position.none \"|in\" +in + (OuterParse.$$$ \"|\" -- OuterParse.$$$ \"in\") input +end" +"((\"|\",\"in\"),[])"} + + The parser @{ML_open "OuterParse.enum s p" for s p} parses a possibly empty + list of items recognized by the parser @{ML_text p}, where the items are + separated by @{ML_text s}. For example + +@{ML_response [display] +"let + val input = OuterSyntax.scan Position.none \"in|in|in\\n\" +in + (OuterParse.enum \"|\" (OuterParse.$$$ \"in\")) input +end" +"([\"in\",\"in\",\"in\"],[\])"} + + @{ML_open "OuterParse.enum1"} works similarly, except that the list must be non-empty. *} -ML {* +text {* FIXME explain @{ML "OuterParse.!!!"} *} + +section {* Parsing Inner Syntax *} -fun my_scan lex pos str = - Source.of_string str - |> Symbol.source {do_recover = false} - |> OuterLex.source {do_recover = SOME false} - (fn () => pairself (Scan.make_lexicon o map Symbol.explode) lex) pos - |> Source.exhaust; +ML {* +let + val input = OuterSyntax.scan Position.none "0" +in + OuterParse.prop input +end *} -ML {* -let val toks = my_scan (["hello"], []) Position.none "hello foo bar" -in (OuterParse.$$$ "hello") toks end -*} - -ML {* +text {* FIXME funny output for a proposition *} - let val input = [OuterLex.mk_text "hello", OuterLex.mk_text "world"]; - in (Scan.one (fn _ => true)) input end -*} - -text {* - explain @{ML "OuterParse.enum1"}, @{ML "OuterParse.prop"} - @{ML "OuterParse.opt_target"}, @{ML "OuterParse.fixes"} - @{ML "OuterParse.!!!"}, @{ML "OuterParse.for_fixes"} -*} chapter {* Parsing *}