diff -r 81e2d73f7191 -r 4daf913fdbe1 CookBook/Parsing.thy --- a/CookBook/Parsing.thy Wed Oct 29 21:51:25 2008 +0100 +++ b/CookBook/Parsing.thy Thu Oct 30 13:36:51 2008 +0100 @@ -15,7 +15,7 @@ Isabelle uses a rather general and sophisticated algorithm due to Earley, which is driven by priority grammars. Parsers for outer syntax are built up by functional parsing combinators. These combinators are a well-established technique for parsing, - which has, for example, been described in Paulson's classic book \cite{paulson-ml2}. + which has, for example, been described in Paulson's classic ML-book \cite{paulson-ml2}. Isabelle developers are usually concerned with writing these outer syntax parsers, either for new definitional packages or for calling tactics with specific arguments. @@ -110,11 +110,11 @@ @{ML_response [display] "let - val just_h = ($$ \"h\") |-- ($$ \"e\") - val just_e = ($$ \"h\") --| ($$ \"e\") + val just_e = ($$ \"h\") |-- ($$ \"e\") + val just_h = ($$ \"h\") --| ($$ \"e\") val input = (explode \"hello\") in - (just_h input, just_e input) + (just_e input, just_h input) end" "((\"e\", [\"l\", \"l\", \"o\"]),(\"h\", [\"l\", \"l\", \"o\"]))"} @@ -182,7 +182,7 @@ *} ML {* - fun p_followed_by_q p q r = +fun p_followed_by_q p q r = let val err = (fn _ => p ^ " is not followed by " ^ q) in @@ -258,7 +258,9 @@ The structure @{ML_struct OuterLex} defines several kinds of token (for example @{ML "OuterLex.Ident"} for identifiers, @{ML "OuterLex.Keyword"} for keywords and - @{ML "OuterLex.Command"} for commands). + @{ML "OuterLex.Command"} for commands). Some parsers take into account the + kind of token. + We can generate a token list using the function @{ML "OuterSyntax.scan"}, which we give below @{ML "Position.none"} as argument since, at the moment, we are not interested in generating precise error messages. The following\footnote{There is something funny @@ -269,7 +271,7 @@ OuterLex.Token (\,(OuterLex.Space, \" \"),\), OuterLex.Token (\,(OuterLex.Ident, \"world\"),\)]"} - produces three token where the first and the last are identifiers, since + produces three tokens where the first and the last are identifiers, since @{ML_text [quotes] "hello"} and @{ML_text [quotes] "world"} do not match any other category. The second indicates a space. If we parse @@ -278,7 +280,18 @@ OuterLex.Token (\,(OuterLex.Keyword, \"|\"),\), OuterLex.Token (\,(OuterLex.Keyword, \"for\"),\)]"} - we obtain a list of command/keyword token. + we obtain a list of command and keyword tokens. + If you want to see which keywords and commands are currently known, use + the following (you might have to adjust the @{ML print_depth} in order to + see the complete list): + +@{ML_response_fake [display] +"let + val (keywords, commands) = OuterKeyword.get_lexicons () +in + (Scan.dest_lexicon commands, Scan.dest_lexicon keywords) +end" +"([\"}\",\"{\",\],[\"\\",\"\\",\])"} Now the parser @{ML "OuterParse.$$$"} parses a single keyword. For example @@ -314,7 +327,10 @@ end" "([\"in\",\"in\",\"in\"],[\])"} - @{ML_open "OuterParse.enum1"} works similarly, except that the list must be non-empty. + Note that we had to add a @{ML_text [quotes] "\\n"} at the end of the parsed + string, otherwise the parser would have consumed all tokens and then failed with + the exception @{ML_text "FAIL"}. @{ML "OuterParse.enum1"} works similarly, + except that the parsed list must be non-empty. *} @@ -331,7 +347,7 @@ *} -text {* FIXME funny output for a proposition *} +text {* (FIXME funny output for a proposition) *} @@ -562,21 +578,36 @@ *} -text {* - FIXME + + - @{text " - begin{verbatim} - type token = T.token ; - val toks : token list = OuterSyntax.scan ``theory,imports;begin x.y.z apply ?v1 ?'a 'a -- || 44 simp (* xx *) { * fff * }'' ; +ML {* + val toks = OuterSyntax.scan Position.none + "theory,imports;begin x.y.z apply ?v1 ?'a 'a -- || 44 simp (* xx *) { * fff * }" ; +*} + +ML {* print_depth 20 ; - List.map T.text_of toks ; - val proper_toks = List.filter T.is_proper toks ; - List.map T.kind_of proper_toks ; - List.map T.unparse proper_toks ; - List.map T.val_of proper_toks ; - end{verbatim}"} +*} + +ML {* + map OuterLex.text_of toks ; +*} + +ML {* + val proper_toks = filter OuterLex.is_proper toks ; +*} +ML {* + map OuterLex.kind_of proper_toks +*} + +ML {* + map OuterLex.unparse proper_toks ; +*} + +ML {* + OuterLex.stopper *} text {*