diff -r 609f9ef73494 -r a0edabf14457 CookBook/Parsing.thy --- a/CookBook/Parsing.thy Sat Nov 01 15:20:36 2008 +0100 +++ b/CookBook/Parsing.thy Mon Nov 24 02:51:08 2008 +0100 @@ -32,7 +32,7 @@ *} -section {* Building Up Generic Parsers *} +section {* Building Generic Parsers *} text {* @@ -63,12 +63,15 @@ It is used for example in the function @{ML "(op !!)"} (see below). \end{itemize} - (FIXME: do the exception need to be explained, because the user cannot use them from ``outside''?) + However, note that these exception private to the parser and cannot be accessed + by the programmer (for example to handle them). + + Slightly more general than the parser @{ML "(op $$)"} is the function @{ML + Scan.one}, in that it takes a predicate as argument and then parses exactly + one item from the input list satisfying this prediate. For example the + following parser either consumes an @{ML_text [quotes] "h"} or a @{ML_text + [quotes] "w"}: - Slightly more general than the parser @{ML "(op $$)"} is the function @{ML Scan.one}, in that it - takes a predicate as argument and then parses exactly one item from the input list - satisfying this prediate. For example the following parser either consumes an - @{ML_text [quotes] "h"} or a @{ML_text [quotes] "w"}: @{ML_response [display] "let @@ -132,6 +135,9 @@ end" "((\"h\", [\"e\", \"l\", \"l\", \"o\"]), (\"x\", [\"w\", \"o\", \"r\", \"l\", \"d\"]))"} + The function @{ML Scan.option} works similarly, except no default value can + be given and in the failure case this parser does nothing. + The function @{ML "(op !!)"} helps to produce appropriate error messages during parsing. For example if one wants to parse that @{ML_text p} is immediately followed by @{ML_text q}, or start a completely different parser @{ML_text r}, @@ -214,6 +220,16 @@ @{ML "Scan.repeat1"} is similar, but requires that the parser @{ML_text "p"} succeeds at least once. + Also note that the parser would have aborted with the exception @{ML_text MORE}, if + we had run it only on just @{ML_text [quotes] "hhhh"}. This can be awoided using + the wrapper @{ML Scan.finite} and the ``stopper-token'' @{ML Symbol.stopper}. With + them we can write + + @{ML_response [display] "Scan.finite Symbol.stopper (Scan.repeat ($$ \"h\")) (explode \"hhhh\")" + "([\"h\", \"h\", \"h\", \"h\"], [])"} + + However, the Isabelle-develloper rarely needs to do this kind of wrapping manually. + After parsing succeeded, one nearly always wants to apply a function on the parsed items. This is done using the function @{ML_open "(p >> f)" for p f} which runs first the parser @{ML_text p} and upon successful completion applies the @@ -264,10 +280,10 @@ generating precise error messages. The following\footnote{There is something funny going on with the pretty printing of the result token list.} -@{ML_response [display] "OuterSyntax.scan Position.none \"hello world\"" -"[OuterLex.Token (\,(OuterLex.Ident, \"hello\"),\), - OuterLex.Token (\,(OuterLex.Space, \" \"),\), - OuterLex.Token (\,(OuterLex.Ident, \"world\"),\)]"} +@{ML_response_fake [display] "OuterSyntax.scan Position.none \"hello world\"" +"[Token (\,(OuterLex.Ident, \"hello\"),\), + Token (\,(OuterLex.Space, \" \"),\), + Token (\,(OuterLex.Ident, \"world\"),\)]"} produces three tokens where the first and the last are identifiers, since @{ML_text [quotes] "hello"} and @{ML_text [quotes] "world"} do not match @@ -275,10 +291,10 @@ later on will require spaces, comments and the like to have been filtered out. If we parse -@{ML_response [display] "OuterSyntax.scan Position.none \"inductive|for\"" -"[OuterLex.Token (\,(OuterLex.Command, \"inductive\"),\), - OuterLex.Token (\,(OuterLex.Keyword, \"|\"),\), - OuterLex.Token (\,(OuterLex.Keyword, \"for\"),\)]"} +@{ML_response_fake [display] "OuterSyntax.scan Position.none \"inductive|for\"" +"[Token (\,(OuterLex.Command, \"inductive\"),\), + Token (\,(OuterLex.Keyword, \"|\"),\), + Token (\,(OuterLex.Keyword, \"for\"),\)]"} we obtain a list consiting of only command and keyword tokens. If you want to see which keywords and commands are currently known, use @@ -316,7 +332,7 @@ The parser @{ML_open "OuterParse.enum s p" for s p} parses a possibly empty list of items recognised by the parser @{ML_text p}, where the items are - separated by @{ML_text s}. For example + separated by the string @{ML_text s}. For example @{ML_response [display] "let @@ -326,14 +342,62 @@ end" "([\"in\",\"in\",\"in\"],[\])"} + @{ML "OuterParse.enum1"} works similarly, except that the parsed list must + be non-empty. + Note that we had to add a @{ML_text [quotes] "\\n"} at the end of the parsed string, otherwise the parser would have consumed all tokens and then failed with - the exception @{ML_text "MORE"}. @{ML "OuterParse.enum1"} works similarly, - except that the parsed list must be non-empty. + the exception @{ML_text "MORE"}. Like in the previous section, we can avoid + this exception using the wrapper @{ML Scan.finite}. This time, however, we + have to use the ``stopper-token'' @{ML OuterLex.stopper}. We can write + +@{ML_response [display] +"let + val input = OuterSyntax.scan Position.none \"in|in|in\" +in + Scan.finite OuterLex.stopper (OuterParse.enum \"|\" (OuterParse.$$$ \"in\")) input +end" +"([\"in\",\"in\",\"in\"],[])"} + + The function @{ML "OuterParse.!!!"} can be used to force termination of the + parser in case of a dead end, just like @{ML "Scan.!!"} (see previous section), + except that the error message is fixed to be @{text [quotes] "Outer syntax error"} + with a relatively precise description of the failure. For example: + +@{ML_response_fake [display] +"let + val input = OuterSyntax.scan Position.none \"in|\" + val parse_bar_then_in = OuterParse.$$$ \"|\" -- OuterParse.$$$ \"in\" +in + Scan.error (OuterParse.!!! parse_bar_then_in) input +end" +"Exception ERROR \"Outer syntax error: keyword \"|\" expected, +but keyword in was found\" raised" +} *} -text {* FIXME explain @{ML "OuterParse.!!!"} *} + +ML {* +let + val input = filter OuterLex.is_proper (OuterSyntax.scan Position.none "(in foo)") +in + OuterParse.target input +end +*} + +section {* Positional Information *} + +text {* + + @{ML OuterParse.position} + +*} + +ML {* + OuterParse.position +*} + section {* Parsing Inner Syntax *}