diff -r b63c72776f03 -r d111f5988e49 ProgTutorial/Parsing.thy --- a/ProgTutorial/Parsing.thy Wed Apr 15 13:11:08 2009 +0000 +++ b/ProgTutorial/Parsing.thy Sat Apr 25 14:28:58 2009 +0200 @@ -35,24 +35,28 @@ text {* - Let us first have a look at parsing strings using generic parsing combinators. - The function @{ML "$$"} takes a string as argument and will ``consume'' this string from - a given input list of strings. ``Consume'' in this context means that it will - return a pair consisting of this string and the rest of the input list. - For example: + Let us first have a look at parsing strings using generic parsing + combinators. The function @{ML "$$"} takes a string as argument and will + ``consume'' this string from a given input list of strings. ``Consume'' in + this context means that it will return a pair consisting of this string and + the rest of the input list. For example: - @{ML_response [display,gray] "($$ \"h\") (explode \"hello\")" "(\"h\", [\"e\", \"l\", \"l\", \"o\"])"} + @{ML_response [display,gray] + "($$ \"h\") (Symbol.explode \"hello\")" "(\"h\", [\"e\", \"l\", \"l\", \"o\"])"} - @{ML_response [display,gray] "($$ \"w\") (explode \"world\")" "(\"w\", [\"o\", \"r\", \"l\", \"d\"])"} + @{ML_response [display,gray] + "($$ \"w\") (Symbol.explode \"world\")" "(\"w\", [\"o\", \"r\", \"l\", \"d\"])"} - The function @{ML "$$"} will either succeed (as in the two examples above) or raise the exception - @{text "FAIL"} if no string can be consumed. For example trying to parse + The function @{ML "$$"} will either succeed (as in the two examples above) + or raise the exception @{text "FAIL"} if no string can be consumed. For + example trying to parse - @{ML_response_fake [display,gray] "($$ \"x\") (explode \"world\")" - "Exception FAIL raised"} + @{ML_response_fake [display,gray] + "($$ \"x\") (Symbol.explode \"world\")" + "Exception FAIL raised"} - will raise the exception @{text "FAIL"}. - There are three exceptions used in the parsing combinators: + will raise the exception @{text "FAIL"}. There are three exceptions used in + the parsing combinators: \begin{itemize} \item @{text "FAIL"} is used to indicate that alternative routes of parsing @@ -65,19 +69,33 @@ However, note that these exceptions are private to the parser and cannot be accessed by the programmer (for example to handle them). - + + In the examples above we use the function @{ML Symbol.explode}, instead of the + more standard library function @{ML explode}, for obtaining an input list for + the parser. The reason is that @{ML Symbol.explode} is aware of character sequences, + for example @{text "\\"}, that have a special meaning in Isabelle. To see + the difference consider + +@{ML_response_fake [display,gray] +"let + val input = \"\\ bar\" +in + (explode input, Symbol.explode input) +end" +"([\"\\\", \"<\", \"f\", \"o\", \"o\", \">\", \" \", \"b\", \"a\", \"r\"], + [\"\\\", \" \", \"b\", \"a\", \"r\"])"} + Slightly more general than the parser @{ML "$$"} is the function @{ML Scan.one}, in that it takes a predicate as argument and then parses exactly one item from the input list satisfying this predicate. For example the following parser either consumes an @{text [quotes] "h"} or a @{text [quotes] "w"}: - @{ML_response [display,gray] "let val hw = Scan.one (fn x => x = \"h\" orelse x = \"w\") - val input1 = explode \"hello\" - val input2 = explode \"world\" + val input1 = Symbol.explode \"hello\" + val input2 = Symbol.explode \"world\" in (hw input1, hw input2) end" @@ -88,7 +106,7 @@ order) you can achieve by: @{ML_response [display,gray] - "($$ \"h\" -- $$ \"e\" -- $$ \"l\") (explode \"hello\")" + "($$ \"h\" -- $$ \"e\" -- $$ \"l\") (Symbol.explode \"hello\")" "(((\"h\", \"e\"), \"l\"), [\"l\", \"o\"])"} Note how the result of consumed strings builds up on the left as nested pairs. @@ -97,7 +115,7 @@ then you should use the function @{ML Scan.this_string}: @{ML_response [display,gray] - "Scan.this_string \"hell\" (explode \"hello\")" + "Scan.this_string \"hell\" (Symbol.explode \"hello\")" "(\"hell\", [\"o\"])"} Parsers that explore alternatives can be constructed using the function @{ML @@ -109,8 +127,8 @@ @{ML_response [display,gray] "let val hw = $$ \"h\" || $$ \"w\" - val input1 = explode \"hello\" - val input2 = explode \"world\" + val input1 = Symbol.explode \"hello\" + val input2 = Symbol.explode \"world\" in (hw input1, hw input2) end" @@ -124,7 +142,7 @@ "let val just_e = $$ \"h\" |-- $$ \"e\" val just_h = $$ \"h\" --| $$ \"e\" - val input = explode \"hello\" + val input = Symbol.explode \"hello\" in (just_e input, just_h input) end" @@ -137,8 +155,8 @@ @{ML_response [display,gray] "let val p = Scan.optional ($$ \"h\") \"x\" - val input1 = explode \"hello\" - val input2 = explode \"world\" + val input1 = Symbol.explode \"hello\" + val input2 = Symbol.explode \"world\" in (p input1, p input2) end" @@ -150,8 +168,8 @@ @{ML_response [display,gray] "let val p = Scan.option ($$ \"h\") - val input1 = explode \"hello\" - val input2 = explode \"world\" + val input1 = Symbol.explode \"hello\" + val input2 = Symbol.explode \"world\" in (p input1, p input2) end" "((SOME \"h\", [\"e\", \"l\", \"l\", \"o\"]), (NONE, [\"w\", \"o\", \"r\", \"l\", \"d\"]))"} @@ -182,12 +200,12 @@ on @{text [quotes] "hello"}, the parsing succeeds @{ML_response [display,gray] - "(!! (fn _ => \"foo\") ($$ \"h\")) (explode \"hello\")" + "(!! (fn _ => \"foo\") ($$ \"h\")) (Symbol.explode \"hello\")" "(\"h\", [\"e\", \"l\", \"l\", \"o\"])"} but if you invoke it on @{text [quotes] "world"} - @{ML_response_fake [display,gray] "(!! (fn _ => \"foo\") ($$ \"h\")) (explode \"world\")" + @{ML_response_fake [display,gray] "(!! (fn _ => \"foo\") ($$ \"h\")) (Symbol.explode \"world\")" "Exception ABORT raised"} then the parsing aborts and the error message @{text "foo"} is printed. In order to @@ -219,12 +237,12 @@ @{text [quotes] "h"}, @{text [quotes] "e"} and @{text [quotes] "w"}, and the input @{text [quotes] "holle"} - @{ML_response_fake [display,gray] "Scan.error (p_followed_by_q \"h\" \"e\" \"w\") (explode \"holle\")" + @{ML_response_fake [display,gray] "Scan.error (p_followed_by_q \"h\" \"e\" \"w\") (Symbol.explode \"holle\")" "Exception ERROR \"h is not followed by e\" raised"} produces the correct error message. Running it with - @{ML_response [display,gray] "Scan.error (p_followed_by_q \"h\" \"e\" \"w\") (explode \"wworld\")" + @{ML_response [display,gray] "Scan.error (p_followed_by_q \"h\" \"e\" \"w\") (Symbol.explode \"wworld\")" "((\"w\", \"w\"), [\"o\", \"r\", \"l\", \"d\"])"} yields the expected parsing. @@ -232,7 +250,7 @@ The function @{ML "Scan.repeat p" for p} will apply a parser @{text p} as often as it succeeds. For example: - @{ML_response [display,gray] "Scan.repeat ($$ \"h\") (explode \"hhhhello\")" + @{ML_response [display,gray] "Scan.repeat ($$ \"h\") (Symbol.explode \"hhhhello\")" "([\"h\", \"h\", \"h\", \"h\"], [\"e\", \"l\", \"l\", \"o\"])"} Note that @{ML "Scan.repeat"} stores the parsed items in a list. The function @@ -244,7 +262,7 @@ the wrapper @{ML Scan.finite} and the ``stopper-token'' @{ML Symbol.stopper}. With them you can write: - @{ML_response [display,gray] "Scan.finite Symbol.stopper (Scan.repeat ($$ \"h\")) (explode \"hhhh\")" + @{ML_response [display,gray] "Scan.finite Symbol.stopper (Scan.repeat ($$ \"h\")) (Symbol.explode \"hhhh\")" "([\"h\", \"h\", \"h\", \"h\"], [])"} @{ML Symbol.stopper} is the ``end-of-input'' indicator for parsing strings; @@ -257,7 +275,7 @@ @{ML_response [display,gray] "let val p = Scan.repeat (Scan.one Symbol.not_eof) - val input = explode \"foo bar foo\" + val input = Symbol.explode \"foo bar foo\" in Scan.finite Symbol.stopper p input end" @@ -269,12 +287,12 @@ The function @{ML "Scan.unless p q" for p q} takes two parsers: if the first one can parse the input, then the whole parser fails; if not, then the second is tried. Therefore - @{ML_response_fake_both [display,gray] "Scan.unless ($$ \"h\") ($$ \"w\") (explode \"hello\")" + @{ML_response_fake_both [display,gray] "Scan.unless ($$ \"h\") ($$ \"w\") (Symbol.explode \"hello\")" "Exception FAIL raised"} fails, while - @{ML_response [display,gray] "Scan.unless ($$ \"h\") ($$ \"w\") (explode \"world\")" + @{ML_response [display,gray] "Scan.unless ($$ \"h\") ($$ \"w\") (Symbol.explode \"world\")" "(\"w\",[\"o\", \"r\", \"l\", \"d\"])"} succeeds. @@ -286,8 +304,8 @@ @{ML_response [display,gray] "let val p = Scan.repeat (Scan.unless ($$ \"*\") (Scan.one Symbol.not_eof)) - val input1 = explode \"fooooo\" - val input2 = explode \"foo*ooo\" + val input1 = Symbol.explode \"fooooo\" + val input2 = Symbol.explode \"foo*ooo\" in (Scan.finite Symbol.stopper p input1, Scan.finite Symbol.stopper p input2) @@ -304,7 +322,7 @@ "let fun double (x, y) = (x ^ x, y ^ y) in - (($$ \"h\") -- ($$ \"e\") >> double) (explode \"hello\") + (($$ \"h\") -- ($$ \"e\") >> double) (Symbol.explode \"hello\") end" "((\"hh\", \"ee\"), [\"l\", \"l\", \"o\"])"} @@ -313,7 +331,7 @@ @{ML_response [display,gray] "let val p = Scan.repeat (Scan.one Symbol.not_eof) - val input = explode \"foo bar foo\" + val input = Symbol.explode \"foo bar foo\" in Scan.finite Symbol.stopper (p >> implode) input end" @@ -328,7 +346,7 @@ input unchanged. For example: @{ML_response [display,gray] - "Scan.ahead (Scan.this_string \"foo\") (explode \"foo\")" + "Scan.ahead (Scan.this_string \"foo\") (Symbol.explode \"foo\")" "(\"foo\", [\"f\", \"o\", \"o\"])"} The function @{ML Scan.lift} takes a parser and a pair as arguments. This function applies @@ -336,7 +354,7 @@ untouched. For example @{ML_response [display,gray] -"Scan.lift ($$ \"h\" -- $$ \"e\") (1, explode \"hello\")" +"Scan.lift ($$ \"h\" -- $$ \"e\") (1, Symbol.explode \"hello\")" "((\"h\", \"e\"), (1, [\"l\", \"l\", \"o\"]))"} (FIXME: In which situations is this useful? Give examples.) @@ -1757,8 +1775,7 @@ structure \texttt{Method}, but not published in its signature. The source file \texttt{src/Pure/Isar/method.ML} shows the use of \texttt{Method.add\_method} to add a number of methods. - +*} -*} (*>*) end