--- a/ProgTutorial/Parsing.thy Wed Apr 15 13:11:08 2009 +0000
+++ b/ProgTutorial/Parsing.thy Sat Apr 25 14:28:58 2009 +0200
@@ -35,24 +35,28 @@
text {*
- Let us first have a look at parsing strings using generic parsing combinators.
- The function @{ML "$$"} takes a string as argument and will ``consume'' this string from
- a given input list of strings. ``Consume'' in this context means that it will
- return a pair consisting of this string and the rest of the input list.
- For example:
+ Let us first have a look at parsing strings using generic parsing
+ combinators. The function @{ML "$$"} takes a string as argument and will
+ ``consume'' this string from a given input list of strings. ``Consume'' in
+ this context means that it will return a pair consisting of this string and
+ the rest of the input list. For example:
- @{ML_response [display,gray] "($$ \"h\") (explode \"hello\")" "(\"h\", [\"e\", \"l\", \"l\", \"o\"])"}
+ @{ML_response [display,gray]
+ "($$ \"h\") (Symbol.explode \"hello\")" "(\"h\", [\"e\", \"l\", \"l\", \"o\"])"}
- @{ML_response [display,gray] "($$ \"w\") (explode \"world\")" "(\"w\", [\"o\", \"r\", \"l\", \"d\"])"}
+ @{ML_response [display,gray]
+ "($$ \"w\") (Symbol.explode \"world\")" "(\"w\", [\"o\", \"r\", \"l\", \"d\"])"}
- The function @{ML "$$"} will either succeed (as in the two examples above) or raise the exception
- @{text "FAIL"} if no string can be consumed. For example trying to parse
+ The function @{ML "$$"} will either succeed (as in the two examples above)
+ or raise the exception @{text "FAIL"} if no string can be consumed. For
+ example trying to parse
- @{ML_response_fake [display,gray] "($$ \"x\") (explode \"world\")"
- "Exception FAIL raised"}
+ @{ML_response_fake [display,gray]
+ "($$ \"x\") (Symbol.explode \"world\")"
+ "Exception FAIL raised"}
- will raise the exception @{text "FAIL"}.
- There are three exceptions used in the parsing combinators:
+ will raise the exception @{text "FAIL"}. There are three exceptions used in
+ the parsing combinators:
\begin{itemize}
\item @{text "FAIL"} is used to indicate that alternative routes of parsing
@@ -65,19 +69,33 @@
However, note that these exceptions are private to the parser and cannot be accessed
by the programmer (for example to handle them).
-
+
+ In the examples above we use the function @{ML Symbol.explode}, instead of the
+ more standard library function @{ML explode}, for obtaining an input list for
+ the parser. The reason is that @{ML Symbol.explode} is aware of character sequences,
+ for example @{text "\\<foo>"}, that have a special meaning in Isabelle. To see
+ the difference consider
+
+@{ML_response_fake [display,gray]
+"let
+ val input = \"\\<foo> bar\"
+in
+ (explode input, Symbol.explode input)
+end"
+"([\"\\\", \"<\", \"f\", \"o\", \"o\", \">\", \" \", \"b\", \"a\", \"r\"],
+ [\"\\<foo>\", \" \", \"b\", \"a\", \"r\"])"}
+
Slightly more general than the parser @{ML "$$"} is the function @{ML
Scan.one}, in that it takes a predicate as argument and then parses exactly
one item from the input list satisfying this predicate. For example the
following parser either consumes an @{text [quotes] "h"} or a @{text
[quotes] "w"}:
-
@{ML_response [display,gray]
"let
val hw = Scan.one (fn x => x = \"h\" orelse x = \"w\")
- val input1 = explode \"hello\"
- val input2 = explode \"world\"
+ val input1 = Symbol.explode \"hello\"
+ val input2 = Symbol.explode \"world\"
in
(hw input1, hw input2)
end"
@@ -88,7 +106,7 @@
order) you can achieve by:
@{ML_response [display,gray]
- "($$ \"h\" -- $$ \"e\" -- $$ \"l\") (explode \"hello\")"
+ "($$ \"h\" -- $$ \"e\" -- $$ \"l\") (Symbol.explode \"hello\")"
"(((\"h\", \"e\"), \"l\"), [\"l\", \"o\"])"}
Note how the result of consumed strings builds up on the left as nested pairs.
@@ -97,7 +115,7 @@
then you should use the function @{ML Scan.this_string}:
@{ML_response [display,gray]
- "Scan.this_string \"hell\" (explode \"hello\")"
+ "Scan.this_string \"hell\" (Symbol.explode \"hello\")"
"(\"hell\", [\"o\"])"}
Parsers that explore alternatives can be constructed using the function @{ML
@@ -109,8 +127,8 @@
@{ML_response [display,gray]
"let
val hw = $$ \"h\" || $$ \"w\"
- val input1 = explode \"hello\"
- val input2 = explode \"world\"
+ val input1 = Symbol.explode \"hello\"
+ val input2 = Symbol.explode \"world\"
in
(hw input1, hw input2)
end"
@@ -124,7 +142,7 @@
"let
val just_e = $$ \"h\" |-- $$ \"e\"
val just_h = $$ \"h\" --| $$ \"e\"
- val input = explode \"hello\"
+ val input = Symbol.explode \"hello\"
in
(just_e input, just_h input)
end"
@@ -137,8 +155,8 @@
@{ML_response [display,gray]
"let
val p = Scan.optional ($$ \"h\") \"x\"
- val input1 = explode \"hello\"
- val input2 = explode \"world\"
+ val input1 = Symbol.explode \"hello\"
+ val input2 = Symbol.explode \"world\"
in
(p input1, p input2)
end"
@@ -150,8 +168,8 @@
@{ML_response [display,gray]
"let
val p = Scan.option ($$ \"h\")
- val input1 = explode \"hello\"
- val input2 = explode \"world\"
+ val input1 = Symbol.explode \"hello\"
+ val input2 = Symbol.explode \"world\"
in
(p input1, p input2)
end" "((SOME \"h\", [\"e\", \"l\", \"l\", \"o\"]), (NONE, [\"w\", \"o\", \"r\", \"l\", \"d\"]))"}
@@ -182,12 +200,12 @@
on @{text [quotes] "hello"}, the parsing succeeds
@{ML_response [display,gray]
- "(!! (fn _ => \"foo\") ($$ \"h\")) (explode \"hello\")"
+ "(!! (fn _ => \"foo\") ($$ \"h\")) (Symbol.explode \"hello\")"
"(\"h\", [\"e\", \"l\", \"l\", \"o\"])"}
but if you invoke it on @{text [quotes] "world"}
- @{ML_response_fake [display,gray] "(!! (fn _ => \"foo\") ($$ \"h\")) (explode \"world\")"
+ @{ML_response_fake [display,gray] "(!! (fn _ => \"foo\") ($$ \"h\")) (Symbol.explode \"world\")"
"Exception ABORT raised"}
then the parsing aborts and the error message @{text "foo"} is printed. In order to
@@ -219,12 +237,12 @@
@{text [quotes] "h"}, @{text [quotes] "e"} and @{text [quotes] "w"}, and
the input @{text [quotes] "holle"}
- @{ML_response_fake [display,gray] "Scan.error (p_followed_by_q \"h\" \"e\" \"w\") (explode \"holle\")"
+ @{ML_response_fake [display,gray] "Scan.error (p_followed_by_q \"h\" \"e\" \"w\") (Symbol.explode \"holle\")"
"Exception ERROR \"h is not followed by e\" raised"}
produces the correct error message. Running it with
- @{ML_response [display,gray] "Scan.error (p_followed_by_q \"h\" \"e\" \"w\") (explode \"wworld\")"
+ @{ML_response [display,gray] "Scan.error (p_followed_by_q \"h\" \"e\" \"w\") (Symbol.explode \"wworld\")"
"((\"w\", \"w\"), [\"o\", \"r\", \"l\", \"d\"])"}
yields the expected parsing.
@@ -232,7 +250,7 @@
The function @{ML "Scan.repeat p" for p} will apply a parser @{text p} as
often as it succeeds. For example:
- @{ML_response [display,gray] "Scan.repeat ($$ \"h\") (explode \"hhhhello\")"
+ @{ML_response [display,gray] "Scan.repeat ($$ \"h\") (Symbol.explode \"hhhhello\")"
"([\"h\", \"h\", \"h\", \"h\"], [\"e\", \"l\", \"l\", \"o\"])"}
Note that @{ML "Scan.repeat"} stores the parsed items in a list. The function
@@ -244,7 +262,7 @@
the wrapper @{ML Scan.finite} and the ``stopper-token'' @{ML Symbol.stopper}. With
them you can write:
- @{ML_response [display,gray] "Scan.finite Symbol.stopper (Scan.repeat ($$ \"h\")) (explode \"hhhh\")"
+ @{ML_response [display,gray] "Scan.finite Symbol.stopper (Scan.repeat ($$ \"h\")) (Symbol.explode \"hhhh\")"
"([\"h\", \"h\", \"h\", \"h\"], [])"}
@{ML Symbol.stopper} is the ``end-of-input'' indicator for parsing strings;
@@ -257,7 +275,7 @@
@{ML_response [display,gray]
"let
val p = Scan.repeat (Scan.one Symbol.not_eof)
- val input = explode \"foo bar foo\"
+ val input = Symbol.explode \"foo bar foo\"
in
Scan.finite Symbol.stopper p input
end"
@@ -269,12 +287,12 @@
The function @{ML "Scan.unless p q" for p q} takes two parsers: if the first one can
parse the input, then the whole parser fails; if not, then the second is tried. Therefore
- @{ML_response_fake_both [display,gray] "Scan.unless ($$ \"h\") ($$ \"w\") (explode \"hello\")"
+ @{ML_response_fake_both [display,gray] "Scan.unless ($$ \"h\") ($$ \"w\") (Symbol.explode \"hello\")"
"Exception FAIL raised"}
fails, while
- @{ML_response [display,gray] "Scan.unless ($$ \"h\") ($$ \"w\") (explode \"world\")"
+ @{ML_response [display,gray] "Scan.unless ($$ \"h\") ($$ \"w\") (Symbol.explode \"world\")"
"(\"w\",[\"o\", \"r\", \"l\", \"d\"])"}
succeeds.
@@ -286,8 +304,8 @@
@{ML_response [display,gray]
"let
val p = Scan.repeat (Scan.unless ($$ \"*\") (Scan.one Symbol.not_eof))
- val input1 = explode \"fooooo\"
- val input2 = explode \"foo*ooo\"
+ val input1 = Symbol.explode \"fooooo\"
+ val input2 = Symbol.explode \"foo*ooo\"
in
(Scan.finite Symbol.stopper p input1,
Scan.finite Symbol.stopper p input2)
@@ -304,7 +322,7 @@
"let
fun double (x, y) = (x ^ x, y ^ y)
in
- (($$ \"h\") -- ($$ \"e\") >> double) (explode \"hello\")
+ (($$ \"h\") -- ($$ \"e\") >> double) (Symbol.explode \"hello\")
end"
"((\"hh\", \"ee\"), [\"l\", \"l\", \"o\"])"}
@@ -313,7 +331,7 @@
@{ML_response [display,gray]
"let
val p = Scan.repeat (Scan.one Symbol.not_eof)
- val input = explode \"foo bar foo\"
+ val input = Symbol.explode \"foo bar foo\"
in
Scan.finite Symbol.stopper (p >> implode) input
end"
@@ -328,7 +346,7 @@
input unchanged. For example:
@{ML_response [display,gray]
- "Scan.ahead (Scan.this_string \"foo\") (explode \"foo\")"
+ "Scan.ahead (Scan.this_string \"foo\") (Symbol.explode \"foo\")"
"(\"foo\", [\"f\", \"o\", \"o\"])"}
The function @{ML Scan.lift} takes a parser and a pair as arguments. This function applies
@@ -336,7 +354,7 @@
untouched. For example
@{ML_response [display,gray]
-"Scan.lift ($$ \"h\" -- $$ \"e\") (1, explode \"hello\")"
+"Scan.lift ($$ \"h\" -- $$ \"e\") (1, Symbol.explode \"hello\")"
"((\"h\", \"e\"), (1, [\"l\", \"l\", \"o\"]))"}
(FIXME: In which situations is this useful? Give examples.)
@@ -1757,8 +1775,7 @@
structure \texttt{Method}, but not published in its signature.
The source file \texttt{src/Pure/Isar/method.ML} shows the use of
\texttt{Method.add\_method} to add a number of methods.
-
+*}
-*}
(*>*)
end