ProgTutorial/Parsing.thy
changeset 240 d111f5988e49
parent 236 7b6d81ff9d9a
child 241 29d4701c5ee1
--- a/ProgTutorial/Parsing.thy	Wed Apr 15 13:11:08 2009 +0000
+++ b/ProgTutorial/Parsing.thy	Sat Apr 25 14:28:58 2009 +0200
@@ -35,24 +35,28 @@
 
 text {*
 
-  Let us first have a look at parsing strings using generic parsing combinators. 
-  The function @{ML "$$"} takes a string as argument and will ``consume'' this string from 
-  a given input list of strings. ``Consume'' in this context means that it will 
-  return a pair consisting of this string and the rest of the input list. 
-  For example:
+  Let us first have a look at parsing strings using generic parsing
+  combinators. The function @{ML "$$"} takes a string as argument and will
+  ``consume'' this string from a given input list of strings. ``Consume'' in
+  this context means that it will return a pair consisting of this string and
+  the rest of the input list. For example:
 
-  @{ML_response [display,gray] "($$ \"h\") (explode \"hello\")" "(\"h\", [\"e\", \"l\", \"l\", \"o\"])"}
+  @{ML_response [display,gray] 
+  "($$ \"h\") (Symbol.explode \"hello\")" "(\"h\", [\"e\", \"l\", \"l\", \"o\"])"}
 
-  @{ML_response [display,gray] "($$ \"w\") (explode \"world\")" "(\"w\", [\"o\", \"r\", \"l\", \"d\"])"}
+  @{ML_response [display,gray] 
+  "($$ \"w\") (Symbol.explode \"world\")" "(\"w\", [\"o\", \"r\", \"l\", \"d\"])"}
 
-  The function @{ML "$$"} will either succeed (as in the two examples above) or raise the exception 
-  @{text "FAIL"} if no string can be consumed. For example trying to parse
+  The function @{ML "$$"} will either succeed (as in the two examples above)
+  or raise the exception @{text "FAIL"} if no string can be consumed. For
+  example trying to parse
 
-  @{ML_response_fake [display,gray] "($$ \"x\") (explode \"world\")" 
-                               "Exception FAIL raised"}
+  @{ML_response_fake [display,gray] 
+  "($$ \"x\") (Symbol.explode \"world\")" 
+  "Exception FAIL raised"}
   
-  will raise the exception @{text "FAIL"}.
-  There are three exceptions used in the parsing combinators:
+  will raise the exception @{text "FAIL"}.  There are three exceptions used in
+  the parsing combinators:
 
   \begin{itemize}
   \item @{text "FAIL"} is used to indicate that alternative routes of parsing 
@@ -65,19 +69,33 @@
 
   However, note that these exceptions are private to the parser and cannot be accessed
   by the programmer (for example to handle them).
-  
+
+  In the examples above we use the function @{ML Symbol.explode}, instead of the 
+  more standard library function @{ML explode}, for obtaining an input list for 
+  the parser. The reason is that @{ML Symbol.explode} is aware of character sequences,
+  for example @{text "\\<foo>"}, that have a special meaning in Isabelle. To see
+  the difference consider
+
+@{ML_response_fake [display,gray]
+"let 
+  val input = \"\\<foo> bar\"
+in
+  (explode input, Symbol.explode input)
+end"
+"([\"\\\", \"<\", \"f\", \"o\", \"o\", \">\", \" \", \"b\", \"a\", \"r\"],
+ [\"\\<foo>\", \" \", \"b\", \"a\", \"r\"])"}
+
   Slightly more general than the parser @{ML "$$"} is the function @{ML
   Scan.one}, in that it takes a predicate as argument and then parses exactly
   one item from the input list satisfying this predicate. For example the
   following parser either consumes an @{text [quotes] "h"} or a @{text
   [quotes] "w"}:
 
-
 @{ML_response [display,gray] 
 "let 
   val hw = Scan.one (fn x => x = \"h\" orelse x = \"w\")
-  val input1 = explode \"hello\"
-  val input2 = explode \"world\"
+  val input1 = Symbol.explode \"hello\"
+  val input2 = Symbol.explode \"world\"
 in
   (hw input1, hw input2)
 end"
@@ -88,7 +106,7 @@
   order) you can achieve by:
 
   @{ML_response [display,gray] 
-  "($$ \"h\" -- $$ \"e\" -- $$ \"l\") (explode \"hello\")"
+  "($$ \"h\" -- $$ \"e\" -- $$ \"l\") (Symbol.explode \"hello\")"
   "(((\"h\", \"e\"), \"l\"), [\"l\", \"o\"])"}
 
   Note how the result of consumed strings builds up on the left as nested pairs.  
@@ -97,7 +115,7 @@
   then you should use the function @{ML Scan.this_string}:
 
   @{ML_response [display,gray] 
-  "Scan.this_string \"hell\" (explode \"hello\")"
+  "Scan.this_string \"hell\" (Symbol.explode \"hello\")"
   "(\"hell\", [\"o\"])"}
 
   Parsers that explore alternatives can be constructed using the function @{ML
@@ -109,8 +127,8 @@
 @{ML_response [display,gray] 
 "let 
   val hw = $$ \"h\" || $$ \"w\"
-  val input1 = explode \"hello\"
-  val input2 = explode \"world\"
+  val input1 = Symbol.explode \"hello\"
+  val input2 = Symbol.explode \"world\"
 in
   (hw input1, hw input2)
 end"
@@ -124,7 +142,7 @@
 "let 
   val just_e = $$ \"h\" |-- $$ \"e\" 
   val just_h = $$ \"h\" --| $$ \"e\" 
-  val input = explode \"hello\"  
+  val input = Symbol.explode \"hello\"  
 in 
   (just_e input, just_h input)
 end"
@@ -137,8 +155,8 @@
 @{ML_response [display,gray]
 "let 
   val p = Scan.optional ($$ \"h\") \"x\"
-  val input1 = explode \"hello\"
-  val input2 = explode \"world\"  
+  val input1 = Symbol.explode \"hello\"
+  val input2 = Symbol.explode \"world\"  
 in 
   (p input1, p input2)
 end" 
@@ -150,8 +168,8 @@
 @{ML_response [display,gray]
 "let 
   val p = Scan.option ($$ \"h\")
-  val input1 = explode \"hello\"
-  val input2 = explode \"world\"  
+  val input1 = Symbol.explode \"hello\"
+  val input2 = Symbol.explode \"world\"  
 in 
   (p input1, p input2)
 end" "((SOME \"h\", [\"e\", \"l\", \"l\", \"o\"]), (NONE, [\"w\", \"o\", \"r\", \"l\", \"d\"]))"}
@@ -182,12 +200,12 @@
   on @{text [quotes] "hello"}, the parsing succeeds
 
   @{ML_response [display,gray] 
-  "(!! (fn _ => \"foo\") ($$ \"h\")) (explode \"hello\")" 
+  "(!! (fn _ => \"foo\") ($$ \"h\")) (Symbol.explode \"hello\")" 
   "(\"h\", [\"e\", \"l\", \"l\", \"o\"])"}
 
   but if you invoke it on @{text [quotes] "world"}
   
-  @{ML_response_fake [display,gray] "(!! (fn _ => \"foo\") ($$ \"h\")) (explode \"world\")"
+  @{ML_response_fake [display,gray] "(!! (fn _ => \"foo\") ($$ \"h\")) (Symbol.explode \"world\")"
                                "Exception ABORT raised"}
 
   then the parsing aborts and the error message @{text "foo"} is printed. In order to
@@ -219,12 +237,12 @@
   @{text [quotes] "h"}, @{text [quotes] "e"} and @{text [quotes] "w"}, and 
   the input @{text [quotes] "holle"} 
 
-  @{ML_response_fake [display,gray] "Scan.error (p_followed_by_q \"h\" \"e\" \"w\") (explode \"holle\")"
+  @{ML_response_fake [display,gray] "Scan.error (p_followed_by_q \"h\" \"e\" \"w\") (Symbol.explode \"holle\")"
                                "Exception ERROR \"h is not followed by e\" raised"} 
 
   produces the correct error message. Running it with
  
-  @{ML_response [display,gray] "Scan.error (p_followed_by_q \"h\" \"e\" \"w\") (explode \"wworld\")"
+  @{ML_response [display,gray] "Scan.error (p_followed_by_q \"h\" \"e\" \"w\") (Symbol.explode \"wworld\")"
                           "((\"w\", \"w\"), [\"o\", \"r\", \"l\", \"d\"])"}
   
   yields the expected parsing. 
@@ -232,7 +250,7 @@
   The function @{ML "Scan.repeat p" for p} will apply a parser @{text p} as 
   often as it succeeds. For example:
   
-  @{ML_response [display,gray] "Scan.repeat ($$ \"h\") (explode \"hhhhello\")" 
+  @{ML_response [display,gray] "Scan.repeat ($$ \"h\") (Symbol.explode \"hhhhello\")" 
                 "([\"h\", \"h\", \"h\", \"h\"], [\"e\", \"l\", \"l\", \"o\"])"}
   
   Note that @{ML "Scan.repeat"} stores the parsed items in a list. The function
@@ -244,7 +262,7 @@
   the wrapper @{ML Scan.finite} and the ``stopper-token'' @{ML Symbol.stopper}. With
   them you can write:
   
-  @{ML_response [display,gray] "Scan.finite Symbol.stopper (Scan.repeat ($$ \"h\")) (explode \"hhhh\")" 
+  @{ML_response [display,gray] "Scan.finite Symbol.stopper (Scan.repeat ($$ \"h\")) (Symbol.explode \"hhhh\")" 
                 "([\"h\", \"h\", \"h\", \"h\"], [])"}
 
   @{ML Symbol.stopper} is the ``end-of-input'' indicator for parsing strings;
@@ -257,7 +275,7 @@
   @{ML_response [display,gray] 
 "let 
    val p = Scan.repeat (Scan.one Symbol.not_eof)
-   val input = explode \"foo bar foo\"
+   val input = Symbol.explode \"foo bar foo\"
 in
    Scan.finite Symbol.stopper p input
 end" 
@@ -269,12 +287,12 @@
   The function @{ML "Scan.unless p q" for p q} takes two parsers: if the first one can 
   parse the input, then the whole parser fails; if not, then the second is tried. Therefore
   
-  @{ML_response_fake_both [display,gray] "Scan.unless ($$ \"h\") ($$ \"w\") (explode \"hello\")"
+  @{ML_response_fake_both [display,gray] "Scan.unless ($$ \"h\") ($$ \"w\") (Symbol.explode \"hello\")"
                                "Exception FAIL raised"}
 
   fails, while
 
-  @{ML_response [display,gray] "Scan.unless ($$ \"h\") ($$ \"w\") (explode \"world\")"
+  @{ML_response [display,gray] "Scan.unless ($$ \"h\") ($$ \"w\") (Symbol.explode \"world\")"
                           "(\"w\",[\"o\", \"r\", \"l\", \"d\"])"}
 
   succeeds. 
@@ -286,8 +304,8 @@
   @{ML_response [display,gray]
 "let 
   val p = Scan.repeat (Scan.unless ($$ \"*\") (Scan.one Symbol.not_eof))
-  val input1 = explode \"fooooo\"
-  val input2 = explode \"foo*ooo\"
+  val input1 = Symbol.explode \"fooooo\"
+  val input2 = Symbol.explode \"foo*ooo\"
 in
   (Scan.finite Symbol.stopper p input1, 
    Scan.finite Symbol.stopper p input2)
@@ -304,7 +322,7 @@
 "let 
   fun double (x, y) = (x ^ x, y ^ y) 
 in
-  (($$ \"h\") -- ($$ \"e\") >> double) (explode \"hello\")
+  (($$ \"h\") -- ($$ \"e\") >> double) (Symbol.explode \"hello\")
 end"
 "((\"hh\", \"ee\"), [\"l\", \"l\", \"o\"])"}
 
@@ -313,7 +331,7 @@
   @{ML_response [display,gray] 
 "let 
    val p = Scan.repeat (Scan.one Symbol.not_eof)
-   val input = explode \"foo bar foo\" 
+   val input = Symbol.explode \"foo bar foo\" 
 in
    Scan.finite Symbol.stopper (p >> implode) input
 end" 
@@ -328,7 +346,7 @@
   input unchanged. For example:
 
   @{ML_response [display,gray]
-  "Scan.ahead (Scan.this_string \"foo\") (explode \"foo\")" 
+  "Scan.ahead (Scan.this_string \"foo\") (Symbol.explode \"foo\")" 
   "(\"foo\", [\"f\", \"o\", \"o\"])"} 
 
   The function @{ML Scan.lift} takes a parser and a pair as arguments. This function applies
@@ -336,7 +354,7 @@
   untouched. For example
 
 @{ML_response [display,gray]
-"Scan.lift ($$ \"h\" -- $$ \"e\") (1, explode \"hello\")"
+"Scan.lift ($$ \"h\" -- $$ \"e\") (1, Symbol.explode \"hello\")"
 "((\"h\", \"e\"), (1, [\"l\", \"l\", \"o\"]))"}
 
   (FIXME: In which situations is this useful? Give examples.) 
@@ -1757,8 +1775,7 @@
   structure \texttt{Method}, but not published in its signature.
   The source file \texttt{src/Pure/Isar/method.ML} shows the use of 
   \texttt{Method.add\_method} to add a number of methods.
-
+*}
 
-*}
 (*>*)
 end