CookBook/Parsing.thy
changeset 39 631d12c25bde
parent 38 e21b2f888fa2
child 40 35e1dff0d9bb
--- a/CookBook/Parsing.thy	Tue Oct 14 01:33:55 2008 -0400
+++ b/CookBook/Parsing.thy	Fri Oct 17 05:02:04 2008 -0400
@@ -10,7 +10,7 @@
   Isabelle distinguishes between \emph{outer} and \emph{inner} syntax. 
   Theory commands, such as \isacommand{definition}, \isacommand{inductive} and so
   on, belong to the outer syntax, whereas items inside double quotation marks, such 
-  as terms and types, belong to the inner syntax. For parsing inner syntax, 
+  as terms, types and so on, belong to the inner syntax. For parsing inner syntax, 
   Isabelle uses a rather general and sophisticated algorithm due to Earley, which 
   is driven by priority grammars. Parsers for outer syntax are built up by functional
   parsing combinators. These combinators are a well-established technique for parsing, 
@@ -30,100 +30,113 @@
 
 section {* Building Up Generic Parsers *}
 
-text {*
-  Let us first have a look at parsing strings using generic parsing combinators. 
-  The function @{ML "$$"} takes a string and will ``consume'' this string from 
-  a given input list of strings. Consume in this context means that it will 
-  return a pair consisting of this string and the rest of the list. 
-  For example:
-*}
-
-ML {* ($$ "h") (explode "hello"); 
-       ($$ "w") (explode "world") *}
 
 text {*
-  returns @{ML "(\"h\", [\"e\", \"l\", \"l\", \"o\"])"} and then 
-  @{ML "(\"w\", [\"o\", \"r\", \"l\", \"d\"])"}. This function will either succeed (as in
-  the two examples above) or raise the exeption @{ML_text "FAIL"} if no string
-  can be consumed: for example @{ML_text "($$ \"x\") (explode \"world\")"}.
+
+  Let us first have a look at parsing strings using generic parsing combinators. 
+  The function @{ML "(op $$)"} takes a string and will ``consume'' this string from 
+  a given input list of strings. ``Consume'' in this context means that it will 
+  return a pair consisting of this string and the rest of the input list. 
+  For example:
+
+  @{ML_response [display] "($$ \"h\") (explode \"hello\")" "(\"h\", [\"e\", \"l\", \"l\", \"o\"])"}
+  @{ML_response [display] "($$ \"w\") (explode \"world\")" "(\"w\", [\"o\", \"r\", \"l\", \"d\"])"}
+
+  This function will either succeed (as in the two examples above) or raise the exeption 
+  @{ML_text "FAIL"} if no string can be consumed. For example in the next example
+  try to parse: 
+
+  @{ML_text [display] "($$ \"x\") (explode \"world\")"}
+
+  There are three exceptions used in the parsing combinators:
+
+  (FIXME: describe)
 
-  Two such parser can be connected in sequence using the funtion @{ML "(op --)"}. 
+  \begin{itemize}
+  \item @{ML_text "FAIL"}
+  \item @{ML_text "MORE"}
+  \item @{ML_text "ABORT"}
+  \end{itemize}
+
+  Slightly more general than @{ML "(op $$)"} is the function @{ML Scan.one} in that it 
+  takes a predicate as argument and then parses exactly one item from the input list 
+  satisfying this prediate. For example the following parser either consumes an @{ML_text "h"}
+  or a @{ML_text "w"}:
+
+@{ML_response [display] 
+"let val hw = Scan.one (fn x => x = \"h\" orelse x = \"w\")
+  val input1 = (explode \"hello\")
+  val input2 = (explode \"world\")
+in
+    (hw input1, hw input2)
+end"
+    "((\"h\", [\"e\", \"l\", \"l\", \"o\"]),(\"w\", [\"o\", \"r\", \"l\", \"d\"]))"}
+
+  Two parser can be connected in sequence by using the funtion @{ML "(op --)"}. 
   For example   
-*}
-
-ML {* (($$ "h") -- ($$ "e") -- ($$ "l")) (explode "hello") *}
 
-text {*
-  returns @{ML "(((\"h\", \"e\"), \"l\"), [\"l\", \"o\"])"}. Note how the result of
-  consumed strings builds up on the left as nested pairs.  
+  @{ML_response [display] "(($$ \"h\") -- ($$ \"e\") -- ($$ \"l\")) (explode \"hello\")"
+                          "(((\"h\", \"e\"), \"l\"), [\"l\", \"o\"])"}
+
+  Note how the result of consumed strings builds up on the left as nested pairs.  
 
   Parsers that explore 
   alternatives can be constructed using the function @{ML "(op ||)"}. For example, the 
   parser @{ML_open "p || q" for p q} returns the result of @{ML_text "p"}, if it succeeds, 
   otherwise it returns the result of @{ML_text "q"}. For example
-*}
 
-ML {*
-  let val hw = ($$ "h") || ($$ "w")
-      val input1 = (explode "hello")
-      val input2 = (explode "world")
-  in
-    (hw input1, hw input2)
-  end
-*}
+@{ML_response [display] 
+"let val hw = ($$ \"h\") || ($$ \"w\")
+  val input1 = (explode \"hello\")
+  val input2 = (explode \"world\")
+in
+  (hw input1, hw input2)
+end"
+  "((\"h\", [\"e\", \"l\", \"l\", \"o\"]), (\"w\", [\"o\", \"r\", \"l\", \"d\"]))"}
 
-text {*
   will in the first case consume the @{ML_text "h"} and in the second the @{ML_text "w"}.
   The functions @{ML "(op |--)"} and @{ML "(op --|)"} work like the sequencing funtion 
   for parsers, except that they discard the item parsed by the first (respectively second)
   parser. For example
-*}
-
-ML {* 
-  let val just_h = ($$ "h") --| ($$ "e") 
-      val just_e = ($$ "h") |-- ($$ "e") 
-      val input = (explode "hello")  
-  in 
-   (just_h input, just_e input)
-  end
-*}
-
-text {*
-  produces @{ML "(\"h\", [\"l\", \"l\", \"o\"])"} and @{ML "(\"e\", [\"l\", \"l\", \"o\"])"}, 
-  respectively. 
-
-  (FIXME does it make sense to explain the other functions from @{ML_text BASIC_SCAN}?)
+  
+@{ML_response [display]
+"let val just_h = ($$ \"h\") |-- ($$ \"e\") 
+  val just_e = ($$ \"h\") --| ($$ \"e\") 
+  val input = (explode \"hello\")  
+in 
+  (just_h input, just_e input)
+end"
+  "((\"e\", [\"l\", \"l\", \"o\"]),(\"h\", [\"l\", \"l\", \"o\"]))"}
 
   The parser @{ML_open "Scan.optional p x" for p x} returns the result of the parser 
   @{ML_text "p"}, if it succeeds; otherwise it returns 
   the default value @{ML_text "x"}. For example
-*}
 
-ML {* 
-  let val p = Scan.optional ($$ "h") "x"
-      val input1 = (explode "hello")
-      val input2 = (explode "world")  
-  in 
-   (p input1, p input2)
-  end
+@{ML_response [display]
+"let val p = Scan.optional ($$ \"h\") \"x\"
+  val input1 = (explode \"hello\")
+  val input2 = (explode \"world\")  
+in 
+  (p input1, p input2)
+end" 
+ "((\"h\", [\"e\", \"l\", \"l\", \"o\"]), (\"x\", [\"w\", \"o\", \"r\", \"l\", \"d\"]))"}
+
+  The function @{ML "(op !!)"} helps to produce appropriate error messages
+  during parsing. 
+
 *}
 
-text {*
-  returns @{ML "(\"h\", [\"e\", \"l\", \"l\", \"o\"])"} in the first case and 
-  in the second case @{ML "(\"x\", [\"w\", \"o\", \"r\", \"l\", \"d\"])"}.
-  
-*}
-
-text {* (FIXME: explain @{ML "op !!"}) *}
-
 ML {*
   
   val err_fn = (fn _ => "foo");
   val p = (!! err_fn ($$ "h"))  || ($$ "w");
   val input1 = (explode "hello");
   val input2 = (explode "world");
-  
-  p input1;
+*}  
+
+ML {*
+
+  (*Scan.error p input2;*)
 *}
 
 text {* (FIXME: why does @{ML_text "p input2"} not do anything with foo?) *}