ProgTutorial/Parsing.thy
changeset 236 7b6d81ff9d9a
parent 230 8def50824320
child 240 d111f5988e49
--- a/ProgTutorial/Parsing.thy	Wed Apr 08 22:47:39 2009 +0100
+++ b/ProgTutorial/Parsing.thy	Thu Apr 09 18:11:35 2009 +0000
@@ -7,9 +7,9 @@
 text {*
 
   Isabelle distinguishes between \emph{outer} and \emph{inner} syntax. 
-  Theory commands, such as \isacommand{definition}, \isacommand{inductive} and so
-  on, belong to the outer syntax, whereas items inside double quotation marks, such 
-  as terms, types and so on, belong to the inner syntax. For parsing inner syntax, 
+  Commands, such as \isacommand{definition}, \isacommand{inductive} and so
+  on, belong to the outer syntax, whereas terms, types and so on belong to the 
+  inner syntax. For parsing inner syntax, 
   Isabelle uses a rather general and sophisticated algorithm, which 
   is driven by priority grammars. Parsers for outer syntax are built up by functional
   parsing combinators. These combinators are a well-established technique for parsing, 
@@ -18,14 +18,15 @@
   either for new definitional packages or for calling methods with specific arguments. 
 
   \begin{readmore}
-  The library 
-  for writing parser combinators is split up, roughly, into two parts. 
-  The first part consists of a collection of generic parser combinators defined
-  in the structure @{ML_struct Scan} in the file 
-  @{ML_file "Pure/General/scan.ML"}. The second part of the library consists of 
-  combinators for dealing with specific token types, which are defined in the 
-  structure @{ML_struct OuterParse} in the file 
-  @{ML_file "Pure/Isar/outer_parse.ML"}.
+  The library for writing parser combinators is split up, roughly, into two
+  parts. The first part consists of a collection of generic parser combinators
+  defined in the structure @{ML_struct Scan} in the file @{ML_file
+  "Pure/General/scan.ML"}. The second part of the library consists of
+  combinators for dealing with specific token types, which are defined in the
+  structure @{ML_struct OuterParse} in the file @{ML_file
+  "Pure/Isar/outer_parse.ML"}. Specific parsers for packages are defined
+  in @{ML_file "Pure/Isar/spec_parse.ML"}. Parsers for method arguments
+  are defined in @{ML_file "Pure/Isar/args.ML"}.
   \end{readmore}
 
 *}
@@ -75,10 +76,10 @@
 @{ML_response [display,gray] 
 "let 
   val hw = Scan.one (fn x => x = \"h\" orelse x = \"w\")
-  val input1 = (explode \"hello\")
-  val input2 = (explode \"world\")
+  val input1 = explode \"hello\"
+  val input2 = explode \"world\"
 in
-    (hw input1, hw input2)
+  (hw input1, hw input2)
 end"
     "((\"h\", [\"e\", \"l\", \"l\", \"o\"]),(\"w\", [\"o\", \"r\", \"l\", \"d\"]))"}
 
@@ -86,16 +87,18 @@
   For example parsing @{text "h"}, @{text "e"} and @{text "l"} (in this 
   order) you can achieve by:
 
-  @{ML_response [display,gray] "(($$ \"h\") -- ($$ \"e\") -- ($$ \"l\")) (explode \"hello\")"
-                          "(((\"h\", \"e\"), \"l\"), [\"l\", \"o\"])"}
+  @{ML_response [display,gray] 
+  "($$ \"h\" -- $$ \"e\" -- $$ \"l\") (explode \"hello\")"
+  "(((\"h\", \"e\"), \"l\"), [\"l\", \"o\"])"}
 
   Note how the result of consumed strings builds up on the left as nested pairs.  
 
   If, as in the previous example, you want to parse a particular string, 
   then you should use the function @{ML Scan.this_string}:
 
-  @{ML_response [display,gray] "Scan.this_string \"hell\" (explode \"hello\")"
-                          "(\"hell\", [\"o\"])"}
+  @{ML_response [display,gray] 
+  "Scan.this_string \"hell\" (explode \"hello\")"
+  "(\"hell\", [\"o\"])"}
 
   Parsers that explore alternatives can be constructed using the function @{ML
   "||"}. The parser @{ML "(p || q)" for p q} returns the
@@ -105,9 +108,9 @@
 
 @{ML_response [display,gray] 
 "let 
-  val hw = ($$ \"h\") || ($$ \"w\")
-  val input1 = (explode \"hello\")
-  val input2 = (explode \"world\")
+  val hw = $$ \"h\" || $$ \"w\"
+  val input1 = explode \"hello\"
+  val input2 = explode \"world\"
 in
   (hw input1, hw input2)
 end"
@@ -119,9 +122,9 @@
   
 @{ML_response [display,gray]
 "let 
-  val just_e = ($$ \"h\") |-- ($$ \"e\") 
-  val just_h = ($$ \"h\") --| ($$ \"e\") 
-  val input = (explode \"hello\")  
+  val just_e = $$ \"h\" |-- $$ \"e\" 
+  val just_h = $$ \"h\" --| $$ \"e\" 
+  val input = explode \"hello\"  
 in 
   (just_e input, just_h input)
 end"
@@ -134,8 +137,8 @@
 @{ML_response [display,gray]
 "let 
   val p = Scan.optional ($$ \"h\") \"x\"
-  val input1 = (explode \"hello\")
-  val input2 = (explode \"world\")  
+  val input1 = explode \"hello\"
+  val input2 = explode \"world\"  
 in 
   (p input1, p input2)
 end" 
@@ -147,8 +150,8 @@
 @{ML_response [display,gray]
 "let 
   val p = Scan.option ($$ \"h\")
-  val input1 = (explode \"hello\")
-  val input2 = (explode \"world\")  
+  val input1 = explode \"hello\"
+  val input2 = explode \"world\"  
 in 
   (p input1, p input2)
 end" "((SOME \"h\", [\"e\", \"l\", \"l\", \"o\"]), (NONE, [\"w\", \"o\", \"r\", \"l\", \"d\"]))"}
@@ -166,7 +169,7 @@
   To see this assume that @{text p} is present in the input, but it is not
   followed by @{text q}. That means @{ML "(p -- q)" for p q} will fail and
   hence the alternative parser @{text r} will be tried. However, in many
-  circumstances this will be the wrong parser for the input ``p-followed-by-something''
+  circumstances this will be the wrong parser for the input ``@{text "p"}-followed-by-something''
   and therefore will also fail. The error message is then caused by the failure
   of @{text r}, not by the absence of @{text q} in the input. This kind of
   situation can be avoided when using the function @{ML "!!"}.  This function
@@ -174,13 +177,13 @@
   message. For example if you invoke the parser
 
   
-  @{ML [display,gray] "(!! (fn _ => \"foo\") ($$ \"h\"))"}
+  @{ML [display,gray] "!! (fn _ => \"foo\") ($$ \"h\")"}
 
   on @{text [quotes] "hello"}, the parsing succeeds
 
   @{ML_response [display,gray] 
-                "(!! (fn _ => \"foo\") ($$ \"h\")) (explode \"hello\")" 
-                "(\"h\", [\"e\", \"l\", \"l\", \"o\"])"}
+  "(!! (fn _ => \"foo\") ($$ \"h\")) (explode \"hello\")" 
+  "(\"h\", [\"e\", \"l\", \"l\", \"o\"])"}
 
   but if you invoke it on @{text [quotes] "world"}
   
@@ -191,20 +194,21 @@
   see the error message properly, you need to prefix the parser with the function 
   @{ML "Scan.error"}. For example:
 
-  @{ML_response_fake [display,gray] "Scan.error (!! (fn _ => \"foo\") ($$ \"h\"))"
-                               "Exception Error \"foo\" raised"}
+  @{ML_response_fake [display,gray] 
+  "Scan.error (!! (fn _ => \"foo\") ($$ \"h\"))"
+  "Exception Error \"foo\" raised"}
 
   This ``prefixing'' is usually done by wrappers such as @{ML "OuterSyntax.local_theory"} 
   (see Section~\ref{sec:newcommand} which explains this function in more detail). 
   
   Let us now return to our example of parsing @{ML "(p -- q) || r" for p q
-  r}. If you want to generate the correct error message for p-followed-by-q,
-  then you have to write:
+  r}. If you want to generate the correct error message for 
+  @{text "p"}-followed-by-@{text "q"}, then you have to write:
 *}
 
 ML{*fun p_followed_by_q p q r =
 let 
-  val err_msg = (fn _ => p ^ " is not followed by " ^ q)
+  val err_msg = fn _ => p ^ " is not followed by " ^ q
 in
   ($$ p -- (!! err_msg ($$ q))) || ($$ r -- $$ r)
 end *}
@@ -253,7 +257,7 @@
   @{ML_response [display,gray] 
 "let 
    val p = Scan.repeat (Scan.one Symbol.not_eof)
-   val input = (explode \"foo bar foo\") 
+   val input = explode \"foo bar foo\"
 in
    Scan.finite Symbol.stopper p input
 end" 
@@ -282,8 +286,8 @@
   @{ML_response [display,gray]
 "let 
   val p = Scan.repeat (Scan.unless ($$ \"*\") (Scan.one Symbol.not_eof))
-  val input1 = (explode \"fooooo\")
-  val input2 = (explode \"foo*ooo\")
+  val input1 = explode \"fooooo\"
+  val input2 = explode \"foo*ooo\"
 in
   (Scan.finite Symbol.stopper p input1, 
    Scan.finite Symbol.stopper p input2)
@@ -309,7 +313,7 @@
   @{ML_response [display,gray] 
 "let 
    val p = Scan.repeat (Scan.one Symbol.not_eof)
-   val input = (explode \"foo bar foo\") 
+   val input = explode \"foo bar foo\" 
 in
    Scan.finite Symbol.stopper (p >> implode) input
 end" 
@@ -332,7 +336,7 @@
   untouched. For example
 
 @{ML_response [display,gray]
-"Scan.lift (($$ \"h\") -- ($$ \"e\")) (1,(explode \"hello\"))"
+"Scan.lift ($$ \"h\" -- $$ \"e\") (1, explode \"hello\")"
 "((\"h\", \"e\"), (1, [\"l\", \"l\", \"o\"]))"}
 
   (FIXME: In which situations is this useful? Give examples.) 
@@ -342,7 +346,8 @@
   within @{text "(*\<dots>*)"} is replaced by the same comment but enclosed within
   @{text "(**\<dots>**)"} in the output string. To enclose a string, you can use the
   function @{ML "enclose s1 s2 s" for s1 s2 s} which produces the string @{ML
-  "s1 ^ s ^ s2" for s1 s2 s}.
+  "s1 ^ s ^ s2" for s1 s2 s}. Hint: To simplify the task ignore the proper 
+  nesting of comments.
   \end{exercise}
 *}