CookBook/Parsing.thy
changeset 49 a0edabf14457
parent 48 609f9ef73494
child 50 3d4b49921cdb
--- a/CookBook/Parsing.thy	Sat Nov 01 15:20:36 2008 +0100
+++ b/CookBook/Parsing.thy	Mon Nov 24 02:51:08 2008 +0100
@@ -32,7 +32,7 @@
 
 *}
 
-section {* Building Up Generic Parsers *}
+section {* Building Generic Parsers *}
 
 text {*
 
@@ -63,12 +63,15 @@
   It is used for example in the function @{ML "(op !!)"} (see below).
   \end{itemize}
 
-  (FIXME: do the exception need to be explained, because the user cannot use them from ``outside''?)
+  However, note that these exception private to the parser and cannot be accessed
+  by the programmer (for example to handle them).
+  
+  Slightly more general than the parser @{ML "(op $$)"} is the function @{ML
+  Scan.one}, in that it takes a predicate as argument and then parses exactly
+  one item from the input list satisfying this prediate. For example the
+  following parser either consumes an @{ML_text [quotes] "h"} or a @{ML_text
+  [quotes] "w"}:
 
-  Slightly more general than the parser @{ML "(op $$)"} is the function @{ML Scan.one}, in that it 
-  takes a predicate as argument and then parses exactly one item from the input list 
-  satisfying this prediate. For example the following parser either consumes an 
-  @{ML_text [quotes] "h"} or a @{ML_text [quotes] "w"}:
 
 @{ML_response [display] 
 "let 
@@ -132,6 +135,9 @@
 end" 
  "((\"h\", [\"e\", \"l\", \"l\", \"o\"]), (\"x\", [\"w\", \"o\", \"r\", \"l\", \"d\"]))"}
 
+  The function @{ML Scan.option} works similarly, except no default value can
+  be given and in the failure case this parser does nothing.
+
   The function @{ML "(op !!)"} helps to produce appropriate error messages
   during parsing. For example if one wants to parse that @{ML_text p} is immediately 
   followed by @{ML_text q}, or start a completely different parser @{ML_text r},
@@ -214,6 +220,16 @@
   @{ML "Scan.repeat1"} is similar, but requires that the parser @{ML_text "p"} 
   succeeds at least once.
 
+  Also note that the parser would have aborted with the exception @{ML_text MORE}, if
+  we had run it only on just @{ML_text [quotes] "hhhh"}. This can be awoided using
+  the wrapper @{ML Scan.finite} and the ``stopper-token'' @{ML Symbol.stopper}. With
+  them we can write
+  
+  @{ML_response [display] "Scan.finite Symbol.stopper (Scan.repeat ($$ \"h\")) (explode \"hhhh\")" 
+                "([\"h\", \"h\", \"h\", \"h\"], [])"}
+
+  However, the Isabelle-develloper rarely needs to do this kind of wrapping manually. 
+
   After parsing succeeded, one nearly always wants to apply a function on the parsed 
   items. This is done using the function @{ML_open "(p >> f)" for p f} which runs 
   first the parser @{ML_text p} and upon successful completion applies the 
@@ -264,10 +280,10 @@
   generating precise error messages. The following\footnote{There is something funny 
   going on with the pretty printing of the result token list.}
 
-@{ML_response [display] "OuterSyntax.scan Position.none \"hello world\"" 
-"[OuterLex.Token (\<dots>,(OuterLex.Ident, \"hello\"),\<dots>), 
- OuterLex.Token (\<dots>,(OuterLex.Space, \" \"),\<dots>), 
- OuterLex.Token (\<dots>,(OuterLex.Ident, \"world\"),\<dots>)]"}
+@{ML_response_fake [display] "OuterSyntax.scan Position.none \"hello world\"" 
+"[Token (\<dots>,(OuterLex.Ident, \"hello\"),\<dots>), 
+ Token (\<dots>,(OuterLex.Space, \" \"),\<dots>), 
+ Token (\<dots>,(OuterLex.Ident, \"world\"),\<dots>)]"}
 
   produces three tokens where the first and the last are identifiers, since 
   @{ML_text [quotes] "hello"} and @{ML_text [quotes] "world"} do not match 
@@ -275,10 +291,10 @@
   later on will require spaces, comments and the like to have been filtered out.
   If we parse
 
-@{ML_response [display] "OuterSyntax.scan Position.none \"inductive|for\"" 
-"[OuterLex.Token (\<dots>,(OuterLex.Command, \"inductive\"),\<dots>), 
- OuterLex.Token (\<dots>,(OuterLex.Keyword, \"|\"),\<dots>), 
- OuterLex.Token (\<dots>,(OuterLex.Keyword, \"for\"),\<dots>)]"}
+@{ML_response_fake [display] "OuterSyntax.scan Position.none \"inductive|for\"" 
+"[Token (\<dots>,(OuterLex.Command, \"inductive\"),\<dots>), 
+ Token (\<dots>,(OuterLex.Keyword, \"|\"),\<dots>), 
+ Token (\<dots>,(OuterLex.Keyword, \"for\"),\<dots>)]"}
 
   we obtain a list consiting of only command and keyword tokens.
   If you want to see which keywords and commands are currently known, use
@@ -316,7 +332,7 @@
 
   The parser @{ML_open "OuterParse.enum s p" for s p} parses a possibly empty 
   list of items recognised by the parser @{ML_text p}, where the items are 
-  separated by @{ML_text s}. For example
+  separated by the string @{ML_text s}. For example
 
 @{ML_response [display]
 "let 
@@ -326,14 +342,62 @@
 end" 
 "([\"in\",\"in\",\"in\"],[\<dots>])"}
 
+  @{ML "OuterParse.enum1"} works similarly,  except that the parsed list must 
+  be non-empty.
+
   Note that we had to add a @{ML_text [quotes] "\\n"} at the end of the parsed
   string, otherwise the parser would have consumed all tokens and then failed with
-  the exception @{ML_text "MORE"}. @{ML "OuterParse.enum1"} works similarly, 
-  except that the parsed list must be non-empty.
+  the exception @{ML_text "MORE"}. Like in the previous section, we can avoid
+  this exception using the wrapper @{ML Scan.finite}. This time, however, we
+  have to use the ``stopper-token'' @{ML OuterLex.stopper}. We can write
+
+@{ML_response [display]
+"let 
+  val input = OuterSyntax.scan Position.none \"in|in|in\"
+in 
+  Scan.finite OuterLex.stopper (OuterParse.enum \"|\" (OuterParse.$$$ \"in\")) input
+end" 
+"([\"in\",\"in\",\"in\"],[])"}
+
+  The function @{ML "OuterParse.!!!"} can be used to force termination of the
+  parser in case of a dead end, just like @{ML "Scan.!!"} (see previous section), 
+  except that the error message is fixed to be @{text [quotes] "Outer syntax error"}
+  with a relatively precise description of the failure. For example:
+
+@{ML_response_fake [display]
+"let 
+  val input = OuterSyntax.scan Position.none \"in|\"
+  val parse_bar_then_in = OuterParse.$$$ \"|\" -- OuterParse.$$$ \"in\"
+in 
+  Scan.error (OuterParse.!!! parse_bar_then_in) input 
+end"
+"Exception ERROR \"Outer syntax error: keyword \"|\" expected, 
+but keyword in was found\" raised"
+}
 
 *}
 
-text {* FIXME explain @{ML "OuterParse.!!!"} *}
+
+ML {*
+let 
+  val input = filter OuterLex.is_proper (OuterSyntax.scan Position.none "(in foo)")
+in 
+  OuterParse.target input
+end
+*}
+
+section {* Positional Information *}
+
+text {*
+
+  @{ML OuterParse.position}
+
+*}
+
+ML {*
+  OuterParse.position
+*}
+
 
 section {* Parsing Inner Syntax *}