CookBook/Parsing.thy
changeset 47 4daf913fdbe1
parent 44 dee4b3e66dfe
child 48 609f9ef73494
--- a/CookBook/Parsing.thy	Wed Oct 29 21:51:25 2008 +0100
+++ b/CookBook/Parsing.thy	Thu Oct 30 13:36:51 2008 +0100
@@ -15,7 +15,7 @@
   Isabelle uses a rather general and sophisticated algorithm due to Earley, which 
   is driven by priority grammars. Parsers for outer syntax are built up by functional
   parsing combinators. These combinators are a well-established technique for parsing, 
-  which has, for example, been described in Paulson's classic book \cite{paulson-ml2}.
+  which has, for example, been described in Paulson's classic ML-book \cite{paulson-ml2}.
   Isabelle developers are usually concerned with writing these outer syntax parsers, 
   either for new definitional packages or for calling tactics with specific arguments. 
 
@@ -110,11 +110,11 @@
   
 @{ML_response [display]
 "let 
-  val just_h = ($$ \"h\") |-- ($$ \"e\") 
-  val just_e = ($$ \"h\") --| ($$ \"e\") 
+  val just_e = ($$ \"h\") |-- ($$ \"e\") 
+  val just_h = ($$ \"h\") --| ($$ \"e\") 
   val input = (explode \"hello\")  
 in 
-  (just_h input, just_e input)
+  (just_e input, just_h input)
 end"
   "((\"e\", [\"l\", \"l\", \"o\"]),(\"h\", [\"l\", \"l\", \"o\"]))"}
 
@@ -182,7 +182,7 @@
 *}
 
 ML {* 
-  fun p_followed_by_q p q r =
+fun p_followed_by_q p q r =
   let 
      val err = (fn _ => p ^ " is not followed by " ^ q)
   in
@@ -258,7 +258,9 @@
 
   The structure @{ML_struct OuterLex} defines several kinds of token (for example 
   @{ML "OuterLex.Ident"} for identifiers, @{ML "OuterLex.Keyword"} for keywords and
-  @{ML "OuterLex.Command"} for commands).
+  @{ML "OuterLex.Command"} for commands). Some parsers take into account the 
+  kind of token.
+  
   We can generate a token list using the function @{ML "OuterSyntax.scan"}, which we give
   below @{ML "Position.none"} as argument since, at the moment, we are not interested in
   generating precise error messages. The following\footnote{There is something funny 
@@ -269,7 +271,7 @@
  OuterLex.Token (\<dots>,(OuterLex.Space, \" \"),\<dots>), 
  OuterLex.Token (\<dots>,(OuterLex.Ident, \"world\"),\<dots>)]"}
 
-  produces three token where the first and the last are identifiers, since 
+  produces three tokens where the first and the last are identifiers, since 
   @{ML_text [quotes] "hello"} and @{ML_text [quotes] "world"} do not match 
   any other category. The second indicates a space. If we parse
 
@@ -278,7 +280,18 @@
  OuterLex.Token (\<dots>,(OuterLex.Keyword, \"|\"),\<dots>), 
  OuterLex.Token (\<dots>,(OuterLex.Keyword, \"for\"),\<dots>)]"}
 
-  we obtain a list of command/keyword token.
+  we obtain a list of command and keyword tokens.
+  If you want to see which keywords and commands are currently known, use
+  the following (you might have to adjust the @{ML print_depth} in order to
+  see the complete list):
+
+@{ML_response_fake [display] 
+"let 
+  val (keywords, commands) = OuterKeyword.get_lexicons ()
+in 
+  (Scan.dest_lexicon commands, Scan.dest_lexicon keywords)
+end" 
+"([\"}\",\"{\",\<dots>],[\"\<rightleftharpoons>\",\"\<leftharpoondown>\",\<dots>])"}
 
   Now the parser @{ML "OuterParse.$$$"} parses a single keyword. For example
  
@@ -314,7 +327,10 @@
 end" 
 "([\"in\",\"in\",\"in\"],[\<dots>])"}
 
- @{ML_open "OuterParse.enum1"} works similarly, except that the list must be non-empty.
+  Note that we had to add a @{ML_text [quotes] "\\n"} at the end of the parsed
+  string, otherwise the parser would have consumed all tokens and then failed with
+  the exception @{ML_text "FAIL"}. @{ML "OuterParse.enum1"} works similarly, 
+  except that the parsed list must be non-empty.
 
 *}
 
@@ -331,7 +347,7 @@
 
 *}
 
-text {* FIXME funny output for a proposition *}
+text {* (FIXME funny output for a proposition) *}
 
 
 
@@ -562,21 +578,36 @@
 
 *}
 
-text {*
-  FIXME
+
+
 
-  @{text "
-  begin{verbatim}
-  type token = T.token ;
-  val toks : token list = OuterSyntax.scan ``theory,imports;begin x.y.z apply ?v1 ?'a 'a -- || 44 simp (* xx *) { * fff * }'' ;
+ML {*
+  val toks = OuterSyntax.scan Position.none
+   "theory,imports;begin x.y.z apply ?v1 ?'a 'a -- || 44 simp (* xx *) { * fff * }" ;
+*}
+
+ML {*
   print_depth 20 ;
-  List.map T.text_of toks ;
-  val proper_toks = List.filter T.is_proper toks ;
-  List.map T.kind_of proper_toks ;
-  List.map T.unparse proper_toks ;
-  List.map T.val_of proper_toks ;
-  end{verbatim}"}
+*}
+
+ML {*
+  map OuterLex.text_of toks ;
+*}
+
+ML {*
+  val proper_toks = filter OuterLex.is_proper toks ;
+*}  
 
+ML {*
+  map OuterLex.kind_of proper_toks 
+*}
+
+ML {*
+  map OuterLex.unparse proper_toks ;
+*}
+
+ML {*
+  OuterLex.stopper
 *}
 
 text {*