ProgTutorial/Parsing.thy
changeset 566 6103b0eadbf2
parent 565 cecd7a941885
child 567 f7c97e64cc2a
equal deleted inserted replaced
565:cecd7a941885 566:6103b0eadbf2
   500   @{ML "Position.none"} since,
   500   @{ML "Position.none"} since,
   501   at the moment, we are not interested in generating precise error
   501   at the moment, we are not interested in generating precise error
   502   messages. The following code
   502   messages. The following code
   503 
   503 
   504 
   504 
   505 @{ML_response_fake [display,gray] "Token.explode 
   505 @{ML_response_fake [display,gray] \<open>Token.explode 
   506   (Thy_Header.get_keywords' @{context}) Position.none \"hello world\"" 
   506   (Thy_Header.get_keywords' @{context}) Position.none "hello world"\<close>
   507 "[Token (\<dots>,(Ident, \"hello\"),\<dots>), 
   507 \<open>[Token (_,(Ident, "hello"),_), 
   508  Token (\<dots>,(Space, \" \"),\<dots>), 
   508  Token (_,(Space, " "),_), 
   509  Token (\<dots>,(Ident, \"world\"),\<dots>)]"}
   509  Token (_,(Ident, "world"),_)]\<close>}
   510 
   510 
   511   produces three tokens where the first and the last are identifiers, since
   511   produces three tokens where the first and the last are identifiers, since
   512   @{text [quotes] "hello"} and @{text [quotes] "world"} do not match any
   512   @{text [quotes] "hello"} and @{text [quotes] "world"} do not match any
   513   other syntactic category. The second indicates a space.
   513   other syntactic category. The second indicates a space.
   514 
   514 
   524 text \<open>
   524 text \<open>
   525   then lexing @{text [quotes] "hello world"} will produce
   525   then lexing @{text [quotes] "hello world"} will produce
   526 
   526 
   527   @{ML_response_fake [display,gray] "Token.explode 
   527   @{ML_response_fake [display,gray] "Token.explode 
   528   (Thy_Header.get_keywords' @{context}) Position.none \"hello world\"" 
   528   (Thy_Header.get_keywords' @{context}) Position.none \"hello world\"" 
   529 "[Token (\<dots>,(Keyword, \"hello\"),\<dots>), 
   529 "[Token (_,(Keyword, \"hello\"),_), 
   530  Token (\<dots>,(Space, \" \"),\<dots>), 
   530  Token (_,(Space, \" \"),_), 
   531  Token (\<dots>,(Ident, \"world\"),\<dots>)]"}
   531  Token (_,(Ident, \"world\"),_)]"}
   532 
   532 
   533   Many parsing functions later on will require white space, comments and the like
   533   Many parsing functions later on will require white space, comments and the like
   534   to have already been filtered out.  So from now on we are going to use the 
   534   to have already been filtered out.  So from now on we are going to use the 
   535   functions @{ML filter} and @{ML_ind is_proper in Token} to do this. 
   535   functions @{ML filter} and @{ML_ind is_proper in Token} to do this. 
   536   For example:
   536   For example:
   539 "let
   539 "let
   540    val input = Token.explode (Thy_Header.get_keywords' @{context}) Position.none \"hello world\"
   540    val input = Token.explode (Thy_Header.get_keywords' @{context}) Position.none \"hello world\"
   541 in
   541 in
   542    filter Token.is_proper input
   542    filter Token.is_proper input
   543 end" 
   543 end" 
   544 "[Token (\<dots>,(Ident, \"hello\"), \<dots>), Token (\<dots>,(Ident, \"world\"), \<dots>)]"}
   544 "[Token (_,(Ident, \"hello\"), _), Token (_,(Ident, \"world\"), _)]"}
   545 
   545 
   546   For convenience we define the function:
   546   For convenience we define the function:
   547 \<close>
   547 \<close>
   548 
   548 
   549 ML %grayML\<open>fun filtered_input str = 
   549 ML %grayML\<open>fun filtered_input str = 
   553 text \<open>
   553 text \<open>
   554   If you now parse
   554   If you now parse
   555 
   555 
   556 @{ML_response_fake [display,gray] 
   556 @{ML_response_fake [display,gray] 
   557 "filtered_input \"inductive | for\"" 
   557 "filtered_input \"inductive | for\"" 
   558 "[Token (\<dots>,(Command, \"inductive\"),\<dots>), 
   558 "[Token (_,(Command, \"inductive\"),_), 
   559  Token (\<dots>,(Keyword, \"|\"),\<dots>), 
   559  Token (_,(Keyword, \"|\"),_), 
   560  Token (\<dots>,(Keyword, \"for\"),\<dots>)]"}
   560  Token (_,(Keyword, \"for\"),_)]"}
   561 
   561 
   562   you obtain a list consisting of only one command and two keyword tokens.
   562   you obtain a list consisting of only one command and two keyword tokens.
   563   If you want to see which keywords and commands are currently known to Isabelle, 
   563   If you want to see which keywords and commands are currently known to Isabelle, 
   564   use the function @{ML_ind get_keywords' in Thy_Header}:
   564   use the function @{ML_ind get_keywords' in Thy_Header}:
   565 
   565 
   573   val input1 = filtered_input \"where for\"
   573   val input1 = filtered_input \"where for\"
   574   val input2 = filtered_input \"| in\"
   574   val input2 = filtered_input \"| in\"
   575 in 
   575 in 
   576   (Parse.$$$ \"where\" input1, Parse.$$$ \"|\" input2)
   576   (Parse.$$$ \"where\" input1, Parse.$$$ \"|\" input2)
   577 end"
   577 end"
   578 "((\"where\",\<dots>), (\"|\",\<dots>))"}
   578 "((\"where\",_), (\"|\",_))"}
   579 
   579 
   580   Any non-keyword string can be parsed with the function @{ML_ind reserved in Parse}.
   580   Any non-keyword string can be parsed with the function @{ML_ind reserved in Parse}.
   581   For example:
   581   For example:
   582 
   582 
   583   @{ML_response [display,gray]
   583   @{ML_response [display,gray]
   607 "let 
   607 "let 
   608   val input = filtered_input \"in | in | in foo\"
   608   val input = filtered_input \"in | in | in foo\"
   609 in 
   609 in 
   610   (Parse.enum \"|\" (Parse.$$$ \"in\")) input
   610   (Parse.enum \"|\" (Parse.$$$ \"in\")) input
   611 end" 
   611 end" 
   612 "([\"in\", \"in\", \"in\"], [\<dots>])"}
   612 "([\"in\", \"in\", \"in\"], [_])"}
   613 
   613 
   614   The function @{ML_ind enum1 in Parse} works similarly, except that the
   614   The function @{ML_ind enum1 in Parse} works similarly, except that the
   615   parsed list must be non-empty. Note that we had to add a string @{text
   615   parsed list must be non-empty. Note that we had to add a string @{text
   616   [quotes] "foo"} at the end of the parsed string, otherwise the parser would
   616   [quotes] "foo"} at the end of the parsed string, otherwise the parser would
   617   have consumed all tokens and then failed with the exception \<open>MORE\<close>. Like in the previous section, we can avoid this exception using the
   617   have consumed all tokens and then failed with the exception \<open>MORE\<close>. Like in the previous section, we can avoid this exception using the
   671 text \<open>
   671 text \<open>
   672   where we pretend the parsed string starts on line 7. An example is
   672   where we pretend the parsed string starts on line 7. An example is
   673 
   673 
   674 @{ML_response_fake [display,gray]
   674 @{ML_response_fake [display,gray]
   675 "filtered_input' \"foo \\n bar\""
   675 "filtered_input' \"foo \\n bar\""
   676 "[Token ((\"foo\", ({line=7, end_line=7}, {line=7})), (Ident, \"foo\"), \<dots>),
   676 "[Token ((\"foo\", ({line=7, end_line=7}, {line=7})), (Ident, \"foo\"), _),
   677  Token ((\"bar\", ({line=8, end_line=8}, {line=8})), (Ident, \"bar\"), \<dots>)]"}
   677  Token ((\"bar\", ({line=8, end_line=8}, {line=8})), (Ident, \"bar\"), _)]"}
   678 
   678 
   679   in which the @{text [quotes] "\\n"} causes the second token to be in 
   679   in which the @{text [quotes] "\\n"} causes the second token to be in 
   680   line 8.
   680   line 8.
   681 
   681 
   682   By using the parser @{ML position in Parse} you can access the token 
   682   By using the parser @{ML position in Parse} you can access the token 
   842       \"| oddS[intro]:  \\\"even n \<Longrightarrow> odd (Suc n)\\\"\")
   842       \"| oddS[intro]:  \\\"even n \<Longrightarrow> odd (Suc n)\\\"\")
   843 in
   843 in
   844   parse spec_parser input
   844   parse spec_parser input
   845 end"
   845 end"
   846 "(([(even, NONE, NoSyn), (odd, NONE, NoSyn)],
   846 "(([(even, NONE, NoSyn), (odd, NONE, NoSyn)],
   847    [((even0,\<dots>),\<dots>),
   847    [((even0,_),_),
   848     ((evenS,\<dots>),\<dots>),
   848     ((evenS,_),_),
   849     ((oddS,\<dots>),\<dots>)]), [])"}
   849     ((oddS,_),_)]), [])"}
   850 \<close>
   850 \<close>
   851 
   851 
   852 ML \<open>let
   852 ML \<open>let
   853   val input = filtered_input 
   853   val input = filtered_input 
   854         "foo::\"int \<Rightarrow> bool\" and bar::nat (\"BAR\" 100) and blonk"
   854         "foo::\"int \<Rightarrow> bool\" and bar::nat (\"BAR\" 100) and blonk"
   873   val input = filtered_input 
   873   val input = filtered_input 
   874         \"foo::\\\"int \<Rightarrow> bool\\\" and bar::nat (\\\"BAR\\\" 100) and blonk\"
   874         \"foo::\\\"int \<Rightarrow> bool\\\" and bar::nat (\\\"BAR\\\" 100) and blonk\"
   875 in
   875 in
   876   parse Parse.vars input
   876   parse Parse.vars input
   877 end"
   877 end"
   878 "([(foo, SOME \<dots>, NoSyn), 
   878 "([(foo, SOME _, NoSyn), 
   879   (bar, SOME \<dots>, Mixfix (Source {text=\"BAR\",...}, [], 100, \<dots>)), 
   879   (bar, SOME _, Mixfix (Source {text=\"BAR\",...}, [], 100, _)), 
   880   (blonk, NONE, NoSyn)],[])"}  
   880   (blonk, NONE, NoSyn)],[])"}  
   881 \<close>
   881 \<close>
   882 
   882 
   883 text \<open>
   883 text \<open>
   884   Whenever types are given, they are stored in the @{ML SOME}s. The types are
   884   Whenever types are given, they are stored in the @{ML SOME}s. The types are
   902 @{ML_response [display,gray] "let 
   902 @{ML_response [display,gray] "let 
   903   val input = filtered_input \"foo_lemma[intro,dest!]:\"
   903   val input = filtered_input \"foo_lemma[intro,dest!]:\"
   904   val ((name, attrib), _) = parse (Parse_Spec.thm_name \":\") input 
   904   val ((name, attrib), _) = parse (Parse_Spec.thm_name \":\") input 
   905 in 
   905 in 
   906   (name, map Token.name_of_src attrib)
   906   (name, map Token.name_of_src attrib)
   907 end" "(foo_lemma, [(\"intro\", \<dots>), (\"dest\", \<dots>)])"}
   907 end" "(foo_lemma, [(\"intro\", _), (\"dest\", _)])"}
   908  
   908  
   909   The function @{ML_ind opt_thm_name in Parse_Spec} is the ``optional'' variant of
   909   The function @{ML_ind opt_thm_name in Parse_Spec} is the ``optional'' variant of
   910   @{ML_ind thm_name in Parse_Spec}. Theorem names can contain attributes. The name 
   910   @{ML_ind thm_name in Parse_Spec}. Theorem names can contain attributes. The name 
   911   has to end with @{text [quotes] ":"}---see the argument of 
   911   has to end with @{text [quotes] ":"}---see the argument of 
   912   the function @{ML Parse_Spec.opt_thm_name} in Line 7.
   912   the function @{ML Parse_Spec.opt_thm_name} in Line 7.