ProgTutorial/Parsing.thy
changeset 558 84aef87b348a
parent 556 3c214b215f7e
child 559 ffa5c4ec9611
equal deleted inserted replaced
557:77ea2de0ca62 558:84aef87b348a
   500   @{ML "Position.none"} since,
   500   @{ML "Position.none"} since,
   501   at the moment, we are not interested in generating precise error
   501   at the moment, we are not interested in generating precise error
   502   messages. The following code
   502   messages. The following code
   503 
   503 
   504 
   504 
   505 @{ML_response_fake [display,gray] "Outer_Syntax.scan Position.none \"hello world\"" 
   505 @{ML_response_fake [display,gray] "Outer_Syntax.scan 
       
   506   (Keyword.get_lexicons ()) Position.none \"hello world\"" 
   506 "[Token (\<dots>,(Ident, \"hello\"),\<dots>), 
   507 "[Token (\<dots>,(Ident, \"hello\"),\<dots>), 
   507  Token (\<dots>,(Space, \" \"),\<dots>), 
   508  Token (\<dots>,(Space, \" \"),\<dots>), 
   508  Token (\<dots>,(Ident, \"world\"),\<dots>)]"}
   509  Token (\<dots>,(Ident, \"world\"),\<dots>)]"}
   509 
   510 
   510   produces three tokens where the first and the last are identifiers, since
   511   produces three tokens where the first and the last are identifiers, since
   518 ML %grayML{*val _ = Keyword.define ("hello", NONE) *}
   519 ML %grayML{*val _ = Keyword.define ("hello", NONE) *}
   519 
   520 
   520 text {*
   521 text {*
   521   then lexing @{text [quotes] "hello world"} will produce
   522   then lexing @{text [quotes] "hello world"} will produce
   522 
   523 
   523   @{ML_response_fake [display,gray] "Outer_Syntax.scan Position.none \"hello world\"" 
   524   @{ML_response_fake [display,gray] "Outer_Syntax.scan 
       
   525   (Keyword.get_lexicons ()) Position.none \"hello world\"" 
   524 "[Token (\<dots>,(Keyword, \"hello\"),\<dots>), 
   526 "[Token (\<dots>,(Keyword, \"hello\"),\<dots>), 
   525  Token (\<dots>,(Space, \" \"),\<dots>), 
   527  Token (\<dots>,(Space, \" \"),\<dots>), 
   526  Token (\<dots>,(Ident, \"world\"),\<dots>)]"}
   528  Token (\<dots>,(Ident, \"world\"),\<dots>)]"}
   527 
   529 
   528   Many parsing functions later on will require white space, comments and the like
   530   Many parsing functions later on will require white space, comments and the like
   530   functions @{ML filter} and @{ML_ind is_proper in Token} to do this. 
   532   functions @{ML filter} and @{ML_ind is_proper in Token} to do this. 
   531   For example:
   533   For example:
   532 
   534 
   533 @{ML_response_fake [display,gray]
   535 @{ML_response_fake [display,gray]
   534 "let
   536 "let
   535    val input = Outer_Syntax.scan Position.none \"hello world\"
   537    val input = Outer_Syntax.scan (Keyword.get_lexicons ()) Position.none \"hello world\"
   536 in
   538 in
   537    filter Token.is_proper input
   539    filter Token.is_proper input
   538 end" 
   540 end" 
   539 "[Token (\<dots>,(Ident, \"hello\"), \<dots>), Token (\<dots>,(Ident, \"world\"), \<dots>)]"}
   541 "[Token (\<dots>,(Ident, \"hello\"), \<dots>), Token (\<dots>,(Ident, \"world\"), \<dots>)]"}
   540 
   542 
   541   For convenience we define the function:
   543   For convenience we define the function:
   542 *}
   544 *}
   543 
   545 
   544 ML %grayML{*fun filtered_input str = 
   546 ML %grayML{*fun filtered_input str = 
   545   filter Token.is_proper (Outer_Syntax.scan Position.none str) *}
   547   filter Token.is_proper (Outer_Syntax.scan (Keyword.get_lexicons ()) Position.none str) *}
   546 
   548 
   547 text {* 
   549 text {* 
   548   If you now parse
   550   If you now parse
   549 
   551 
   550 @{ML_response_fake [display,gray] 
   552 @{ML_response_fake [display,gray] 
   667   and then thread this information up the ``processing chain''. To see this,
   669   and then thread this information up the ``processing chain''. To see this,
   668   modify the function @{ML filtered_input}, described earlier, as follows 
   670   modify the function @{ML filtered_input}, described earlier, as follows 
   669 *}
   671 *}
   670 
   672 
   671 ML %grayML{*fun filtered_input' str = 
   673 ML %grayML{*fun filtered_input' str = 
   672        filter Token.is_proper (Outer_Syntax.scan (Position.line 7) str) *}
   674        filter Token.is_proper (Outer_Syntax.scan (Keyword.get_lexicons ()) (Position.line 7) str) *}
   673 
   675 
   674 text {*
   676 text {*
   675   where we pretend the parsed string starts on line 7. An example is
   677   where we pretend the parsed string starts on line 7. An example is
   676 
   678 
   677 @{ML_response_fake [display,gray]
   679 @{ML_response_fake [display,gray]
   756   for terms and  types: you can just call the predefined parsers. Terms can 
   758   for terms and  types: you can just call the predefined parsers. Terms can 
   757   be parsed using the function @{ML_ind term in Parse}. For example:
   759   be parsed using the function @{ML_ind term in Parse}. For example:
   758 
   760 
   759 @{ML_response_fake [display,gray]
   761 @{ML_response_fake [display,gray]
   760 "let 
   762 "let 
   761   val input = Outer_Syntax.scan Position.none \"foo\"
   763   val input = Outer_Syntax.scan (Keyword.get_lexicons ()) Position.none \"foo\"
   762 in 
   764 in 
   763   Parse.term input
   765   Parse.term input
   764 end"
   766 end"
   765 "(\"<markup>\", [])"}
   767 "(\"<markup>\", [])"}
   766 
   768 
   772   The result of the decoding is an XML-tree. You can see better what is going on if
   774   The result of the decoding is an XML-tree. You can see better what is going on if
   773   you replace @{ML Position.none} by @{ML "Position.line 42"}, say:
   775   you replace @{ML Position.none} by @{ML "Position.line 42"}, say:
   774 
   776 
   775 @{ML_response_fake [display,gray]
   777 @{ML_response_fake [display,gray]
   776 "let 
   778 "let 
   777   val input = Outer_Syntax.scan (Position.line 42) \"foo\"
   779   val input = Outer_Syntax.scan (Keyword.get_lexicons ()) (Position.line 42) \"foo\"
   778 in 
   780 in 
   779   YXML.parse (fst (Parse.term input))
   781   YXML.parse (fst (Parse.term input))
   780 end"
   782 end"
   781 "Elem (\"token\", [(\"line\", \"42\"), (\"end_line\", \"42\")], [XML.Text \"foo\"])"}
   783 "Elem (\"token\", [(\"line\", \"42\"), (\"end_line\", \"42\")], [XML.Text \"foo\"])"}
   782 
   784 
   895 
   897 
   896 @{ML_response [display,gray] "let 
   898 @{ML_response [display,gray] "let 
   897   val input = filtered_input \"foo_lemma[intro,dest!]:\"
   899   val input = filtered_input \"foo_lemma[intro,dest!]:\"
   898   val ((name, attrib), _) = parse (Parse_Spec.thm_name \":\") input 
   900   val ((name, attrib), _) = parse (Parse_Spec.thm_name \":\") input 
   899 in 
   901 in 
   900   (name, map Args.name_of_src attrib)
   902   (name, map Token.name_of_src attrib)
   901 end" "(foo_lemma, [(\"intro\", \<dots>), (\"dest\", \<dots>)])"}
   903 end" "(foo_lemma, [(\"intro\", \<dots>), (\"dest\", \<dots>)])"}
   902  
   904  
   903   The function @{ML_ind opt_thm_name in Parse_Spec} is the ``optional'' variant of
   905   The function @{ML_ind opt_thm_name in Parse_Spec} is the ``optional'' variant of
   904   @{ML_ind thm_name in Parse_Spec}. Theorem names can contain attributes. The name 
   906   @{ML_ind thm_name in Parse_Spec}. Theorem names can contain attributes. The name 
   905   has to end with @{text [quotes] ":"}---see the argument of 
   907   has to end with @{text [quotes] ":"}---see the argument of