slides04.tex
changeset 34 eeff9953a1c1
parent 33 92b3e287d87e
child 35 487b0c0aef75
equal deleted inserted replaced
33:92b3e287d87e 34:eeff9953a1c1
   111 \item tokenizer
   111 \item tokenizer
   112 
   112 
   113 \item tokenization identifies lexemes in an input stream of characters (or string)
   113 \item tokenization identifies lexemes in an input stream of characters (or string)
   114 and categorizes them into tokens
   114 and categorizes them into tokens
   115 
   115 
   116 \item maximal munch rule
   116 \item longest match rule (maximal munch rule): The 
       
   117 longest initial substring matched by any regular expression is taken
       
   118 as the next token.
       
   119 
       
   120 \item Rule priority:
       
   121 For a particular longest initial substring, the first regular
       
   122 expression that can match determines the token.
       
   123 
       
   124 \item problem with infix operations, for example \texttt{i-12}
   117 \end{itemize}
   125 \end{itemize}
   118 
   126 
   119 \url{http://www.technologyreview.com/tr10/?year=2011}
   127 \url{http://www.technologyreview.com/tr10/?year=2011}
   120   
   128   
       
   129 deterministic finite automata / nondeterministic finite automata
       
   130 
       
   131 
       
   132 
   121 \end{frame}}
   133 \end{frame}}
   122 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
   134 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
   123 
   135 
   124 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   136 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   125 \mode<presentation>{
   137 \mode<presentation>{