handouts/ho06.tex
changeset 173 7cfb7a6f7c99
child 175 5801e8c0e528
equal deleted inserted replaced
172:47b5c91eff47 173:7cfb7a6f7c99
       
     1 \documentclass{article}
       
     2 \usepackage{charter}
       
     3 \usepackage{hyperref}
       
     4 \usepackage{amssymb}
       
     5 \usepackage{amsmath}
       
     6 \usepackage[T1]{fontenc}
       
     7 \usepackage{listings}
       
     8 \usepackage{xcolor}
       
     9 \usepackage{tikz}
       
    10 \usetikzlibrary{arrows}
       
    11 \usetikzlibrary{automata}
       
    12 \usetikzlibrary{shapes}
       
    13 \usetikzlibrary{shadows}
       
    14 \usetikzlibrary{positioning}
       
    15 \usetikzlibrary{calc}
       
    16 \usetikzlibrary{fit}
       
    17 \usetikzlibrary{backgrounds}
       
    18 \usepackage{fontspec}
       
    19 \setmonofont{Consolas}
       
    20 
       
    21 \newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}%
       
    22 
       
    23 \definecolor{javared}{rgb}{0.6,0,0} % for strings
       
    24 \definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments
       
    25 \definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords
       
    26 \definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc
       
    27 
       
    28 \lstdefinelanguage{scala}{
       
    29   morekeywords={abstract,case,catch,class,def,%
       
    30     do,else,extends,false,final,finally,%
       
    31     for,if,implicit,import,match,mixin,%
       
    32     new,null,object,override,package,%
       
    33     private,protected,requires,return,sealed,%
       
    34     super,this,throw,trait,true,try,%
       
    35     type,val,var,while,with,yield},
       
    36   otherkeywords={=>,<-,<\%,<:,>:,\#,@},
       
    37   sensitive=true,
       
    38   morecomment=[l]{//},
       
    39   morecomment=[n]{/*}{*/},
       
    40   morestring=[b]",
       
    41   morestring=[b]',
       
    42   morestring=[b]"""
       
    43 }
       
    44 
       
    45 \lstdefinelanguage{while}{
       
    46   morekeywords={while, if, then. else, read, write},
       
    47   otherkeywords={=>,<-,<\%,<:,>:,\#,@},
       
    48   sensitive=true,
       
    49   morecomment=[l]{//},
       
    50   morecomment=[n]{/*}{*/},
       
    51   morestring=[b]",
       
    52   morestring=[b]',
       
    53   morestring=[b]"""
       
    54 }
       
    55 
       
    56 
       
    57 \lstset{language=Scala,
       
    58 	basicstyle=\ttfamily,
       
    59 	keywordstyle=\color{javapurple}\bfseries,
       
    60 	stringstyle=\color{javagreen},
       
    61 	commentstyle=\color{javagreen},
       
    62 	morecomment=[s][\color{javadocblue}]{/**}{*/},
       
    63 	numbers=left,
       
    64 	numberstyle=\tiny\color{black},
       
    65 	stepnumber=1,
       
    66 	numbersep=10pt,
       
    67 	tabsize=2,
       
    68 	showspaces=false,
       
    69 	showstringspaces=false}
       
    70 	
       
    71 \newcommand\grid[1]{%
       
    72 \begin{tikzpicture}[baseline=(char.base)]
       
    73   \path[use as bounding box]
       
    74     (0,0) rectangle (1em,1em);
       
    75   \draw[red!50, fill=red!20]
       
    76     (0,0) rectangle (1em,1em);
       
    77   \node[inner sep=1pt,anchor=base west]
       
    78     (char) at (0em,\gridraiseamount) {#1};
       
    79 \end{tikzpicture}}
       
    80 \newcommand\gridraiseamount{0.12em}
       
    81 
       
    82 \makeatletter
       
    83 \newcommand\Grid[1]{%
       
    84   \@tfor\z:=#1\do{\grid{\z}}}
       
    85 \makeatother	
       
    86 
       
    87 \newcommand\Vspace[1][.3em]{%
       
    88   \mbox{\kern.06em\vrle height.3ex}%
       
    89   \vbox{\hrule width#1}%
       
    90   \hbox{\vrule height.3ex}}
       
    91 
       
    92 \def\VS{\Vspace[0.6em]}
       
    93 	
       
    94 \begin{document}
       
    95 
       
    96 \section*{Handout 6}
       
    97 
       
    98 While regular expressions are very useful for lexing and for recognising
       
    99 many patterns (like email addresses), they have their limitations. For
       
   100 example there is no regular expression that can recognise the language 
       
   101 $a^nb^n$. Another example is the language of well-parenthesised 
       
   102 expressions.  In languages like Lisp, which use parentheses rather
       
   103 extensively, it might be of interest whether the following two expressions
       
   104 are well-parenthesised (the left one is, the right one is not):
       
   105 
       
   106 \begin{center}
       
   107 $(((()()))())$  \hspace{10mm} $(((()()))()))$
       
   108 \end{center}
       
   109 
       
   110 In order to solve such recognition problems, we need more powerful 
       
   111 techniques than regular expressions. We will in particular look at \emph{context-free
       
   112 languages}. They include the regular languages as the picture below shows:
       
   113 
       
   114 
       
   115 \begin{center}
       
   116 \begin{tikzpicture}
       
   117 [rect/.style={draw=black!50, top color=white,bottom color=black!20, rectangle, very thick, rounded corners}]
       
   118 
       
   119 \draw (0,0) node [rect, text depth=30mm, text width=46mm] {all languages};
       
   120 \draw (0,-0.4) node [rect, text depth=20mm, text width=44mm] {decidable languages};
       
   121 \draw (0,-0.65) node [rect, text depth=13mm] {context sensitive languages};
       
   122 \draw (0,-0.84) node [rect, text depth=7mm, text width=35mm] {context-free languages};
       
   123 \draw (0,-1.05) node [rect] {regular languages};
       
   124 \end{tikzpicture}
       
   125 \end{center}
       
   126 
       
   127 \noindent
       
   128 Context-free languages play an important role in `day-to-day' text processing and in
       
   129 programming languages. Context-free languages are usually specified by grammars.
       
   130 For example a grammar for well-parenthesised  expressions is
       
   131 
       
   132 \begin{center}
       
   133 $P \;\;\rightarrow\;\; ( \cdot  P \cdot ) \cdot P \;|\; \epsilon$
       
   134 \end{center}
       
   135  
       
   136 \noindent
       
   137 In general grammars consist of finitely many rules built up from terminal symbols (usually lower-case letters)
       
   138 and non-terminal symbols (upper-case letters).  Rules have the shape
       
   139 
       
   140 \begin{center}
       
   141 $NT \;\;\rightarrow\;\; \textit{rhs}$
       
   142 \end{center}
       
   143  
       
   144 \noindent
       
   145 where on the left-hand side is a single non-terminal and on the right a string consisting
       
   146 of both terminals and non-terminals including the $\epsilon$-symbol for indicating the
       
   147 empty string. We use the convention  to separate components on
       
   148 the right hand-side by using the $\cdot$ symbol, as in the grammar for well-parenthesised  expressions.
       
   149 We also use the convention to use $|$ as a shorthand notation for several rules. For example
       
   150 
       
   151 \begin{center}
       
   152 $NT \;\;\rightarrow\;\; \textit{rhs}_1 \;|\; \textit{rhs}_2$
       
   153 \end{center}
       
   154 
       
   155 \noindent
       
   156 means that the non-terminal $NT$ can be replaced by either $\textit{rhs}_1$ or $\textit{rhs}_2$.
       
   157 If there are more than one non-terminal on the left-hand side of the rules, then we need to indicate
       
   158 what is the \emph{starting} symbol of the grammar. For example the grammar for arithmetic expressions
       
   159 can be given as follows
       
   160 
       
   161 \begin{center}
       
   162 \begin{tabular}{lcl}
       
   163 $E$ & $\rightarrow$ &  $N$ \\
       
   164 $E$ & $\rightarrow$ &  $E \cdot + \cdot E$ \\
       
   165 $E$ & $\rightarrow$ &  $E \cdot - \cdot E$ \\
       
   166 $E$ & $\rightarrow$ &  $E \cdot * \cdot E$ \\
       
   167 $E$ & $\rightarrow$ &  $( \cdot E \cdot )$\\
       
   168 $N$ & $\rightarrow$ & $\epsilon \;|\; 0 \cdot N \;|\; 1 \cdot N \;|\: \ldots \;|\; 9 \cdot N$ 
       
   169 \end{tabular}
       
   170 \end{center}
       
   171 
       
   172 \noindent
       
   173 where $E$ is the starting symbol. A \emph{derivation} for a grammar
       
   174 starts with the staring symbol of the grammar and in each step replaces one
       
   175 non-terminal by a right-hand side of a rule.
       
   176 
       
   177 
       
   178 \end{document}
       
   179 
       
   180 %%% Local Variables: 
       
   181 %%% mode: latex  
       
   182 %%% TeX-master: t
       
   183 %%% End: