--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/handouts/ho06.tex Fri Nov 01 11:57:04 2013 +0000
@@ -0,0 +1,183 @@
+\documentclass{article}
+\usepackage{charter}
+\usepackage{hyperref}
+\usepackage{amssymb}
+\usepackage{amsmath}
+\usepackage[T1]{fontenc}
+\usepackage{listings}
+\usepackage{xcolor}
+\usepackage{tikz}
+\usetikzlibrary{arrows}
+\usetikzlibrary{automata}
+\usetikzlibrary{shapes}
+\usetikzlibrary{shadows}
+\usetikzlibrary{positioning}
+\usetikzlibrary{calc}
+\usetikzlibrary{fit}
+\usetikzlibrary{backgrounds}
+\usepackage{fontspec}
+\setmonofont{Consolas}
+
+\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}%
+
+\definecolor{javared}{rgb}{0.6,0,0} % for strings
+\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments
+\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords
+\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc
+
+\lstdefinelanguage{scala}{
+ morekeywords={abstract,case,catch,class,def,%
+ do,else,extends,false,final,finally,%
+ for,if,implicit,import,match,mixin,%
+ new,null,object,override,package,%
+ private,protected,requires,return,sealed,%
+ super,this,throw,trait,true,try,%
+ type,val,var,while,with,yield},
+ otherkeywords={=>,<-,<\%,<:,>:,\#,@},
+ sensitive=true,
+ morecomment=[l]{//},
+ morecomment=[n]{/*}{*/},
+ morestring=[b]",
+ morestring=[b]',
+ morestring=[b]"""
+}
+
+\lstdefinelanguage{while}{
+ morekeywords={while, if, then. else, read, write},
+ otherkeywords={=>,<-,<\%,<:,>:,\#,@},
+ sensitive=true,
+ morecomment=[l]{//},
+ morecomment=[n]{/*}{*/},
+ morestring=[b]",
+ morestring=[b]',
+ morestring=[b]"""
+}
+
+
+\lstset{language=Scala,
+ basicstyle=\ttfamily,
+ keywordstyle=\color{javapurple}\bfseries,
+ stringstyle=\color{javagreen},
+ commentstyle=\color{javagreen},
+ morecomment=[s][\color{javadocblue}]{/**}{*/},
+ numbers=left,
+ numberstyle=\tiny\color{black},
+ stepnumber=1,
+ numbersep=10pt,
+ tabsize=2,
+ showspaces=false,
+ showstringspaces=false}
+
+\newcommand\grid[1]{%
+\begin{tikzpicture}[baseline=(char.base)]
+ \path[use as bounding box]
+ (0,0) rectangle (1em,1em);
+ \draw[red!50, fill=red!20]
+ (0,0) rectangle (1em,1em);
+ \node[inner sep=1pt,anchor=base west]
+ (char) at (0em,\gridraiseamount) {#1};
+\end{tikzpicture}}
+\newcommand\gridraiseamount{0.12em}
+
+\makeatletter
+\newcommand\Grid[1]{%
+ \@tfor\z:=#1\do{\grid{\z}}}
+\makeatother
+
+\newcommand\Vspace[1][.3em]{%
+ \mbox{\kern.06em\vrle height.3ex}%
+ \vbox{\hrule width#1}%
+ \hbox{\vrule height.3ex}}
+
+\def\VS{\Vspace[0.6em]}
+
+\begin{document}
+
+\section*{Handout 6}
+
+While regular expressions are very useful for lexing and for recognising
+many patterns (like email addresses), they have their limitations. For
+example there is no regular expression that can recognise the language
+$a^nb^n$. Another example is the language of well-parenthesised
+expressions. In languages like Lisp, which use parentheses rather
+extensively, it might be of interest whether the following two expressions
+are well-parenthesised (the left one is, the right one is not):
+
+\begin{center}
+$(((()()))())$ \hspace{10mm} $(((()()))()))$
+\end{center}
+
+In order to solve such recognition problems, we need more powerful
+techniques than regular expressions. We will in particular look at \emph{context-free
+languages}. They include the regular languages as the picture below shows:
+
+
+\begin{center}
+\begin{tikzpicture}
+[rect/.style={draw=black!50, top color=white,bottom color=black!20, rectangle, very thick, rounded corners}]
+
+\draw (0,0) node [rect, text depth=30mm, text width=46mm] {all languages};
+\draw (0,-0.4) node [rect, text depth=20mm, text width=44mm] {decidable languages};
+\draw (0,-0.65) node [rect, text depth=13mm] {context sensitive languages};
+\draw (0,-0.84) node [rect, text depth=7mm, text width=35mm] {context-free languages};
+\draw (0,-1.05) node [rect] {regular languages};
+\end{tikzpicture}
+\end{center}
+
+\noindent
+Context-free languages play an important role in `day-to-day' text processing and in
+programming languages. Context-free languages are usually specified by grammars.
+For example a grammar for well-parenthesised expressions is
+
+\begin{center}
+$P \;\;\rightarrow\;\; ( \cdot P \cdot ) \cdot P \;|\; \epsilon$
+\end{center}
+
+\noindent
+In general grammars consist of finitely many rules built up from terminal symbols (usually lower-case letters)
+and non-terminal symbols (upper-case letters). Rules have the shape
+
+\begin{center}
+$NT \;\;\rightarrow\;\; \textit{rhs}$
+\end{center}
+
+\noindent
+where on the left-hand side is a single non-terminal and on the right a string consisting
+of both terminals and non-terminals including the $\epsilon$-symbol for indicating the
+empty string. We use the convention to separate components on
+the right hand-side by using the $\cdot$ symbol, as in the grammar for well-parenthesised expressions.
+We also use the convention to use $|$ as a shorthand notation for several rules. For example
+
+\begin{center}
+$NT \;\;\rightarrow\;\; \textit{rhs}_1 \;|\; \textit{rhs}_2$
+\end{center}
+
+\noindent
+means that the non-terminal $NT$ can be replaced by either $\textit{rhs}_1$ or $\textit{rhs}_2$.
+If there are more than one non-terminal on the left-hand side of the rules, then we need to indicate
+what is the \emph{starting} symbol of the grammar. For example the grammar for arithmetic expressions
+can be given as follows
+
+\begin{center}
+\begin{tabular}{lcl}
+$E$ & $\rightarrow$ & $N$ \\
+$E$ & $\rightarrow$ & $E \cdot + \cdot E$ \\
+$E$ & $\rightarrow$ & $E \cdot - \cdot E$ \\
+$E$ & $\rightarrow$ & $E \cdot * \cdot E$ \\
+$E$ & $\rightarrow$ & $( \cdot E \cdot )$\\
+$N$ & $\rightarrow$ & $\epsilon \;|\; 0 \cdot N \;|\; 1 \cdot N \;|\: \ldots \;|\; 9 \cdot N$
+\end{tabular}
+\end{center}
+
+\noindent
+where $E$ is the starting symbol. A \emph{derivation} for a grammar
+starts with the staring symbol of the grammar and in each step replaces one
+non-terminal by a right-hand side of a rule.
+
+
+\end{document}
+
+%%% Local Variables:
+%%% mode: latex
+%%% TeX-master: t
+%%% End: