--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/handouts/ho06.tex	Fri Nov 01 11:57:04 2013 +0000
@@ -0,0 +1,183 @@
+\documentclass{article}
+\usepackage{charter}
+\usepackage{hyperref}
+\usepackage{amssymb}
+\usepackage{amsmath}
+\usepackage[T1]{fontenc}
+\usepackage{listings}
+\usepackage{xcolor}
+\usepackage{tikz}
+\usetikzlibrary{arrows}
+\usetikzlibrary{automata}
+\usetikzlibrary{shapes}
+\usetikzlibrary{shadows}
+\usetikzlibrary{positioning}
+\usetikzlibrary{calc}
+\usetikzlibrary{fit}
+\usetikzlibrary{backgrounds}
+\usepackage{fontspec}
+\setmonofont{Consolas}
+
+\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% relation symbol: an equals sign with a small text-font def stacked above it
+
+\definecolor{javared}{rgb}{0.6,0,0} % dark red, labelled for strings; NOTE(review): not referenced by the \lstset in this file
+\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % green used for comments (and, via stringstyle, for strings)
+\definecolor{javapurple}{rgb}{0.5,0,0.35} % purple used for keywords
+\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % blue used for /** ... */ documentation comments
+
+\lstdefinelanguage{scala}{ % listings language definition used for the Scala code in the handouts
+  morekeywords={abstract,case,catch,class,def,%
+    do,else,extends,false,final,finally,%
+    for,if,implicit,import,match,mixin,%
+    new,null,object,override,package,%
+    private,protected,requires,return,sealed,%
+    super,this,throw,trait,true,try,%
+    type,val,var,while,with,yield},
+  otherkeywords={=>,<-,<\%,<:,>:,\#,@}, % symbolic (non-letter) tokens that are styled like keywords
+  sensitive=true, % keywords are matched case-sensitively
+  morecomment=[l]{//}, % line comment running to the end of the line
+  morecomment=[n]{/*}{*/}, % block comment, declared as nestable
+  morestring=[b]", % string delimiter with backslash escapes
+  morestring=[b]', % character-literal delimiter with backslash escapes
+  morestring=[b]""" % triple-quote delimiter, also registered as a string
+}
+
+\lstdefinelanguage{while}{ % listings language definition for the while-language; lexical rules mirror the scala definition
+  morekeywords={while, if, then, else, read, write}, % fixed: a period after then merged then and else into one bogus list entry
+  otherkeywords={=>,<-,<\%,<:,>:,\#,@}, % symbolic (non-letter) tokens that are styled like keywords
+  sensitive=true, % keywords are matched case-sensitively
+  morecomment=[l]{//}, % line comment running to the end of the line
+  morecomment=[n]{/*}{*/}, % block comment, declared as nestable
+  morestring=[b]", % string delimiter with backslash escapes
+  morestring=[b]', % character-literal delimiter with backslash escapes
+  morestring=[b]""" % triple-quote delimiter, also registered as a string
+}
+
+
+\lstset{language=Scala, % global defaults for every listing; Scala is the default language
+	basicstyle=\ttfamily, % code is set in the monospaced font
+	keywordstyle=\color{javapurple}\bfseries, % keywords: bold purple
+	stringstyle=\color{javagreen}, % NOTE(review): strings share the comment colour although javared is labelled for strings; confirm intended
+	commentstyle=\color{javagreen}, % ordinary comments: green
+	morecomment=[s][\color{javadocblue}]{/**}{*/}, % /** ... */ comments get their own blue style
+	numbers=left, % line numbers in the left margin
+	numberstyle=\tiny\color{black}, % tiny black line numbers
+	stepnumber=1, % number every line
+	numbersep=10pt, % gap between the numbers and the code
+	tabsize=2, % a tab counts as two columns
+	showspaces=false, % spaces are not drawn as visible marks
+	showstringspaces=false} % the same holds for spaces inside string literals
+	
+\newcommand\grid[1]{% typesets #1 on top of a 1em by 1em light-red square whose baseline is the base of the character node
+\begin{tikzpicture}[baseline=(char.base)]
+  \path[use as bounding box]
+    (0,0) rectangle (1em,1em); % the picture always occupies exactly 1em by 1em, whatever the size of #1
+  \draw[red!50, fill=red!20]
+    (0,0) rectangle (1em,1em); % the visible cell: red!50 outline, red!20 fill
+  \node[inner sep=1pt,anchor=base west]
+    (char) at (0em,\gridraiseamount) {#1}; % \gridraiseamount is only expanded when \grid is used, so defining it below is fine
+\end{tikzpicture}}
+\newcommand\gridraiseamount{0.12em} % vertical offset of the character node above the bottom edge of the cell
+
+\makeatletter
+\newcommand\Grid[1]{% applies \grid to each token (or brace group) of #1 in turn, using the kernel loop \@tfor
+  \@tfor\z:=#1\do{\grid{\z}}}
+\makeatother	
+
+\newcommand\Vspace[1][.3em]{% draws tick, bar, tick (presumably a visible-space mark); optional argument = width of the bar, default .3em
+  \mbox{\kern.06em\vrule height.3ex}% left tick; fixed the misspelling \vrle, which is an undefined control sequence
+  \vbox{\hrule width#1}% horizontal bar of the requested width
+  \hbox{\vrule height.3ex}} % right tick
+
+\def\VS{\Vspace[0.6em]} % shorthand: \Vspace with a 0.6em wide bar
+	
+\begin{document}
+
+\section*{Handout 6}
+
+While regular expressions are very useful for lexing and for recognising
+many patterns (like email addresses), they have their limitations. For
+example there is no regular expression that can recognise the language 
+$a^nb^n$. Another example is the language of well-parenthesised 
+expressions.  In languages like Lisp, which use parentheses rather
+extensively, it might be of interest whether the following two expressions
+are well-parenthesised (the left one is, the right one is not):
+
+\begin{center}
+$(((()()))())$  \hspace{10mm} $(((()()))()))$
+\end{center}
+
+In order to solve such recognition problems, we need more powerful 
+techniques than regular expressions. We will in particular look at \emph{context-free
+languages}. They include the regular languages as the picture below shows:
+
+
+\begin{center}
+\begin{tikzpicture}
+[rect/.style={draw=black!50, top color=white,bottom color=black!20, rectangle, very thick, rounded corners}]
+
+\draw (0,0) node [rect, text depth=30mm, text width=46mm] {all languages};
+\draw (0,-0.4) node [rect, text depth=20mm, text width=44mm] {decidable languages};
+\draw (0,-0.65) node [rect, text depth=13mm] {context sensitive languages};
+\draw (0,-0.84) node [rect, text depth=7mm, text width=35mm] {context-free languages};
+\draw (0,-1.05) node [rect] {regular languages};
+\end{tikzpicture}
+\end{center}
+
+\noindent
+Context-free languages play an important role in `day-to-day' text processing and in
+programming languages. Context-free languages are usually specified by grammars.
+For example a grammar for well-parenthesised  expressions is
+
+\begin{center}
+$P \;\;\rightarrow\;\; ( \cdot  P \cdot ) \cdot P \;|\; \epsilon$
+\end{center}
+ 
+\noindent
+In general grammars consist of finitely many rules built up from terminal symbols (usually lower-case letters)
+and non-terminal symbols (upper-case letters).  Rules have the shape
+
+\begin{center}
+$NT \;\;\rightarrow\;\; \textit{rhs}$
+\end{center}
+ 
+\noindent
+where on the left-hand side is a single non-terminal and on the right a string consisting
+of both terminals and non-terminals including the $\epsilon$-symbol for indicating the
+empty string. We use the convention of separating components on
+the right-hand side with the $\cdot$ symbol, as in the grammar for well-parenthesised expressions.
+We also use $|$ as a shorthand notation for several rules. For example
+
+\begin{center}
+$NT \;\;\rightarrow\;\; \textit{rhs}_1 \;|\; \textit{rhs}_2$
+\end{center}
+
+\noindent
+means that the non-terminal $NT$ can be replaced by either $\textit{rhs}_1$ or $\textit{rhs}_2$.
+If there is more than one non-terminal on the left-hand sides of the rules, then we need to indicate
+which one is the \emph{starting} symbol of the grammar. For example the grammar for arithmetic expressions
+can be given as follows
+
+\begin{center}
+\begin{tabular}{lcl}
+$E$ & $\rightarrow$ &  $N$ \\
+$E$ & $\rightarrow$ &  $E \cdot + \cdot E$ \\
+$E$ & $\rightarrow$ &  $E \cdot - \cdot E$ \\
+$E$ & $\rightarrow$ &  $E \cdot * \cdot E$ \\
+$E$ & $\rightarrow$ &  $( \cdot E \cdot )$\\
+$N$ & $\rightarrow$ & $\epsilon \;|\; 0 \cdot N \;|\; 1 \cdot N \;|\; \ldots \;|\; 9 \cdot N$ 
+\end{tabular}
+\end{center}
+
+\noindent
+where $E$ is the starting symbol. A \emph{derivation} for a grammar
+starts with the starting symbol of the grammar and in each step replaces one
+non-terminal by a right-hand side of a rule.
+
+
+\end{document}
+
+%%% Local Variables: 
+%%% mode: latex  
+%%% TeX-master: t
+%%% End: