afl-material: comparison handouts/ho06.tex

equal deleted inserted replaced

-:47b5c91eff47
+:7cfb7a6f7c99
+\documentclass{article}
+\usepackage{charter}
+\usepackage{hyperref}
+\usepackage{amssymb}
+\usepackage{amsmath}
+\usepackage[T1]{fontenc}
+\usepackage{listings}
+\usepackage{xcolor}
+\usepackage{tikz}
+\usetikzlibrary{arrows}
+\usetikzlibrary{automata}
+\usetikzlibrary{shapes}
+\usetikzlibrary{shadows}
+\usetikzlibrary{positioning}
+\usetikzlibrary{calc}
+\usetikzlibrary{fit}
+\usetikzlibrary{backgrounds}
+\usepackage{fontspec}
+\setmonofont{Consolas}
+\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}%
+\definecolor{javared}{rgb}{0.6,0,0} % for strings
+\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments
+\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords
+\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc
+\lstdefinelanguage{scala}{
+morekeywords={abstract,case,catch,class,def,%
+do,else,extends,false,final,finally,%
+for,if,implicit,import,match,mixin,%
+new,null,object,override,package,%
+private,protected,requires,return,sealed,%
+super,this,throw,trait,true,try,%
+type,val,var,while,with,yield},
+otherkeywords={=>,<-,<\%,<:,>:,\#,@},
+sensitive=true,
+morecomment=[l]{//},
+morecomment=[n]{/*}{*/},
+morestring=[b]",
+morestring=[b]',
+morestring=[b]"""
+}
+\lstdefinelanguage{while}{
+morekeywords={while, if, then. else, read, write},
+otherkeywords={=>,<-,<\%,<:,>:,\#,@},
+sensitive=true,
+morecomment=[l]{//},
+morecomment=[n]{/*}{*/},
+morestring=[b]",
+morestring=[b]',
+morestring=[b]"""
+}
+\lstset{language=Scala,
+	basicstyle=\ttfamily,
+	keywordstyle=\color{javapurple}\bfseries,
+	stringstyle=\color{javagreen},
+	commentstyle=\color{javagreen},
+	morecomment=[s][\color{javadocblue}]{/**}{*/},
+	numbers=left,
+	numberstyle=\tiny\color{black},
+	stepnumber=1,
+	numbersep=10pt,
+	tabsize=2,
+	showspaces=false,
+	showstringspaces=false}
+\newcommand\grid[1]{%
+\begin{tikzpicture}[baseline=(char.base)]
+\path[use as bounding box]
+(0,0) rectangle (1em,1em);
+\draw[red!50, fill=red!20]
+(0,0) rectangle (1em,1em);
+\node[inner sep=1pt,anchor=base west]
+(char) at (0em,\gridraiseamount) {#1};
+\end{tikzpicture}}
+\newcommand\gridraiseamount{0.12em}
+\makeatletter
+\newcommand\Grid[1]{%
+\@tfor\z:=#1\do{\grid{\z}}}
+\makeatother
+\newcommand\Vspace[1][.3em]{%
+\mbox{\kern.06em\vrle height.3ex}%
+\vbox{\hrule width#1}%
+\hbox{\vrule height.3ex}}
+\def\VS{\Vspace[0.6em]}
+\begin{document}
+\section*{Handout 6}
+While regular expressions are very useful for lexing and for recognising
+many patterns (like email addresses), they have their limitations. For
+example there is no regular expression that can recognise the language
+$a^nb^n$. Another example is the language of well-parenthesised
+expressions.  In languages like Lisp, which use parentheses rather
+extensively, it might be of interest whether the following two expressions
+are well-parenthesised (the left one is, the right one is not):
+\begin{center}
+$(((()()))())$  \hspace{10mm} $(((()()))()))$
+\end{center}
+In order to solve such recognition problems, we need more powerful
+techniques than regular expressions. We will in particular look at \emph{context-free
+languages}. They include the regular languages as the picture below shows:
+\begin{center}
+\begin{tikzpicture}
+[rect/.style={draw=black!50, top color=white,bottom color=black!20, rectangle, very thick, rounded corners}]
+\draw (0,0) node [rect, text depth=30mm, text width=46mm] {all languages};
+\draw (0,-0.4) node [rect, text depth=20mm, text width=44mm] {decidable languages};
+\draw (0,-0.65) node [rect, text depth=13mm] {context sensitive languages};
+\draw (0,-0.84) node [rect, text depth=7mm, text width=35mm] {context-free languages};
+\draw (0,-1.05) node [rect] {regular languages};
+\end{tikzpicture}
+\end{center}
+\noindent
+Context-free languages play an important role in `day-to-day' text processing and in
+programming languages. Context-free languages are usually specified by grammars.
+For example a grammar for well-parenthesised  expressions is
+\begin{center}
+$P \;\;\rightarrow\;\; ( \cdot  P \cdot ) \cdot P \;|\; \epsilon$
+\end{center}
+\noindent
+In general grammars consist of finitely many rules built up from terminal symbols (usually lower-case letters)
+and non-terminal symbols (upper-case letters).  Rules have the shape
+\begin{center}
+$NT \;\;\rightarrow\;\; \textit{rhs}$
+\end{center}
+\noindent
+where on the left-hand side is a single non-terminal and on the right a string consisting
+of both terminals and non-terminals including the $\epsilon$-symbol for indicating the
+empty string. We use the convention  to separate components on
+the right hand-side by using the $\cdot$ symbol, as in the grammar for well-parenthesised  expressions.
+We also use the convention to use $|$ as a shorthand notation for several rules. For example
+\begin{center}
+$NT \;\;\rightarrow\;\; \textit{rhs}_1 \;|\; \textit{rhs}_2$
+\end{center}
+\noindent
+means that the non-terminal $NT$ can be replaced by either $\textit{rhs}_1$ or $\textit{rhs}_2$.
+If there are more than one non-terminal on the left-hand side of the rules, then we need to indicate
+what is the \emph{starting} symbol of the grammar. For example the grammar for arithmetic expressions
+can be given as follows
+\begin{center}
+\begin{tabular}{lcl}
+$E$ & $\rightarrow$ &  $N$ \\
+$E$ & $\rightarrow$ &  $E \cdot + \cdot E$ \\
+$E$ & $\rightarrow$ &  $E \cdot - \cdot E$ \\
+$E$ & $\rightarrow$ &  $E \cdot * \cdot E$ \\
+$E$ & $\rightarrow$ &  $( \cdot E \cdot )$\\
+$N$ & $\rightarrow$ & $\epsilon \;|\; 0 \cdot N \;|\; 1 \cdot N \;|\: \ldots \;|\; 9 \cdot N$
+\end{tabular}
+\end{center}
+\noindent
+where $E$ is the starting symbol. A \emph{derivation} for a grammar
+starts with the staring symbol of the grammar and in each step replaces one
+non-terminal by a right-hand side of a rule.
+\end{document}
+%%% Local Variables:
+%%% mode: latex
+%%% TeX-master: t
+%%% End:

changeset 173	7cfb7a6f7c99
child 175	5801e8c0e528