# HG changeset patch # User Christian Urban # Date 1382717179 -3600 # Node ID 70ab41cb610e1696d0b1e8b3e0e6c6a456eb86c8 # Parent 90e27fafc5c7312840c152e9c087e5e0d4b8a4df added diff -r 90e27fafc5c7 -r 70ab41cb610e handouts/ho05.pdf Binary file handouts/ho05.pdf has changed diff -r 90e27fafc5c7 -r 70ab41cb610e handouts/ho05.tex --- a/handouts/ho05.tex Fri Oct 25 14:33:35 2013 +0100 +++ b/handouts/ho05.tex Fri Oct 25 17:06:19 2013 +0100 @@ -15,6 +15,8 @@ \usetikzlibrary{calc} \usetikzlibrary{fit} \usetikzlibrary{backgrounds} +\usepackage{fontspec} +\setmonofont{Consolas} \newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% @@ -40,6 +42,18 @@ morestring=[b]""" } +\lstdefinelanguage{while}{ + morekeywords={while, if, then. else, read, write}, + otherkeywords={=>,<-,<\%,<:,>:,\#,@}, + sensitive=true, + morecomment=[l]{//}, + morecomment=[n]{/*}{*/}, + morestring=[b]", + morestring=[b]', + morestring=[b]""" +} + + \lstset{language=Scala, basicstyle=\ttfamily, keywordstyle=\color{javapurple}\bfseries, @@ -54,18 +68,109 @@ showspaces=false, showstringspaces=false} +\newcommand\grid[1]{% +\begin{tikzpicture}[baseline=(char.base)] + \path[use as bounding box] + (0,0) rectangle (1em,1em); + \draw[red!50, fill=red!20] + (0,0) rectangle (1em,1em); + \node[inner sep=1pt,anchor=base west] + (char) at (0em,\gridraiseamount) {#1}; +\end{tikzpicture}} +\newcommand\gridraiseamount{0.12em} + +\makeatletter +\newcommand\Grid[1]{% + \@tfor\z:=#1\do{\grid{\z}}} +\makeatother + +\newcommand\Vspace[1][.3em]{% + \mbox{\kern.06em\vrule height.3ex}% + \vbox{\hrule width#1}% + \hbox{\vrule height.3ex}} + +\def\VS{\Vspace[0.6em]} + \begin{document} \section*{Handout 5} Whenever you want to design a programming language or implement a compiler for an -existing language, the first task is to fix the basic ``words'' of the language, like what are the k -eywords, what are permitted identifiers and so on. One convenient way to do this is, of +existing language, the first task is to fix the basic ``words'' of the language, like what are the +keywords or reserved words of the language, what are permitted identifiers, numbers and so +on. One convenient way to do this is, of course, to use regular expressions. In this course we want to take a closer look at the WHILE-language. This is a simple imperative language consisting of arithmetic expressions, assignments and loops only. For example the Fibonacci program can be written in this language as follows +\begin{center} +\mbox{\lstinputlisting[language=while]{../progs/fib.while}} +\end{center} + +\noindent +The keywords in this language will be + +\begin{center} +\texttt{while}, \texttt{if}, \texttt{then}, \texttt{else}, \texttt{write}, \texttt{read} +\end{center} + +\noindent +In addition we will have some typical operators, like \texttt{<}, \texttt{>}, \texttt{:=} and so on; numbers +and strings (which we however ignore for the moment). We also need to specify what the ``white space'' +is in our programming language as well as what comments should look like. +We might specify the regular expressions for our language roughly as follows + +\begin{center} +\begin{tabular}{lcl} +$\textit{KEYWORD}$ & $:=$ & $\texttt{while} + \texttt{if} + \texttt{then} + \texttt{else} + \dots$\\ +$\textit{IDENT}$ & $:=$ & $\textit{LETTER} \cdot (\textit{LETTER} + \textit{DIGIT} + {\_})^*$\\ +$\textit{OP}$ & $:=$ & $\texttt{:=} + \texttt{<} + \ldots$\\ +$\textit{NUM}$ & $:=$ & $\textit{DIGIT}^+$\\ +$\textit{WHITESPACE}$ & $:=$ & $"\hspace{2mm}" + \backslash\texttt{n}$ +\end{tabular} +\end{center} + +\noindent +with the usual meanings for the regular expressions $\textit{LETTER}$ and $\textit{DIGIT}$. + +The problem we have to solve is given a string of our programming language, which regular expression +matches which part of the string. For example the input string + +\begin{center} +\texttt{\Grid{if\VS true\VS then\VS x+2\VS else\VS x+3}} +\end{center} + +\noindent +needs to be recognised as + +\begin{center} +\tt +\Grid{if} +\Grid{\VS} +\Grid{true} +\Grid{\VS} +\Grid{then} +\Grid{\VS} +\Grid{x} +\Grid{+} +\Grid{2} +\Grid{\VS} +\Grid{else} +\Grid{\VS} +\Grid{x} +\Grid{+} +\Grid{3} +\end{center} + +\noindent +Since \texttt{if} matches the \textit{KEYWORD} regular expression, \VS{} is a whitespace and so on. This process +of separating an input string into components is often called \emph{lexing} or \emph{scanning}. +It is usually the first phase of a compiler. Note that the separation into words cannot, in general, +be done by looking at whitespaces: while \texttt{if} and \texttt{true} are separated by a whitespace, +the components in \texttt{x+2} are not. Another reason for recognising whitespaces explicitly is +that in some languages, for example Python, whitespace matters. However in our small language we will eventually filter out all whitespaces and also comments. + \end{document} diff -r 90e27fafc5c7 -r 70ab41cb610e slides/slides05.pdf Binary file slides/slides05.pdf has changed diff -r 90e27fafc5c7 -r 70ab41cb610e slides/slides05.tex --- a/slides/slides05.tex Fri Oct 25 14:33:35 2013 +0100 +++ b/slides/slides05.tex Fri Oct 25 17:06:19 2013 +0100 @@ -20,6 +20,8 @@ \usetikzlibrary{calc} \usepackage{graphicx} \usepackage{pgfplots} +\usepackage{fontspec} +\setmonofont{Consolas} \definecolor{javared}{rgb}{0.6,0,0} % for strings \definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments @@ -31,19 +33,6 @@ \@empty\z@\@empty \makeatother -\lstset{language=Java, - basicstyle=\consolas, - keywordstyle=\color{javapurple}\bfseries, - stringstyle=\color{javagreen}, - commentstyle=\color{javagreen}, - morecomment=[s][\color{javadocblue}]{/**}{*/}, - numbers=left, - numberstyle=\tiny\color{black}, - stepnumber=1, - numbersep=10pt, - tabsize=2, - showspaces=false, - showstringspaces=false} \lstdefinelanguage{scala}{ morekeywords={abstract,case,catch,class,def,% @@ -52,8 +41,8 @@ new,null,object,override,package,% private,protected,requires,return,sealed,% super,this,throw,trait,true,try,% - type,val,var,while,with,yield, then}, - otherkeywords={=>,<-,<\%,<:,>:,\#,@,->}, + type,val,var,while,with,yield}, + otherkeywords={=>,<-,<\%,<:,>:,\#,@}, sensitive=true, morecomment=[l]{//}, morecomment=[n]{/*}{*/}, @@ -62,8 +51,20 @@ morestring=[b]""" } +\lstdefinelanguage{while}{ + morekeywords={while, if, then. else, read, write}, + otherkeywords={=>,<-,<\%,<:,>:,\#,@}, + sensitive=true, + morecomment=[l]{//}, + morecomment=[n]{/*}{*/}, + morestring=[b]", + morestring=[b]', + morestring=[b]""" +} + + \lstset{language=Scala, - basicstyle=\consolas, + basicstyle=\ttfamily, keywordstyle=\color{javapurple}\bfseries, stringstyle=\color{javagreen}, commentstyle=\color{javagreen}, @@ -342,7 +343,7 @@ \mode{ \begin{frame}[c] -\texttt{\consolas\lstinputlisting{../progs/fib.while}} +\mbox{\lstinputlisting[language=while]{../progs/fib.while}} \end{frame}} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -351,7 +352,7 @@ \mode{ \begin{frame}[c] -\texttt{\consolas\lstinputlisting{../progs/collatz.while}} +\mbox{\lstinputlisting[language=while]{../progs/collatz.while}} \end{frame}} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -361,7 +362,7 @@ \mode{ \begin{frame}[c] -\texttt{\consolas\lstinputlisting{../progs/loops.while}} +\mbox{\lstinputlisting[language=while]{../progs/loops.while}} \begin{textblock}{6}(10,2) \begin{tikzpicture}[scale=0.46]