| author | Christian Urban <christian dot urban at kcl dot ac dot uk> |
| Sat, 26 Oct 2013 00:49:14 +0100 | |
| changeset 156 | 95eaee695636 |
| parent 155 | 9b2d128765e1 |
| child 157 | b6eee9571a63 |
| permissions | -rw-r--r-- |
|
151
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
1 |
\documentclass{article}
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
2 |
\usepackage{charter}
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
3 |
\usepackage{hyperref}
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
4 |
\usepackage{amssymb}
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
5 |
\usepackage{amsmath}
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
6 |
\usepackage[T1]{fontenc}
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
7 |
\usepackage{listings}
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
8 |
\usepackage{xcolor}
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
9 |
\usepackage{tikz}
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
10 |
\usetikzlibrary{arrows}
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
11 |
\usetikzlibrary{automata}
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
12 |
\usetikzlibrary{shapes}
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
13 |
\usetikzlibrary{shadows}
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
14 |
\usetikzlibrary{positioning}
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
15 |
\usetikzlibrary{calc}
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
16 |
\usetikzlibrary{fit}
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
17 |
\usetikzlibrary{backgrounds}
|
|
153
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
18 |
\usepackage{fontspec}
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
19 |
\setmonofont{Consolas}
|
|
151
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
20 |
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
21 |
\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}%
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
22 |
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
23 |
\definecolor{javared}{rgb}{0.6,0,0} % for strings
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
24 |
\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
25 |
\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
26 |
\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
27 |
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
28 |
\lstdefinelanguage{scala}{
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
29 |
morekeywords={abstract,case,catch,class,def,%
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
30 |
do,else,extends,false,final,finally,% |
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
31 |
for,if,implicit,import,match,mixin,% |
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
32 |
new,null,object,override,package,% |
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
33 |
private,protected,requires,return,sealed,% |
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
34 |
super,this,throw,trait,true,try,% |
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
35 |
type,val,var,while,with,yield}, |
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
36 |
otherkeywords={=>,<-,<\%,<:,>:,\#,@},
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
37 |
sensitive=true, |
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
38 |
morecomment=[l]{//},
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
39 |
morecomment=[n]{/*}{*/},
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
40 |
morestring=[b]", |
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
41 |
morestring=[b]', |
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
42 |
morestring=[b]""" |
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
43 |
} |
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
44 |
|
|
153
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
45 |
\lstdefinelanguage{while}{
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
46 |
morekeywords={while, if, then. else, read, write},
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
47 |
otherkeywords={=>,<-,<\%,<:,>:,\#,@},
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
48 |
sensitive=true, |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
49 |
morecomment=[l]{//},
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
50 |
morecomment=[n]{/*}{*/},
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
51 |
morestring=[b]", |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
52 |
morestring=[b]', |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
53 |
morestring=[b]""" |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
54 |
} |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
55 |
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
56 |
|
|
151
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
57 |
\lstset{language=Scala,
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
58 |
basicstyle=\ttfamily, |
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
59 |
keywordstyle=\color{javapurple}\bfseries,
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
60 |
stringstyle=\color{javagreen},
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
61 |
commentstyle=\color{javagreen},
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
62 |
morecomment=[s][\color{javadocblue}]{/**}{*/},
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
63 |
numbers=left, |
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
64 |
numberstyle=\tiny\color{black},
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
65 |
stepnumber=1, |
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
66 |
numbersep=10pt, |
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
67 |
tabsize=2, |
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
68 |
showspaces=false, |
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
69 |
showstringspaces=false} |
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
70 |
|
|
153
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
71 |
\newcommand\grid[1]{%
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
72 |
\begin{tikzpicture}[baseline=(char.base)]
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
73 |
\path[use as bounding box] |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
74 |
(0,0) rectangle (1em,1em); |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
75 |
\draw[red!50, fill=red!20] |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
76 |
(0,0) rectangle (1em,1em); |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
77 |
\node[inner sep=1pt,anchor=base west] |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
78 |
(char) at (0em,\gridraiseamount) {#1};
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
79 |
\end{tikzpicture}}
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
80 |
\newcommand\gridraiseamount{0.12em}
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
81 |
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
82 |
\makeatletter |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
83 |
\newcommand\Grid[1]{%
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
84 |
\@tfor\z:=#1\do{\grid{\z}}}
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
85 |
\makeatother |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
86 |
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
87 |
\newcommand\Vspace[1][.3em]{%
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
88 |
\mbox{\kern.06em\vrule height.3ex}%
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
89 |
\vbox{\hrule width#1}%
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
90 |
\hbox{\vrule height.3ex}}
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
91 |
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
92 |
\def\VS{\Vspace[0.6em]}
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
93 |
|
|
151
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
94 |
\begin{document}
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
95 |
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
96 |
\section*{Handout 5}
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
97 |
|
|
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
98 |
Whenever you want to design a new programming language or implement a compiler for an |
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
99 |
existing language, the first task is to fix the basic ``words'' of the language. For example what are the |
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
100 |
keywords, or reserved words, of the language, what are permitted identifiers, numbers and so |
|
153
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
101 |
on. One convenient way to do this is, of |
|
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
102 |
course, by using regular expressions. |
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
103 |
|
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
104 |
In this course we want to take a closer look at the |
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
105 |
WHILE-language. This is a simple imperative programming language consisting of arithmetic |
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
106 |
expressions, assignments, if-statements and loops. For example the Fibonacci program can be |
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
107 |
written in this language as follows: |
|
151
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
108 |
|
|
153
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
109 |
\begin{center}
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
110 |
\mbox{\lstinputlisting[language=while]{../progs/fib.while}}
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
111 |
\end{center}
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
112 |
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
113 |
\noindent |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
114 |
The keywords in this language will be |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
115 |
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
116 |
\begin{center}
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
117 |
\texttt{while}, \texttt{if}, \texttt{then}, \texttt{else}, \texttt{write}, \texttt{read}
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
118 |
\end{center}
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
119 |
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
120 |
\noindent |
|
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
121 |
In addition we will have some common operators, such as \texttt{<}, \texttt{>}, \texttt{:=} and so on, as well as numbers
|
|
153
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
122 |
and strings (which we however ignore for the moment). We also need to specify what the ``white space'' |
|
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
123 |
is in our programming language and what comments should look like. |
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
124 |
As a first try, we specify the regular expressions for our language roughly as follows |
|
153
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
125 |
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
126 |
\begin{center}
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
127 |
\begin{tabular}{lcl}
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
128 |
$\textit{KEYWORD}$ & $:=$ & $\texttt{while} + \texttt{if} + \texttt{then} + \texttt{else} + \dots$\\
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
129 |
$\textit{IDENT}$ & $:=$ & $\textit{LETTER} \cdot (\textit{LETTER} + \textit{DIGIT} + {\_})^*$\\
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
130 |
$\textit{OP}$ & $:=$ & $\texttt{:=} + \texttt{<} + \ldots$\\
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
131 |
$\textit{NUM}$ & $:=$ & $\textit{DIGIT}^+$\\
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
132 |
$\textit{WHITESPACE}$ & $:=$ & $"\hspace{2mm}" + \backslash\texttt{n}$
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
133 |
\end{tabular}
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
134 |
\end{center}
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
135 |
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
136 |
\noindent |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
137 |
with the usual meanings for the regular expressions $\textit{LETTER}$ and $\textit{DIGIT}$.
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
138 |
|
|
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
139 |
Having the regular expressions in place, the problem we have to solve is: |
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
140 |
given a string of our programming language, which regular expression |
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
141 |
matches which part of the string. In this way we can split up a string into components. |
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
142 |
For example we expect that the input string |
|
153
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
143 |
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
144 |
\begin{center}
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
145 |
\texttt{\Grid{if\VS true\VS then\VS x+2\VS else\VS x+3}}
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
146 |
\end{center}
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
147 |
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
148 |
\noindent |
|
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
149 |
is split up as follows |
|
153
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
150 |
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
151 |
\begin{center}
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
152 |
\tt |
|
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
153 |
\Grid{if}\,
|
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
154 |
\Grid{\VS}\,
|
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
155 |
\Grid{true}\,
|
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
156 |
\Grid{\VS}\,
|
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
157 |
\Grid{then}\,
|
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
158 |
\Grid{\VS}\,
|
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
159 |
\Grid{x}\,
|
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
160 |
\Grid{+}\,
|
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
161 |
\Grid{2}\,
|
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
162 |
\Grid{\VS}\,
|
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
163 |
\Grid{else}\,
|
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
164 |
\Grid{\VS}\,
|
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
165 |
\Grid{x}\,
|
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
166 |
\Grid{+}\,
|
|
153
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
167 |
\Grid{3}
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
168 |
\end{center}
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
169 |
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
170 |
\noindent |
|
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
171 |
This process of splitting an input string into components is often called \emph{lexing} or \emph{scanning}.
|
|
153
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
172 |
It is usually the first phase of a compiler. Note that the separation into words cannot, in general, |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
173 |
be done by looking at whitespaces: while \texttt{if} and \texttt{true} are separated by a whitespace,
|
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
174 |
the components in \texttt{x+2} are not. Another reason for recognising whitespaces explicitly is
|
|
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
175 |
that in some languages, for example Python, whitespace matters. However in our small language we will eventually just filter out all whitespaces and also comments. |
|
153
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
176 |
|
|
154
51d6b8b828c4
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
153
diff
changeset
|
177 |
Lexing will not just separate the input into its components, but also classify the components, that |
|
51d6b8b828c4
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
153
diff
changeset
|
178 |
is explicitly record that \texttt{it} is a keyword, \VS{} a whitespace, \texttt{true} an identifier and so on.
|
|
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
179 |
For the moment, though, we will only focus on the simpler problem of splitting a string into components. |
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
180 |
|
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
181 |
There are a few subtleties we need to consider first. For example, say the input string is |
|
154
51d6b8b828c4
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
153
diff
changeset
|
182 |
|
|
51d6b8b828c4
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
153
diff
changeset
|
183 |
\begin{center}
|
|
51d6b8b828c4
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
153
diff
changeset
|
184 |
\texttt{\Grid{iffoo\VS\ldots}}
|
|
51d6b8b828c4
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
153
diff
changeset
|
185 |
\end{center}
|
|
51d6b8b828c4
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
153
diff
changeset
|
186 |
|
|
51d6b8b828c4
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
153
diff
changeset
|
187 |
\noindent |
|
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
188 |
then there are two possibilities how it can be split up: either we regard the input as the keyword \texttt{if} followed
|
|
154
51d6b8b828c4
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
153
diff
changeset
|
189 |
by the identifier \texttt{foo} (both regular expressions match) or we regard \texttt{iffoo} as a
|
|
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
190 |
single identifier. The choice that is often made in lexers is to look for the longest possible match. |
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
191 |
This leaves \texttt{iffoo} as the only match (since it is longer than \texttt{if}).
|
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
192 |
|
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
193 |
Unfortuantely, the convention of the longs match does not yet make the whole process |
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
194 |
completely deterministic. Consider the input string |
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
195 |
|
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
196 |
\begin{center}
|
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
197 |
\texttt{\Grid{then\VS\dots}}
|
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
198 |
\end{center}
|
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
199 |
|
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
200 |
\noindent |
|
156
95eaee695636
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
155
diff
changeset
|
201 |
Clearly, this string should clearly be identified as a keyword. The problem is that also the regular expression \textit{IDENT} for identifiers would also match this string. To overcome this ambiguity we need to rank our
|
|
95eaee695636
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
155
diff
changeset
|
202 |
regular expressions. In our running example we just use the ranking |
|
95eaee695636
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
155
diff
changeset
|
203 |
|
|
95eaee695636
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
155
diff
changeset
|
204 |
\[ |
|
95eaee695636
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
155
diff
changeset
|
205 |
\textit{KEYWORD} < \textit{IDENT} < \textit{OP} < \ldots
|
|
95eaee695636
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
155
diff
changeset
|
206 |
\] |
|
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
207 |
|
|
156
95eaee695636
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
155
diff
changeset
|
208 |
\noindent |
|
95eaee695636
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
155
diff
changeset
|
209 |
and so on. So even if both regular expressions match in the example above, |
|
95eaee695636
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
155
diff
changeset
|
210 |
we can give the regular expression for \ref{Page ??} as follows
|
|
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
211 |
|
|
156
95eaee695636
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
155
diff
changeset
|
212 |
Let us see how our algorithm for lexing works in detail. The regular |
|
95eaee695636
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
155
diff
changeset
|
213 |
expressions and their ranking are shown above. |
|
151
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
214 |
\end{document}
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
215 |
|
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
216 |
%%% Local Variables: |
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
217 |
%%% mode: latex |
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
218 |
%%% TeX-master: t |
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
219 |
%%% End: |