author | Christian Urban <christian dot urban at kcl dot ac dot uk> |
Fri, 01 Nov 2013 11:57:04 +0000 | |
changeset 173 | 7cfb7a6f7c99 |
parent 163 | 89d6d89d9844 |
child 217 | cd6066f1056a |
permissions | -rw-r--r-- |
151
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
1 |
\documentclass{article} |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
2 |
\usepackage{charter} |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
3 |
\usepackage{hyperref} |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
4 |
\usepackage{amssymb} |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
5 |
\usepackage{amsmath} |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
6 |
\usepackage[T1]{fontenc} |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
7 |
\usepackage{listings} |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
8 |
\usepackage{xcolor} |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
9 |
\usepackage{tikz} |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
10 |
\usetikzlibrary{arrows} |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
11 |
\usetikzlibrary{automata} |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
12 |
\usetikzlibrary{shapes} |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
13 |
\usetikzlibrary{shadows} |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
14 |
\usetikzlibrary{positioning} |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
15 |
\usetikzlibrary{calc} |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
16 |
\usetikzlibrary{fit} |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
17 |
\usetikzlibrary{backgrounds} |
153
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
18 |
\usepackage{fontspec} |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
19 |
\setmonofont{Consolas} |
151
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
20 |
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
21 |
\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
22 |
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
23 |
\definecolor{javared}{rgb}{0.6,0,0} % for strings |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
24 |
\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
25 |
\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
26 |
\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc |
173
7cfb7a6f7c99
added slides
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
163
diff
changeset
|
27 |
|
151
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
28 |
\lstdefinelanguage{scala}{ |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
29 |
morekeywords={abstract,case,catch,class,def,% |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
30 |
do,else,extends,false,final,finally,% |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
31 |
for,if,implicit,import,match,mixin,% |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
32 |
new,null,object,override,package,% |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
33 |
private,protected,requires,return,sealed,% |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
34 |
super,this,throw,trait,true,try,% |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
35 |
type,val,var,while,with,yield}, |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
36 |
otherkeywords={=>,<-,<\%,<:,>:,\#,@}, |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
37 |
sensitive=true, |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
38 |
morecomment=[l]{//}, |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
39 |
morecomment=[n]{/*}{*/}, |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
40 |
morestring=[b]", |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
41 |
morestring=[b]', |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
42 |
morestring=[b]""" |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
43 |
} |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
44 |
|
153
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
45 |
\lstdefinelanguage{while}{ |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
46 |
morekeywords={while, if, then. else, read, write}, |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
47 |
otherkeywords={=>,<-,<\%,<:,>:,\#,@}, |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
48 |
sensitive=true, |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
49 |
morecomment=[l]{//}, |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
50 |
morecomment=[n]{/*}{*/}, |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
51 |
morestring=[b]", |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
52 |
morestring=[b]', |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
53 |
morestring=[b]""" |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
54 |
} |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
55 |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
56 |
|
151
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
57 |
\lstset{language=Scala, |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
58 |
basicstyle=\ttfamily, |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
59 |
keywordstyle=\color{javapurple}\bfseries, |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
60 |
stringstyle=\color{javagreen}, |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
61 |
commentstyle=\color{javagreen}, |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
62 |
morecomment=[s][\color{javadocblue}]{/**}{*/}, |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
63 |
numbers=left, |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
64 |
numberstyle=\tiny\color{black}, |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
65 |
stepnumber=1, |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
66 |
numbersep=10pt, |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
67 |
tabsize=2, |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
68 |
showspaces=false, |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
69 |
showstringspaces=false} |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
70 |
|
153
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
71 |
\newcommand\grid[1]{% |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
72 |
\begin{tikzpicture}[baseline=(char.base)] |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
73 |
\path[use as bounding box] |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
74 |
(0,0) rectangle (1em,1em); |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
75 |
\draw[red!50, fill=red!20] |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
76 |
(0,0) rectangle (1em,1em); |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
77 |
\node[inner sep=1pt,anchor=base west] |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
78 |
(char) at (0em,\gridraiseamount) {#1}; |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
79 |
\end{tikzpicture}} |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
80 |
\newcommand\gridraiseamount{0.12em} |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
81 |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
82 |
\makeatletter |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
83 |
\newcommand\Grid[1]{% |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
84 |
\@tfor\z:=#1\do{\grid{\z}}} |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
85 |
\makeatother |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
86 |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
87 |
\newcommand\Vspace[1][.3em]{% |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
88 |
\mbox{\kern.06em\vrule height.3ex}% |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
89 |
\vbox{\hrule width#1}% |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
90 |
\hbox{\vrule height.3ex}} |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
91 |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
92 |
\def\VS{\Vspace[0.6em]} |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
93 |
|
151
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
94 |
\begin{document} |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
95 |
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
96 |
\section*{Handout 5} |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
97 |
|
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
98 |
Whenever you want to design a new programming language or implement a compiler for an |
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
99 |
existing language, the first task is to fix the basic ``words'' of the language. For example what are the |
159
ae5ceef5355e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
158
diff
changeset
|
100 |
keywords, or reserved words, of the language, what are permitted identifiers, numbers, expressions and so |
153
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
101 |
on. One convenient way to do this is, of |
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
102 |
course, by using regular expressions. |
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
103 |
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
104 |
In this course we want to take a closer look at the |
159
ae5ceef5355e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
158
diff
changeset
|
105 |
WHILE programming language. This is a simple imperative programming language consisting of arithmetic |
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
106 |
expressions, assignments, if-statements and loops. For example the Fibonacci program can be |
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
107 |
written in this language as follows: |
151
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
108 |
|
153
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
109 |
\begin{center} |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
110 |
\mbox{\lstinputlisting[language=while]{../progs/fib.while}} |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
111 |
\end{center} |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
112 |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
113 |
\noindent |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
114 |
The keywords in this language will be |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
115 |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
116 |
\begin{center} |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
117 |
\texttt{while}, \texttt{if}, \texttt{then}, \texttt{else}, \texttt{write}, \texttt{read} |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
118 |
\end{center} |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
119 |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
120 |
\noindent |
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
121 |
In addition we will have some common operators, such as \texttt{<}, \texttt{>}, \texttt{:=} and so on, as well as numbers |
159
ae5ceef5355e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
158
diff
changeset
|
122 |
and strings (which we however ignore for the moment). We also need to specify what the ``whitespace'' |
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
123 |
is in our programming language and what comments should look like. |
159
ae5ceef5355e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
158
diff
changeset
|
124 |
As a first try, we might specify the regular expressions for our language roughly as follows |
153
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
125 |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
126 |
\begin{center} |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
127 |
\begin{tabular}{lcl} |
159
ae5ceef5355e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
158
diff
changeset
|
128 |
$\textit{LETTER}$ & $:=$ & $\texttt{a} + \texttt{A} + \texttt{b} + \texttt{B} + \ldots$\\ |
ae5ceef5355e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
158
diff
changeset
|
129 |
$\textit{DIGIT}$ & $:=$ & $\texttt{0} + \texttt{1} + \texttt{2} + \ldots$\\ |
153
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
130 |
$\textit{KEYWORD}$ & $:=$ & $\texttt{while} + \texttt{if} + \texttt{then} + \texttt{else} + \dots$\\ |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
131 |
$\textit{IDENT}$ & $:=$ & $\textit{LETTER} \cdot (\textit{LETTER} + \textit{DIGIT} + {\_})^*$\\ |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
132 |
$\textit{OP}$ & $:=$ & $\texttt{:=} + \texttt{<} + \ldots$\\ |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
133 |
$\textit{NUM}$ & $:=$ & $\textit{DIGIT}^+$\\ |
159
ae5ceef5355e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
158
diff
changeset
|
134 |
$\textit{WHITESPACE}$ & $:=$ & $("\hspace{2mm}" + \backslash\texttt{n})^+$ |
153
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
135 |
\end{tabular} |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
136 |
\end{center} |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
137 |
|
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
138 |
Having the regular expressions in place, the problem we have to solve is: |
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
139 |
given a string of our programming language, which regular expression |
159
ae5ceef5355e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
158
diff
changeset
|
140 |
matches which part of the string. By solving this problem, we can split up a string |
ae5ceef5355e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
158
diff
changeset
|
141 |
of our language into components. |
ae5ceef5355e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
158
diff
changeset
|
142 |
For example given the input string |
153
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
143 |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
144 |
\begin{center} |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
145 |
\texttt{\Grid{if\VS true\VS then\VS x+2\VS else\VS x+3}} |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
146 |
\end{center} |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
147 |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
148 |
\noindent |
161
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
149 |
we expect it is split up as follows |
153
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
150 |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
151 |
\begin{center} |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
152 |
\tt |
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
153 |
\Grid{if}\, |
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
154 |
\Grid{\VS}\, |
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
155 |
\Grid{true}\, |
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
156 |
\Grid{\VS}\, |
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
157 |
\Grid{then}\, |
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
158 |
\Grid{\VS}\, |
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
159 |
\Grid{x}\, |
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
160 |
\Grid{+}\, |
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
161 |
\Grid{2}\, |
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
162 |
\Grid{\VS}\, |
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
163 |
\Grid{else}\, |
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
164 |
\Grid{\VS}\, |
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
165 |
\Grid{x}\, |
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
166 |
\Grid{+}\, |
153
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
167 |
\Grid{3} |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
168 |
\end{center} |
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
169 |
|
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
170 |
\noindent |
161
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
171 |
This process of splitting up an input string into components is often called \emph{lexing} or \emph{scanning}. |
153
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
172 |
It is usually the first phase of a compiler. Note that the separation into words cannot, in general, |
161
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
173 |
be done by just looking at whitespaces: while \texttt{if} and \texttt{true} are separated by a whitespace |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
174 |
in the example above, this is not always the case. As can be seen the three components in \texttt{x+2} are |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
175 |
not separated by any whitespace. Another reason for recognising whitespaces explicitly is |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
176 |
that in some languages, for example Python, whitespaces matters, that is carry meaning. However in |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
177 |
our small language we will eventually just filter out all whitespaces and also all comments. |
153
70ab41cb610e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
152
diff
changeset
|
178 |
|
162
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
179 |
Lexing not just separates a string into its components, but also classifies the components, meaning it explicitly records that \texttt{if} is a keyword, \VS{} a whitespace, \texttt{true} an identifier and so on. |
161
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
180 |
For the moment, though, we will only focus on the simpler problem of just splitting up |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
181 |
a string into components. |
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
182 |
|
159
ae5ceef5355e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
158
diff
changeset
|
183 |
There are a few subtleties we need to consider first. For example, say the string is |
154
51d6b8b828c4
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
153
diff
changeset
|
184 |
|
51d6b8b828c4
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
153
diff
changeset
|
185 |
\begin{center} |
161
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
186 |
\texttt{\Grid{iffoo\VS}}\;\ldots |
154
51d6b8b828c4
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
153
diff
changeset
|
187 |
\end{center} |
51d6b8b828c4
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
153
diff
changeset
|
188 |
|
51d6b8b828c4
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
153
diff
changeset
|
189 |
\noindent |
159
ae5ceef5355e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
158
diff
changeset
|
190 |
then there are two possibilities for how it can be split up: either we regard the input as the keyword \texttt{if} followed |
154
51d6b8b828c4
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
153
diff
changeset
|
191 |
by the identifier \texttt{foo} (both regular expressions match) or we regard \texttt{iffoo} as a |
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
192 |
single identifier. The choice that is often made in lexers is to look for the longest possible match. |
162
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
193 |
This leaves \texttt{iffoo} as the only match in this case (since it is longer than \texttt{if}). |
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
194 |
|
161
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
195 |
Unfortunately, the convention about the longest match does not yet make the process |
159
ae5ceef5355e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
158
diff
changeset
|
196 |
of lexing completely deterministic. Consider the string |
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
197 |
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
198 |
\begin{center} |
161
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
199 |
\texttt{\Grid{then\VS}}\;\dots |
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
200 |
\end{center} |
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
201 |
|
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
202 |
\noindent |
159
ae5ceef5355e
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
158
diff
changeset
|
203 |
Clearly, this string should be identified as a keyword. The problem is that also the regular expression \textit{IDENT} for identifiers matches this string. To overcome this ambiguity we need to rank our |
156
95eaee695636
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
155
diff
changeset
|
204 |
regular expressions. In our running example we just use the ranking |
95eaee695636
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
155
diff
changeset
|
205 |
|
95eaee695636
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
155
diff
changeset
|
206 |
\[ |
95eaee695636
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
155
diff
changeset
|
207 |
\textit{KEYWORD} < \textit{IDENT} < \textit{OP} < \ldots |
95eaee695636
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
155
diff
changeset
|
208 |
\] |
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
209 |
|
156
95eaee695636
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
155
diff
changeset
|
210 |
\noindent |
160
8134c3b981e0
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
159
diff
changeset
|
211 |
So even if both regular expressions match in the example above, |
8134c3b981e0
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
159
diff
changeset
|
212 |
we give preference to the regular expression for keywords. |
155
9b2d128765e1
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
154
diff
changeset
|
213 |
|
161
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
214 |
Let us see how our algorithm for lexing works in detail. In addition to the functions $nullable$ |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
215 |
and $der$, it will use the function \emph{zeroable} defined as follows: |
157
b6eee9571a63
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
156
diff
changeset
|
216 |
|
b6eee9571a63
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
156
diff
changeset
|
217 |
\begin{center} |
b6eee9571a63
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
156
diff
changeset
|
218 |
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}} |
b6eee9571a63
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
156
diff
changeset
|
219 |
$zeroable(\varnothing)$ & $\dn$ & $true$\\ |
b6eee9571a63
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
156
diff
changeset
|
220 |
$zeroable(\epsilon)$ & $\dn$ & $f\!alse$\\ |
b6eee9571a63
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
156
diff
changeset
|
221 |
$zeroable (c)$ & $\dn$ & $f\!alse$\\ |
b6eee9571a63
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
156
diff
changeset
|
222 |
$zeroable (r_1 + r_2)$ & $\dn$ & $zeroable(r_1) \wedge zeroable(r_2)$ \\ |
b6eee9571a63
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
156
diff
changeset
|
223 |
$zeroable (r_1 \cdot r_2)$ & $\dn$ & $zeroable(r_1) \vee zeroable(r_2)$ \\ |
b6eee9571a63
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
156
diff
changeset
|
224 |
$zeroable (r^*)$ & $\dn$ & $f\!alse$\\ |
b6eee9571a63
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
156
diff
changeset
|
225 |
\end{tabular} |
b6eee9571a63
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
156
diff
changeset
|
226 |
\end{center} |
b6eee9571a63
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
156
diff
changeset
|
227 |
|
b6eee9571a63
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
156
diff
changeset
|
228 |
\noindent |
161
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
229 |
Recall that the function $nullable(r)$ tests whether a regular expression |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
230 |
can match the empty string. The function $zeroable$, on the other hand, tests whether a regular |
158
77e8397ec2ec
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
157
diff
changeset
|
231 |
expression cannot match anything at all. The mathematical way of stating this |
160
8134c3b981e0
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
159
diff
changeset
|
232 |
property is |
157
b6eee9571a63
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
156
diff
changeset
|
233 |
|
b6eee9571a63
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
156
diff
changeset
|
234 |
\begin{center} |
160
8134c3b981e0
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
159
diff
changeset
|
235 |
$zeroable(r)$ if and only if $L(r) = \varnothing$ |
157
b6eee9571a63
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
156
diff
changeset
|
236 |
\end{center} |
b6eee9571a63
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
156
diff
changeset
|
237 |
|
b6eee9571a63
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
156
diff
changeset
|
238 |
\noindent |
161
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
239 |
For what follows let us fix a set of regular expressions $rs$ as being |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
240 |
|
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
241 |
\begin{center} |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
242 |
\textit{KEYWORD}, \textit{IDENT}, \textit{WHITESPACE} |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
243 |
\end{center} |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
244 |
|
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
245 |
\noindent |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
246 |
specifying the ``words'' |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
247 |
of our programming language. The algorithm takes as input the $rs$ and a string, say |
158
77e8397ec2ec
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
157
diff
changeset
|
248 |
|
77e8397ec2ec
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
157
diff
changeset
|
249 |
\begin{center} |
161
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
250 |
\texttt{\grid{$c_1$}\grid{$c_2$}\grid{$c_3$}\grid{$c_4$}}\;\ldots |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
251 |
\end{center} |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
252 |
|
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
253 |
\noindent |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
254 |
and tries to chop off one word from the beginning of the string. If none of the |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
255 |
regular expression in $rs$ matches, we will just return |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
256 |
the empty string. |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
257 |
|
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
258 |
The crucial idea in the algorithm is to build the derivatives of all regular expressions in $rs$ with respect |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
259 |
to the first character $c_1$. Then we take the results and continue with |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
260 |
building the derivatives with respect to $c_2$ until we have either exhausted our |
162
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
261 |
input string or all of the regular expressions are ``zeroable''. Suppose the input string is |
161
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
262 |
|
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
263 |
\begin{center} |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
264 |
\texttt{\Grid{if2\VS}}\;\dots |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
265 |
\end{center} |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
266 |
|
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
267 |
\noindent |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
268 |
then building the derivatives with respect to \texttt{i} gives |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
269 |
|
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
270 |
\begin{center} |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
271 |
\begin{tabular}{l|c} |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
272 |
& $zeroable$\\\hline |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
273 |
$der\;\texttt{i}\;(\textit{KEYWORD})$ & no\\ |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
274 |
$der\;\texttt{i}\;(\textit{IDENT})$ & no\\ |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
275 |
$der\;\texttt{i}\;(\textit{WHITESPACE})$ & yes\\ |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
276 |
\end{tabular} |
158
77e8397ec2ec
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
157
diff
changeset
|
277 |
\end{center} |
77e8397ec2ec
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
157
diff
changeset
|
278 |
|
77e8397ec2ec
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
157
diff
changeset
|
279 |
\noindent |
162
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
280 |
We can eliminate \textit{WHITESPACE} as a potential candidate, because |
161
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
281 |
no derivative can go from $zeroable = \text{yes}$ to no. That leaves the other |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
282 |
two regular expressions as potential candidate and we have to consider the |
162
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
283 |
next character, \texttt{f}, from the input string |
161
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
284 |
|
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
285 |
\begin{center} |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
286 |
\begin{tabular}{l|c} |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
287 |
& $zeroable$\\\hline |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
288 |
$der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{KEYWORD}))$ & no\\ |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
289 |
$der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{IDENT}))$ & no\\ |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
290 |
\end{tabular} |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
291 |
\end{center} |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
292 |
|
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
293 |
\noindent |
162
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
294 |
Since both are `no', we have to continue with \texttt{2} from the input string |
161
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
295 |
|
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
296 |
\begin{center} |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
297 |
\begin{tabular}{l|c} |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
298 |
& $zeroable$\\\hline |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
299 |
$der\;\texttt{2}\;(der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{KEYWORD})))$ & yes\\ |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
300 |
$der\;\texttt{2}\;(der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{IDENT})))$ & no\\ |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
301 |
\end{tabular} |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
302 |
\end{center} |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
303 |
|
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
304 |
\noindent |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
305 |
Although we now know that the beginning is definitely an \textit{IDENT}, we do not yet |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
306 |
know how much of the input string should be considered as an \textit{IDENT}. So we |
162
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
307 |
still have to continue and consider the next derivative. |
161
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
308 |
|
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
309 |
\begin{center} |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
310 |
\begin{tabular}{l|c} |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
311 |
& $zeroable$\\\hline |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
312 |
$der\;\texttt{\VS}\;(der\;\texttt{2}\;(der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{IDENT}))))$ & yes\\ |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
313 |
\end{tabular} |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
314 |
\end{center} |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
315 |
|
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
316 |
\noindent |
162
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
317 |
Since the answer is now `yes' also in this case, we can stop: once all derivatives are |
161
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
318 |
zeroable, we know the regular expressions cannot match any more letters from |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
319 |
the input. In this case we only have to go back to the derivative that is nullable. In this case it |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
320 |
is |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
321 |
|
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
322 |
\begin{center} |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
323 |
$der\;\texttt{2}\;(der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{IDENT})))$ |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
324 |
\end{center} |
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
325 |
|
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
326 |
\noindent |
162
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
327 |
which means we recognised an identifier. In case where there is a choice of more than one |
161
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
328 |
regular expressions that are nullable, then we choose the one with the highest precedence. |
162
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
329 |
You can try out such a case with the input string |
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
330 |
|
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
331 |
\begin{center} |
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
332 |
\texttt{\Grid{then\VS}}\;\dots |
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
333 |
\end{center} |
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
334 |
|
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
335 |
\noindent |
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
336 |
which can both be recognised as a keyword, but also an identifier. |
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
337 |
|
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
338 |
While in the example above the last nullable derivative is the one directly before |
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
339 |
the derivative turns zeroable, this is not always the case. Imagine, identifiers can be |
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
340 |
letters, as permuted by the regular expression \textit{IDENT}, but must end with an |
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
341 |
undercore. |
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
342 |
|
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
343 |
\begin{center} |
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
344 |
\begin{tabular}{lcl} |
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
345 |
$\textit{NEWIDENT}$ & $:=$ & $\textit{LETTER} \cdot (\textit{LETTER} + \textit{DIGIT} + {\_})^* \cdot \_$\\ |
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
346 |
\end{tabular} |
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
347 |
\end{center} |
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
348 |
|
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
349 |
\noindent |
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
350 |
If we use \textit{NEWIDENT} with the input string |
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
351 |
|
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
352 |
\begin{center} |
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
353 |
\texttt{\Grid{iffoo\VS}}\;\ldots |
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
354 |
\end{center} |
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
355 |
|
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
356 |
\noindent |
163
89d6d89d9844
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
162
diff
changeset
|
357 |
then it will only become $zeroable$ after the $\VS$ has been analysed. In this case we have to go back to the |
162
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
358 |
first \texttt{f} because only |
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
359 |
|
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
360 |
\begin{center} |
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
361 |
$der\;\texttt{f}\;(der\;\texttt{i}\;(\textit{KEYWORD}))$ |
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
362 |
\end{center} |
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
363 |
|
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
364 |
\noindent |
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
365 |
is nullable. As a result we recognise successfully the keyword \texttt{if} and the remaining |
edcd84c7b491
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
161
diff
changeset
|
366 |
string needs to be consumed by other regular expressions or lead to a lexing error. |
161
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
367 |
|
bfcf838dda4d
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
160
diff
changeset
|
368 |
|
157
b6eee9571a63
added
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
156
diff
changeset
|
369 |
|
151
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
370 |
\end{document} |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
371 |
|
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
372 |
%%% Local Variables: |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
373 |
%%% mode: latex |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
374 |
%%% TeX-master: t |
df229ec49b22
added ho
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
375 |
%%% End: |