author | Christian Urban <christian.urban@kcl.ac.uk> |
Tue, 28 Nov 2023 11:42:31 +0000 | |
changeset 956 | ae9782e62bdd |
parent 953 | 5e070fb0332a |
child 963 | 85bb0ef99fc7 |
permissions | -rw-r--r-- |
93
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
1 |
\documentclass{article} |
292
7ed2a25dd115
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
267
diff
changeset
|
2 |
\usepackage{../style} |
7ed2a25dd115
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
267
diff
changeset
|
3 |
\usepackage{../graphics} |
527 | 4 |
\usepackage{../grammar} |
93
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
5 |
|
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
6 |
\begin{document} |
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
7 |
|
267
a1544b804d1e
updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
147
diff
changeset
|
8 |
% explain what is a context-free grammar and the language it generates |
a1544b804d1e
updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
147
diff
changeset
|
9 |
% |
a1544b804d1e
updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
147
diff
changeset
|
10 |
|
a1544b804d1e
updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
147
diff
changeset
|
11 |
|
93
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
12 |
\section*{Homework 5} |
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
13 |
|
359
db106e5b7c4d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
322
diff
changeset
|
14 |
\HEADER |
db106e5b7c4d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
322
diff
changeset
|
15 |
|
db106e5b7c4d
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
322
diff
changeset
|
16 |
|
93
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
17 |
\begin{enumerate} |
294
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
18 |
\item Consider the basic regular expressions |
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
19 |
|
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
20 |
\begin{center} |
401
5d85dc9779b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
359
diff
changeset
|
21 |
$r ::= \ZERO \;|\; \ONE \;|\; c \;|\; r_1 + r_2 \;|\; r_1 \cdot r_2 \;|\; r^*$ |
294
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
22 |
\end{center} |
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
23 |
|
401
5d85dc9779b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
359
diff
changeset
|
24 |
and suppose you want to show a property $P(r)$ for all |
5d85dc9779b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
359
diff
changeset
|
25 |
regular expressions $r$ by structural induction. Write |
5d85dc9779b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
359
diff
changeset
|
26 |
down which cases you need to analyse. State clearly |
5d85dc9779b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
359
diff
changeset
|
27 |
the induction hypotheses if applicable in a case. |
294
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
28 |
|
401
5d85dc9779b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
359
diff
changeset
|
29 |
\item Define a regular expression, written $ALL$, that can |
5d85dc9779b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
359
diff
changeset
|
30 |
match every string. This definition should be in terms |
5d85dc9779b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
359
diff
changeset
|
31 |
of the following extended regular expressions: |
294
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
32 |
|
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
33 |
\begin{center} |
401
5d85dc9779b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
359
diff
changeset
|
34 |
$r ::= \ZERO \;|\; |
5d85dc9779b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
359
diff
changeset
|
35 |
\ONE \;|\; |
294
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
36 |
c \;|\; |
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
37 |
r_1 + r_2 \;|\; |
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
38 |
r_1 \cdot r_2 \;|\; |
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
39 |
r^* \;|\; |
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
40 |
\sim r$ |
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
41 |
\end{center} |
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
42 |
|
953 | 43 |
\solution{ |
44 |
There is the obvious solution $\sim{}\ZERO$, but also $a + \sim{}a$ would work. |
|
45 |
} |
|
46 |
||
47 |
||
322
698ed1c96cd0
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
294
diff
changeset
|
48 |
%\item Assume the delimiters for comments are \texttt{$\slash$*} |
698ed1c96cd0
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
294
diff
changeset
|
49 |
%and \texttt{*$\slash$}. Give a regular expression that can |
698ed1c96cd0
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
294
diff
changeset
|
50 |
%recognise comments of the form |
698ed1c96cd0
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
294
diff
changeset
|
51 |
% |
698ed1c96cd0
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
294
diff
changeset
|
52 |
%\begin{center} |
698ed1c96cd0
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
294
diff
changeset
|
53 |
%\texttt{$\slash$*~\ldots{}~*$\slash$} |
698ed1c96cd0
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
294
diff
changeset
|
54 |
%\end{center} |
698ed1c96cd0
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
294
diff
changeset
|
55 |
% |
698ed1c96cd0
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
294
diff
changeset
|
56 |
%where the three dots stand for arbitrary characters, but not |
698ed1c96cd0
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
294
diff
changeset
|
57 |
%comment delimiters. |
294
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
58 |
|
93
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
59 |
\item Define the following regular expressions |
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
60 |
|
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
61 |
\begin{center} |
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
62 |
\begin{tabular}{ll} |
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
63 |
$r^+$ & (one or more matches)\\ |
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
64 |
$r^?$ & (zero or one match)\\ |
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
65 |
$r^{\{n\}}$ & (exactly $n$ matches)\\ |
953 | 66 |
$r^{\{m.. n\}}$ & (at least $m$ and at most $n$ matches, with the\\ |
93
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
67 |
& \phantom{(}assumption $m \le n$)\\ |
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
68 |
\end{tabular} |
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
69 |
\end{center} |
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
70 |
|
294
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
71 |
in terms of the usual basic regular expressions |
93
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
72 |
|
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
73 |
\begin{center} |
401
5d85dc9779b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
359
diff
changeset
|
74 |
$r ::= \ZERO \;|\; \ONE \;|\; c \;|\; r_1 + r_2 \;|\; r_1 \cdot r_2 \;|\; r^*$ |
93
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
75 |
\end{center} |
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
76 |
|
953 | 77 |
\solution{ |
78 |
$r^+ \dn r\cdot r^*$\\ |
|
79 |
$r^? \dn r + \ONE$\\ |
|
80 |
$r^{\{0\}} \dn \ONE$\\ |
|
81 |
$r^{\{n\}} \dn r\cdot r^{\{n-1\}}$\\ |
|
82 |
$r^{\{..n\}} \dn (r^?)^{\{n\}}$\\ |
|
83 |
$r^{\{n..m\}} \dn r^{\{..m-n\}}\cdot r^{\{n\}}$\\ |
|
84 |
||
85 |
BTW, $r^{\{n..m\}}$ cannot be defined in terms of $r^{\{n..\}} \;\&\; r^{\{..m\}}$ where $\&$ is |
|
86 |
the intersection operator I introduced this year. For example assume $r=aaa + aaaaaaa$, then |
|
87 |
$r^{\{4..6\}}$ cannot match 21 a's, but $r^{\{4..\}} \;\&\; r^{\{..6\}}$ can. |
|
88 |
} |
|
89 |
||
90 |
||
294
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
91 |
\item Give the regular expressions for lexing a language |
401
5d85dc9779b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
359
diff
changeset
|
92 |
consisting of identifiers, left-parenthesis \texttt{(}, |
5d85dc9779b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
359
diff
changeset
|
93 |
right-parenthesis \texttt{)}, numbers that can be either |
5d85dc9779b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
359
diff
changeset
|
94 |
positive or negative, and the operations \texttt{+}, |
5d85dc9779b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
359
diff
changeset
|
95 |
\texttt{-} and \texttt{*}. |
93
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
96 |
|
401
5d85dc9779b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
359
diff
changeset
|
97 |
Decide whether the following strings can be lexed in |
5d85dc9779b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
359
diff
changeset
|
98 |
this language. |
147
4725bba8ef26
added slides
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
102
diff
changeset
|
99 |
|
294
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
100 |
\begin{enumerate} |
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
101 |
\item \texttt{"(a3+3)*b"} |
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
102 |
\item \texttt{")()++-33"} |
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
103 |
\item \texttt{"(b42/3)*3"} |
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
104 |
\end{enumerate} |
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
105 |
|
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
106 |
In case they can, give the corresponding token sequences. (Hint: |
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
107 |
Observe the maximal munch rule and the priorities of your regular |
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
108 |
expressions that make the process of lexing unambiguous.) |
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
109 |
|
953 | 110 |
\solution{ |
111 |
The first two strings can be lexed. But not the last ($/$ is not part of the language). |
|
112 |
} |
|
113 |
||
602 | 114 |
\item Consider the following context-free grammar $G$ |
527 | 115 |
|
116 |
\begin{plstx}[margin=1cm] |
|
117 |
: \meta{S\/} ::= \meta{A\/}\cdot\meta{S\/}\cdot\meta{B\/} \;\mid\; |
|
118 |
\meta{B\/}\cdot\meta{S\/}\cdot\meta{A\/} \;\mid\; \epsilon\\ |
|
119 |
: \meta{A\/} ::= a \mid \epsilon\\ |
|
120 |
: \meta{B\/} ::= b\\ |
|
121 |
\end{plstx} |
|
122 |
||
602 | 123 |
where the starting symbol is $\meta{S}$. |
124 |
Which of the following strings are in the language of $G$? |
|
527 | 125 |
|
126 |
\begin{itemize} |
|
127 |
\item[$\bullet$] $a$ |
|
128 |
\item[$\bullet$] $b$ |
|
129 |
\item[$\bullet$] $ab$ |
|
130 |
\item[$\bullet$] $ba$ |
|
131 |
\item[$\bullet$] $bb$ |
|
132 |
\item[$\bullet$] $baa$ |
|
619 | 133 |
\end{itemize} |
134 |
||
953 | 135 |
\solution{ |
136 |
The first and the last cannot be matched. Maybe it is a good exercise to |
|
137 |
write down the derivations for the rest. |
|
138 |
||
139 |
BTW, every string generated by this grammar consists of |
|
140 |
a's and b's with at least as many b's as a's (this includes the |
|
141 |
empty string); the converse does not hold, e.g.~$abba$ cannot be derived. |
|
142 |
} |
|
143 |
||
619 | 144 |
\item Consider the following context-free grammar |
145 |
||
146 |
\begin{plstx}[margin=1cm] |
|
147 |
: \meta{S\/} ::= a\cdot \meta{S\/}\cdot a\;\mid\; |
|
148 |
b\cdot \meta{S\/}\cdot b\;\mid\; \epsilon\\ |
|
149 |
\end{plstx} |
|
150 |
||
151 |
Describe which language is generated by this grammar. |
|
953 | 152 |
|
153 |
\solution{Palindromes of even length consisting of a's and b's, including |
|
154 |
the empty string} |
|
155 |
||
156 |
||
937 | 157 |
\item Remember we have specified identifiers with regular expressions as |
158 |
strings that start with a letter followed by letters, digits and |
|
159 |
underscores. This can also be specified by a grammar rule or rules. |
|
160 |
What would the rule(s) look like for identifiers? |
|
527 | 161 |
|
937 | 162 |
\solution{ |
163 |
\begin{plstx}[margin=1cm] |
|
164 |
: \meta{Id\/} ::= \meta{Let\/}\cdot \meta{R}\\ |
|
165 |
: \meta{Let\/} ::= a \;\mid\; \dots \;\mid\; z\\ |
|
166 |
: \meta{Dig\/} ::= 0 \;\mid\; \dots \;\mid\; 9\\ |
|
167 |
: \meta{R\/} ::= \meta{Let\/} \cdot \meta{R\/} \;\mid\; |
|
168 |
\meta{Dig\/} \cdot \meta{R\/} \;\mid\; |
|
169 |
$\_$ \cdot \meta{R\/} \;\mid\; \epsilon\\ |
|
170 |
\end{plstx} |
|
171 |
} |
|
172 |
||
173 |
\item If we specify keywords, identifiers (see above) and programs |
|
174 |
by grammar rules, are there any problems you need to be careful |
|
175 |
about when using a parser for identifying tokens? |
|
176 |
||
177 |
\solution{Parsers do not have the POSIX rules (e.g.~longest munch |
|
178 |
rule) built in. I am not aware that any parser does this out of |
|
179 |
the box and you would need to build such constraints into the |
|
180 |
grammar rules or parsing mechanism.} |
|
527 | 181 |
|
444
3056a4c071b0
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
401
diff
changeset
|
182 |
\item {\bf(Optional)} Recall the definitions for $Der$ and $der$ |
401
5d85dc9779b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
359
diff
changeset
|
183 |
from the lectures. Prove by induction on $r$ the |
5d85dc9779b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
359
diff
changeset
|
184 |
property that |
147
4725bba8ef26
added slides
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
102
diff
changeset
|
185 |
|
4725bba8ef26
added slides
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
102
diff
changeset
|
186 |
\[ |
4725bba8ef26
added slides
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
102
diff
changeset
|
187 |
L(der\,c\,r) = Der\,c\,(L(r)) |
4725bba8ef26
added slides
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
102
diff
changeset
|
188 |
\] |
4725bba8ef26
added slides
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
102
diff
changeset
|
189 |
|
401
5d85dc9779b1
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
359
diff
changeset
|
190 |
holds. |
294
c29853b672fb
updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
292
diff
changeset
|
191 |
|
444
3056a4c071b0
updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
401
diff
changeset
|
192 |
\item \POSTSCRIPT |
93
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
193 |
\end{enumerate} |
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
194 |
|
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
195 |
\end{document} |
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
196 |
|
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
197 |
%%% Local Variables: |
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
198 |
%%% mode: latex |
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
199 |
%%% TeX-master: t |
4794759139ea
better organised
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
200 |
%%% End: |