afl-material: hws/hw06.tex@eda0ccf56c72 (annotated)

56 f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	1	\documentclass{article}
292 7ed2a25dd115 updated Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 102 diff changeset	2	\usepackage{../style}
7ed2a25dd115 updated Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 102 diff changeset	3	\usepackage{../graphics}
954 eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	4	\usepackage{../grammar}
56 f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	5
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	6	\begin{document}
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	7
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	8	\section*{Homework 6}
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	9
916 10f834eb0a9e texupdate Christian Urban <christian.urban@kcl.ac.uk> parents: 898 diff changeset	10	%\HEADER
359 db106e5b7c4d updated Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 292 diff changeset	11
56 f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	12	\begin{enumerate}
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	13	\item (i) Give the regular expressions for lexing a language
401 5d85dc9779b1 updated Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 359 diff changeset	14	consisting of whitespaces, identifiers (some letters
5d85dc9779b1 updated Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 359 diff changeset	15	followed by digits), numbers, operations \texttt{=},
5d85dc9779b1 updated Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 359 diff changeset	16	\texttt{<} and \texttt{>}, and the keywords \texttt{if},
5d85dc9779b1 updated Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 359 diff changeset	17	\texttt{then} and \texttt{else}. (ii) Decide whether the
5d85dc9779b1 updated Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 359 diff changeset	18	following strings can be lexed in this language?
56 f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	19
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	20	\begin{enumerate}
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	21	\item \texttt{"if y4 = 3 then 1 else 3"}
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	22	\item \texttt{"if33 ifif then then23 else else 32"}
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	23	\item \texttt{"if x4x < 33 then 1 else 3"}
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	24	\end{enumerate}
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	25
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	26	In case they can, give the corresponding token sequences. (Hint:
89 24adcc265f2e typo Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 56 diff changeset	27	Observe the maximal munch rule and priorities of your regular
56 f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	28	expressions that make the process of lexing unambiguous.)
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	29
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	30	\item Suppose the grammar
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	31
954 eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	32	\begin{plstx}[margin=1cm]
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	33	:\meta{E} ::= \meta{F} \;\|\; \meta{F} \cdot \texttt{*} \cdot \meta{F} \;\|\; \meta{F} \cdot \backslash \cdot \meta{F}\\
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	34	:\meta{F} ::= \meta{T} \;\|\; \meta{T} \cdot \texttt{+} \cdot \meta{T} \;\|\; \meta{T} \cdot \texttt{-} \cdot \meta{T}\\
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	35	:\meta{T} ::= $num$ \;\|\; \texttt{(} \cdot E \cdot \texttt{)}\\
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	36	\end{plstx}
56 f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	37
954 eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	38	where \meta{E}, \meta{F} and \meta{T} are non-terminals, \meta{E} is
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	39	the starting symbol of the grammar, and $num$ stands for a number
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	40	token. Give a parse tree for the string \texttt{(3+3)+(2*3)}. Note that
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	41	\meta{F} and \meta{T} are ``exchanged'' in this grammar in comparison with
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	42	the usual grammar for arithmetic expressions. What does this mean in terms
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	43	of precedences of the operators?
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	44
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	45	\solution{
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	46	For the second part "1+23" will be parsed as (1+2)3, meaning + and - bind
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	47	tighter than * and $\backslash$
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	48	}
56 f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	49
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	50	\item Define what it means for a grammar to be ambiguous. Give an example of
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	51	an ambiguous grammar.
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	52
954 eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	53	\solution{
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	54	Already the grammar
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	55	\begin{plstx}[margin=1cm]
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	56	: \meta{E} ::= \meta{E}\cdot +\cdot\meta{E} \| num\\
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	57	\end{plstx}
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	58
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	59	is ambiguous because a string like "1+2+3" can be parsed
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	60	as (1+2)+3 or 1+(2+3).
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	61	}
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	62
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	63
56 f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	64	\item Suppose boolean expressions are built up from
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	65
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	66	\begin{center}
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	67	\begin{tabular}{ll}
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	68	1.) & tokens for \texttt{true} and \texttt{false},\\
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	69	2.) & the infix operations \texttt{$\wedge$} and \texttt{$\vee$},\\
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	70	3.) & the prefix operation $\neg$, and\\
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	71	4.) & can be enclosed in parentheses.
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	72	\end{tabular}
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	73	\end{center}
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	74
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	75	(i) Give a grammar that can recognise such boolean expressions
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	76	and (ii) give a sample string involving all rules given in 1.-4.~that
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	77	can be parsed by this grammar.
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	78
459 780486571e38 updated Christian Urban <urbanc@in.tum.de> parents: 401 diff changeset	79
721 e3c64f22dd31 added slides from Rochester Christian Urban <christian.urban@kcl.ac.uk> parents: 647 diff changeset	80
647 180600c04da2 updated Christian Urban <urbanc@in.tum.de> parents: 596 diff changeset	81	\item Parsing combinators consist of atomic parsers, alternative
180600c04da2 updated Christian Urban <urbanc@in.tum.de> parents: 596 diff changeset	82	parsers, sequence parsers and semantic actions. What is the purpose
954 eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	83	of (1) atomic parsers and of (2) map-parsers (also called semantic actions)?
647 180600c04da2 updated Christian Urban <urbanc@in.tum.de> parents: 596 diff changeset	84
954 eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	85	\solution{
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	86	Atomic parsers look at a concrete prefix of the input, like num-tokens or identifiers.
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	87	Map-parsers apply a function to the output of a parser. In this way you can transform
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	88	the output from, for example, a string to an integer.
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	89	}
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	90
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	91
726 fba480bbc9f7 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 721 diff changeset	92	\item Parser combinators can directly be given a string as
fba480bbc9f7 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 721 diff changeset	93	input, without the need of a lexer. What are the
fba480bbc9f7 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 721 diff changeset	94	advantages to first lex a string and then feed a
fba480bbc9f7 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 721 diff changeset	95	sequence of tokens as input to the parser?
fba480bbc9f7 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 721 diff changeset	96
954 eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	97	\solution{ Reason 1 you can filter out whitespaces and comments,
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	98	which makes the grammar rules simpler. If you have to make
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	99	sure that a whitespace comes after a variable say, then your
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	100	parser rule for variables gets more complicated. Same with
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	101	comments which do not contribute anything to the parse tree.
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	102
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	103	Reason 2) The lexer can already classify tokens, for example
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	104	as numbers, keywords or identifiers. This again makes the grammar
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	105	rules more deterministic and as a result faster to parse.
898 45a48c47dcca updated Christian Urban <christian.urban@kcl.ac.uk> parents: 894 diff changeset	106
954 eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	107	The point is that parser combinators can be used to process
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	108	strings, but in case of compilers where whitespaces and
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	109	comments need to be filtered out, the lexing phase is actually
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	110	quite useful. }
898 45a48c47dcca updated Christian Urban <christian.urban@kcl.ac.uk> parents: 894 diff changeset	111
894 02ef5c3abc51 updatedHG: added solutions/cw5/fun_tokens.sc Christian Urban <christian.urban@kcl.ac.uk> parents: 726 diff changeset	112	\item The injection function for sequence regular expressions is defined
02ef5c3abc51 updatedHG: added solutions/cw5/fun_tokens.sc Christian Urban <christian.urban@kcl.ac.uk> parents: 726 diff changeset	113	by three clauses:
726 fba480bbc9f7 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 721 diff changeset	114
894 02ef5c3abc51 updatedHG: added solutions/cw5/fun_tokens.sc Christian Urban <christian.urban@kcl.ac.uk> parents: 726 diff changeset	115	\begin{center}
02ef5c3abc51 updatedHG: added solutions/cw5/fun_tokens.sc Christian Urban <christian.urban@kcl.ac.uk> parents: 726 diff changeset	116	\begin{tabular}{l@{\hspace{1mm}}c@{\hspace{1mm}}l}
02ef5c3abc51 updatedHG: added solutions/cw5/fun_tokens.sc Christian Urban <christian.urban@kcl.ac.uk> parents: 726 diff changeset	117	$\inj\,(r_1 \cdot r_2)\,c\,\,Seq(v_1,v_2)$ & $\dn$ & $Seq(\inj\,r_1\,c\,v_1,v_2)$\\
02ef5c3abc51 updatedHG: added solutions/cw5/fun_tokens.sc Christian Urban <christian.urban@kcl.ac.uk> parents: 726 diff changeset	118	$\inj\,(r_1 \cdot r_2)\,c\,\,\Left(Seq(v_1,v_2))$ & $\dn$ & $Seq(\inj\,r_1\,c\,v_1,v_2)$\\
02ef5c3abc51 updatedHG: added solutions/cw5/fun_tokens.sc Christian Urban <christian.urban@kcl.ac.uk> parents: 726 diff changeset	119	$\inj\,(r_1 \cdot r_2)\,c\,\,Right(v)$ & $\dn$ & $Seq(\textit{mkeps}(r_1),\inj\,r_2\,c\,v)$\\
02ef5c3abc51 updatedHG: added solutions/cw5/fun_tokens.sc Christian Urban <christian.urban@kcl.ac.uk> parents: 726 diff changeset	120	\end{tabular}
02ef5c3abc51 updatedHG: added solutions/cw5/fun_tokens.sc Christian Urban <christian.urban@kcl.ac.uk> parents: 726 diff changeset	121	\end{center}
02ef5c3abc51 updatedHG: added solutions/cw5/fun_tokens.sc Christian Urban <christian.urban@kcl.ac.uk> parents: 726 diff changeset	122
954 eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	123	Explain why there are three cases in the injection function for sequence
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	124	regular expressions.
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	125
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	126	\solution{
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	127	This is because the derivative of sequences can be of the form
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	128
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	129	\begin{itemize}
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	130	\item $(der\,c\,r_1)\cdot r_2$
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	131	\item $(der\,c\,r_1)\cdot r_2 \;+\; der\,c\,r_2$
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	132	\end{itemize}
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	133
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	134	In the first case the value needs to be of the form $Seq$, in the second case $\Left$ or $Right$.
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	135	Therefore 3 cases.
eda0ccf56c72 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 916 diff changeset	136	}
894 02ef5c3abc51 updatedHG: added solutions/cw5/fun_tokens.sc Christian Urban <christian.urban@kcl.ac.uk> parents: 726 diff changeset	137
459 780486571e38 updated Christian Urban <urbanc@in.tum.de> parents: 401 diff changeset	138	\item \POSTSCRIPT
56 f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	139	\end{enumerate}
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	140
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	141	\end{document}
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	142
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	143	%%% Local Variables:
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	144	%%% mode: latex
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	145	%%% TeX-master: t
f28824933a66 added Christian Urban <urbanc@in.tum.de> parents: diff changeset	146	%%% End:

author	Christian Urban <christian.urban@kcl.ac.uk>
	Sat, 11 Nov 2023 10:08:33 +0000
changeset 954	eda0ccf56c72
parent 916	10f834eb0a9e
child 955	47acfd7f9096
permissions	-rw-r--r--