hws/hw06.tex
author Christian Urban <christian.urban@kcl.ac.uk>
Sat, 11 Nov 2023 10:08:33 +0000
changeset 954 eda0ccf56c72
parent 916 10f834eb0a9e
child 955 47acfd7f9096
permissions -rw-r--r--
updated
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
56
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     1
\documentclass{article}
292
7ed2a25dd115 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 102
diff changeset
     2
\usepackage{../style}
7ed2a25dd115 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 102
diff changeset
     3
\usepackage{../graphics}
954
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
     4
\usepackage{../grammar}
56
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     5
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     6
\begin{document}
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     7
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     8
\section*{Homework 6}
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     9
916
10f834eb0a9e texupdate
Christian Urban <christian.urban@kcl.ac.uk>
parents: 898
diff changeset
    10
%\HEADER
359
db106e5b7c4d updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 292
diff changeset
    11
56
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    12
\begin{enumerate}
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    13
\item (i) Give the regular expressions for lexing a language
401
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
    14
      consisting of whitespaces, identifiers (some letters
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
    15
      followed by digits), numbers, operations \texttt{=},
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
    16
      \texttt{<} and \texttt{>}, and the keywords \texttt{if},
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
    17
      \texttt{then} and \texttt{else}. (ii) Decide whether the
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
    18
      following strings can be lexed in this language?
56
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    19
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    20
\begin{enumerate}
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    21
\item \texttt{"if y4 = 3 then 1 else 3"}
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    22
\item \texttt{"if33 ifif then then23 else else 32"}
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    23
\item \texttt{"if x4x < 33 then 1 else 3"}
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    24
\end{enumerate}
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    25
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    26
In case they can, give the corresponding token sequences. (Hint: 
89
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 56
diff changeset
    27
Observe the maximal munch rule and priorities of your regular
56
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    28
expressions that make the process of lexing unambiguous.)
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    29
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    30
\item Suppose the grammar
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    31
954
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    32
\begin{plstx}[margin=1cm]    
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    33
:\meta{E} ::= \meta{F} \;|\; \meta{F} \cdot \texttt{*} \cdot \meta{F} \;|\; \meta{F} \cdot \backslash \cdot \meta{F}\\
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    34
:\meta{F} ::= \meta{T} \;|\; \meta{T} \cdot \texttt{+} \cdot \meta{T} \;|\; \meta{T} \cdot \texttt{-} \cdot \meta{T}\\
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    35
:\meta{T} ::= $num$ \;|\; \texttt{(} \cdot E \cdot \texttt{)}\\
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    36
\end{plstx}
56
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    37
954
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    38
where \meta{E}, \meta{F} and \meta{T} are non-terminals, \meta{E} is
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    39
the starting symbol of the grammar, and $num$ stands for a number
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    40
token. Give a parse tree for the string \texttt{(3+3)+(2*3)}. Note that
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    41
\meta{F} and \meta{T} are ``exchanged'' in this grammar in comparison with
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    42
the usual grammar for arithmetic expressions. What does this mean in terms
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    43
of precedences of the operators?
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    44
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    45
\solution{
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    46
  For the second part "1+2*3" will be parsed as (1+2)*3, meaning + and - bind
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    47
  tighter than * and $\backslash$
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    48
}
56
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    49
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    50
\item Define what it means for a grammar to be ambiguous. Give an example of
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    51
an ambiguous grammar.
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    52
954
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    53
\solution{
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    54
  Already the grammar
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    55
  \begin{plstx}[margin=1cm]
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    56
    : \meta{E} ::= \meta{E}\cdot +\cdot\meta{E} | num\\
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    57
  \end{plstx}
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    58
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    59
  is ambiguous because a string like "1+2+3" can be parsed
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    60
  as (1+2)+3 or 1+(2+3).
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    61
  }
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    62
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    63
56
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    64
\item Suppose boolean expressions are built up from
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    65
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    66
\begin{center}
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    67
\begin{tabular}{ll}
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    68
1.) & tokens for \texttt{true} and \texttt{false},\\
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    69
2.) & the infix operations \texttt{$\wedge$} and \texttt{$\vee$},\\
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    70
3.) & the prefix operation $\neg$, and\\
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    71
4.) & can be enclosed in parentheses.
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    72
\end{tabular}
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    73
\end{center}
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    74
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    75
(i) Give a grammar that can recognise such boolean expressions
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    76
and (ii) give a sample string involving all rules given in 1.-4.~that 
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    77
can be parsed by this grammar.
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    78
459
780486571e38 updated
Christian Urban <urbanc@in.tum.de>
parents: 401
diff changeset
    79
721
e3c64f22dd31 added slides from Rochester
Christian Urban <christian.urban@kcl.ac.uk>
parents: 647
diff changeset
    80
647
180600c04da2 updated
Christian Urban <urbanc@in.tum.de>
parents: 596
diff changeset
    81
\item Parsing combinators consist of atomic parsers, alternative
180600c04da2 updated
Christian Urban <urbanc@in.tum.de>
parents: 596
diff changeset
    82
  parsers, sequence parsers and semantic actions.  What is the purpose
954
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    83
  of (1) atomic parsers and of (2) map-parsers (also called semantic actions)?
647
180600c04da2 updated
Christian Urban <urbanc@in.tum.de>
parents: 596
diff changeset
    84
954
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    85
  \solution{
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    86
    Atomic parsers look at a concrete prefix of the input, like num-tokens or identifiers.
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    87
    Map-parsers apply a function to the output of a parser. In this way you can transform
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    88
    the output from, for example, a string to an integer.
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    89
  }
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    90
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    91
  
726
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 721
diff changeset
    92
\item Parser combinators can directly be given a string as
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 721
diff changeset
    93
      input, without the need of a lexer. What are the
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 721
diff changeset
    94
      advantages to first lex a string and then feed a
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 721
diff changeset
    95
      sequence of tokens as input to the parser?
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 721
diff changeset
    96
954
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    97
      \solution{ Reason 1 you can filter out whitespaces and comments,
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    98
        which makes the grammar rules simpler. If you have to make
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    99
        sure that a whitespace comes after a variable say, then your
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   100
        parser rule for variables gets more complicated. Same with
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   101
        comments which do not contribute anything to the parse tree.
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   102
        
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   103
        Reason 2) The lexer can already classify tokens, for example
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   104
        as numbers, keywords or identifiers. This again makes the grammar
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   105
        rules more deterministic and as a result faster to parse.
898
45a48c47dcca updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 894
diff changeset
   106
954
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   107
        The point is that parser combinators can be used to process
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   108
        strings, but in case of compilers where whitespaces and
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   109
        comments need to be filtered out, the lexing phase is actually
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   110
        quite useful.  }
898
45a48c47dcca updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 894
diff changeset
   111
      
894
02ef5c3abc51 updatedHG: added solutions/cw5/fun_tokens.sc
Christian Urban <christian.urban@kcl.ac.uk>
parents: 726
diff changeset
   112
\item The injection function for sequence regular expressions is defined
02ef5c3abc51 updatedHG: added solutions/cw5/fun_tokens.sc
Christian Urban <christian.urban@kcl.ac.uk>
parents: 726
diff changeset
   113
      by three clauses:
726
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 721
diff changeset
   114
894
02ef5c3abc51 updatedHG: added solutions/cw5/fun_tokens.sc
Christian Urban <christian.urban@kcl.ac.uk>
parents: 726
diff changeset
   115
\begin{center}
02ef5c3abc51 updatedHG: added solutions/cw5/fun_tokens.sc
Christian Urban <christian.urban@kcl.ac.uk>
parents: 726
diff changeset
   116
\begin{tabular}{l@{\hspace{1mm}}c@{\hspace{1mm}}l}
02ef5c3abc51 updatedHG: added solutions/cw5/fun_tokens.sc
Christian Urban <christian.urban@kcl.ac.uk>
parents: 726
diff changeset
   117
  $\inj\,(r_1 \cdot r_2)\,c\,\,Seq(v_1,v_2)$ & $\dn$  & $Seq(\inj\,r_1\,c\,v_1,v_2)$\\
02ef5c3abc51 updatedHG: added solutions/cw5/fun_tokens.sc
Christian Urban <christian.urban@kcl.ac.uk>
parents: 726
diff changeset
   118
  $\inj\,(r_1 \cdot r_2)\,c\,\,\Left(Seq(v_1,v_2))$ & $\dn$  & $Seq(\inj\,r_1\,c\,v_1,v_2)$\\
02ef5c3abc51 updatedHG: added solutions/cw5/fun_tokens.sc
Christian Urban <christian.urban@kcl.ac.uk>
parents: 726
diff changeset
   119
  $\inj\,(r_1 \cdot r_2)\,c\,\,Right(v)$ & $\dn$  & $Seq(\textit{mkeps}(r_1),\inj\,r_2\,c\,v)$\\
02ef5c3abc51 updatedHG: added solutions/cw5/fun_tokens.sc
Christian Urban <christian.urban@kcl.ac.uk>
parents: 726
diff changeset
   120
\end{tabular}
02ef5c3abc51 updatedHG: added solutions/cw5/fun_tokens.sc
Christian Urban <christian.urban@kcl.ac.uk>
parents: 726
diff changeset
   121
\end{center}
02ef5c3abc51 updatedHG: added solutions/cw5/fun_tokens.sc
Christian Urban <christian.urban@kcl.ac.uk>
parents: 726
diff changeset
   122
954
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   123
     Explain why there are three cases in the injection function for sequence
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   124
     regular expressions.
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   125
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   126
     \solution{
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   127
       This is because the derivative of sequences can be of the form
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   128
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   129
       \begin{itemize}
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   130
       \item $(der\,c\,r_1)\cdot r_2$
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   131
       \item $(der\,c\,r_1)\cdot r_2 \;+\; der\,c\,r_2$  
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   132
       \end{itemize}
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   133
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   134
       In the first case the value needs to be of the form $Seq$, in the second case $\Left$ or $Right$.
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   135
       Therefore 3 cases.
eda0ccf56c72 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   136
       }
894
02ef5c3abc51 updatedHG: added solutions/cw5/fun_tokens.sc
Christian Urban <christian.urban@kcl.ac.uk>
parents: 726
diff changeset
   137
      
459
780486571e38 updated
Christian Urban <urbanc@in.tum.de>
parents: 401
diff changeset
   138
\item \POSTSCRIPT        
56
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   139
\end{enumerate}
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   140
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   141
\end{document}
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   142
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   143
%%% Local Variables: 
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   144
%%% mode: latex
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   145
%%% TeX-master: t
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   146
%%% End: