cws/cw02.tex
       
     1 % !TEX program = xelatex
       
     2 \documentclass{article}
       
     3 \usepackage{../style}
       
     4 \usepackage{../langs}
       
     5 
       
     6 \begin{document}
       
     7 
       
     8 \section*{Coursework 2}
       
     9 
       
    10 \noindent This coursework is worth 8\% and is due on \cwTWO{} at
       
    11 18:00. You are asked to implement the Sulzmann \& Lu lexer for the
       
    12 WHILE language. You can do the implementation in any programming
       
    13 language you like, but you need to submit the source code with which
       
you answered the questions; otherwise a mark of 0\% will be
awarded. You can submit your answers as a txt-file or as a pdf; code,
however, must be submitted as code. Please package everything in a
zip-file that creates a
       
    17 directory with the name \texttt{YournameYourfamilyname} on my end. Thanks!
       
    18 
       
    19 \subsection*{Disclaimer\alert}
       
    20 
       
It should be understood that the work you submit represents
your own effort. You have not copied from anyone else. An
exception is the Scala code from KEATS and the code I showed
during the lectures, both of which you can freely use. You can
also use your own code from CW~1.
       
    26 
       
    27 \subsection*{Question 1}
       
    28 
       
    29 To implement a lexer for the WHILE language, you first
       
    30 need to design the appropriate regular expressions for the
       
    31 following eleven syntactic entities:
       
    32 
       
    33 \begin{enumerate}
       
    34 \item keywords are
       
    35 
       
    36 \begin{center}
       
    37 \texttt{while}, 
       
    38 \texttt{if}, 
       
    39 \texttt{then}, 
       
    40 \texttt{else}, 
       
    41 \texttt{do}, 
       
    42 \texttt{for}, 
       
    43 \texttt{to}, 
       
    44 \texttt{true}, 
       
    45 \texttt{false}, 
       
    46 \texttt{read}, 
       
    47 \texttt{write},
       
    48 \texttt{skip}
       
    49 \end{center} 
       
    50 
       
    51 \item operators are:
       
    52 \texttt{+}, 
       
    53 \texttt{-}, 
       
    54 \texttt{*}, 
       
    55 \texttt{\%},
       
    56 \texttt{/},
       
    57 \texttt{==}, 
       
    58 \texttt{!=}, 
       
    59 \texttt{>}, 
       
    60 \texttt{<},
       
    61 \texttt{<=}, 
       
    62 \texttt{>=},
       
    63 \texttt{:=},
       
    64 \texttt{\&\&},
       
    65 \texttt{||}
       
    66 
       
    67 \item letters are uppercase and lowercase
       
    68 
       
    69 \item symbols are letters plus the characters
       
    70   \texttt{.},
       
    71   \texttt{\_},
       
    72   \texttt{>},
       
    73   \texttt{<},
       
    74   \texttt{=},
       
    75   \texttt{;},
       
    76   \texttt{,} and
       
    77   \texttt{:}
       
    78 
       
\item strings are enclosed by \texttt{"\ldots"} and consist of
  symbols, whitespaces and digits
       
    81 \item parentheses are \texttt{(}, \texttt{\{}, \texttt{)} and \texttt{\}}
       
    82 \item there are semicolons \texttt{;}
       
    83 \item whitespaces are either \texttt{" "} (one or more) or \texttt{$\backslash$n} or
       
    84   \texttt{$\backslash$t}
       
\item identifiers are letters followed by a (possibly empty) sequence of
underscores \texttt{\_\!\_}, letters or digits
       
    87 \item numbers are \pcode{0}, \pcode{1}, \ldots and so on; give 
       
    88 a regular expression that can recognise \pcode{0}, but not numbers 
       
    89 with leading zeroes, such as \pcode{001}
       
    90 \item comments start with \texttt{//} and contain symbols, spaces and digits until the end of the line
       
    91 \end{enumerate}
       
    92 
       
    93 \noindent
       
    94 You can use the basic regular expressions 
       
    95 
       
    96 \[
       
    97 \ZERO,\; \ONE,\; c,\; r_1 + r_2,\; r_1 \cdot r_2,\; r^*
       
    98 \]
       
    99 
       
   100 \noindent
       
   101 but also the following extended regular expressions
       
   102 
       
   103 \begin{center}
       
   104 \begin{tabular}{ll}
       
   105 $[c_1,c_2,\ldots,c_n]$ & a set of characters\\
       
   106 $r^+$ & one or more times $r$\\
       
   107 $r^?$ & optional $r$\\
       
$r^{\{n\}}$ & $n$-times $r$\\
       
   109 \end{tabular}
       
   110 \end{center}
       
   111 
       
   112 \noindent
       
   113 Later on you will also need the record regular expression:
       
   114 
       
   115 \begin{center}
       
   116 \begin{tabular}{ll}
       
   117 $REC(x:r)$ & record regular expression\\
       
   118 \end{tabular}
       
   119 \end{center}
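\noindent
For orientation, one possible Scala representation of these regular
expressions is sketched below. The constructor names are only
suggestions (they roughly follow the Scala code from KEATS); any
equivalent representation, in the programming language of your choice,
is fine.

\begin{lstlisting}[language=Scala,numbers=none]
abstract class Rexp
case object ZERO extends Rexp                     // matches nothing
case object ONE extends Rexp                      // matches the empty string
case class CHAR(c: Char) extends Rexp             // matches a single character
case class ALT(r1: Rexp, r2: Rexp) extends Rexp   // r1 + r2
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp   // r1 . r2
case class STAR(r: Rexp) extends Rexp             // r*
// extended regular expressions needed for this coursework
case class RANGE(cs: Set[Char]) extends Rexp      // [c1,c2,...,cn]
case class PLUS(r: Rexp) extends Rexp             // r+
case class OPTIONAL(r: Rexp) extends Rexp         // r?
case class NTIMES(r: Rexp, n: Int) extends Rexp   // r{n}
case class RECD(x: String, r: Rexp) extends Rexp  // record  x : r
\end{lstlisting}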
       
   120 
       
\noindent Try to design your regular expressions to be as
small as possible. For example, you should use character sets
for identifiers and numbers. Feel free to use the general
character constructor \textit{CFUN} introduced in CW~1.
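\noindent
If you use \textit{CFUN}, single characters and character sets do not
need to be separate constructors. Below is a minimal sketch, assuming
the \pcode{Rexp} datatype from above and the CW~1 representation of
\textit{CFUN} as a regular expression carrying a character test:

\begin{lstlisting}[language=Scala,numbers=none]
// CFUN as in CW 1: a regular expression parameterised by a character test;
// it can replace the CHAR and RANGE constructors from the sketch above
case class CFUN(f: Char => Boolean) extends Rexp

def CHAR(c: Char) : Rexp = CFUN(_ == c)                 // single character
def RANGE(cs: Set[Char]) : Rexp = CFUN(cs.contains(_))  // character set
val ALL : Rexp = CFUN(_ => true)                        // any character
\end{lstlisting}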
       
   125 
       
   126 \subsection*{Question 2}
       
   127 
       
   128 Implement the Sulzmann \& Lu lexer from the lectures. For
       
   129 this you need to implement the functions $nullable$ and $der$
       
   130 (you can use your code from CW~1), as well as $mkeps$ and
       
   131 $inj$. These functions need to be appropriately extended for
       
   132 the extended regular expressions from Q1. Write down the 
       
   133 clauses for
       
   134 
       
   135 \begin{center}
       
   136 \begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
       
   137 $mkeps([c_1,c_2,\ldots,c_n])$  & $\dn$ & $?$\\
       
   138 $mkeps(r^+)$                   & $\dn$ & $?$\\
       
   139 $mkeps(r^?)$                   & $\dn$ & $?$\\
       
   140 $mkeps(r^{\{n\}})$             & $\dn$ & $?$\medskip\\
       
   141 $inj\, ([c_1,c_2,\ldots,c_n])\,c\,\ldots$  & $\dn$ & $?$\\
       
   142 $inj\, (r^+)\,c\,\ldots$                   & $\dn$ & $?$\\
       
   143 $inj\, (r^?)\,c\,\ldots$                   & $\dn$ & $?$\\
       
   144 $inj\, (r^{\{n\}})\,c\,\ldots$             & $\dn$ & $?$\\
       
   145 \end{tabular}
       
   146 \end{center}
       
   147 
       
   148 \noindent where $inj$ takes three arguments: a regular
       
   149 expression, a character and a value. Test your lexer code
       
   150 with at least the two small examples below:
       
   151 
       
   152 \begin{center}
       
   153 \begin{tabular}{ll}
       
   154 regex: & string:\smallskip\\
       
   155 $a^{\{3\}}$ & $aaa$\\
       
   156 $(a + \ONE)^{\{3\}}$ & $aa$
       
   157 \end{tabular}
       
   158 \end{center}
       
   159 
       
   160 
       
\noindent Both strings should be successfully lexed by the
respective regular expression; that is, in both examples the lexer
returns a value.
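\noindent
As a sanity check, the two examples can be run through a top-level
lexing function in the style of the lectures. The sketch below assumes
the constructors from Q1 and your functions $nullable$, $der$, $mkeps$
and $inj$ from this question:

\begin{lstlisting}[language=Scala,numbers=none]
// top-level lexer: calls mkeps once the string is exhausted and
// otherwise injects the character back into the value computed
// for the derivative
def lex(r: Rexp, s: List[Char]) : Val = s match {
  case Nil => if (nullable(r)) mkeps(r) else throw new Exception("lexing error")
  case c::cs => inj(r, c, lex(der(c, r), cs))
}

def lexing(r: Rexp, s: String) : Val = lex(r, s.toList)

// the two test cases: both should return a value (no exception)
val test1 = lexing(NTIMES(CHAR('a'), 3), "aaa")
val test2 = lexing(NTIMES(ALT(CHAR('a'), ONE), 3), "aa")
\end{lstlisting}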
       
   164 
       
   165 
       
Also add the record regular expression from the
lectures to your lexer and implement a function, say
\pcode{env}, that returns all assignments from a value (such
that you can easily extract the tokens from a value).\medskip
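\noindent
A sketch of such an \pcode{env} function, along the lines of the
lecture code, is given below. It assumes a datatype of values with a
record value \pcode{Rec}; the names are only suggestions, and further
value forms may be needed for the extended regular expressions from Q1.

\begin{lstlisting}[language=Scala,numbers=none]
// values, including a record value for the record regular expression
abstract class Val
case object Empty extends Val
case class Chr(c: Char) extends Val
case class Sequ(v1: Val, v2: Val) extends Val
case class Left(v: Val) extends Val
case class Right(v: Val) extends Val
case class Stars(vs: List[Val]) extends Val
case class Rec(x: String, v: Val) extends Val

// the string a value stands for
def flatten(v: Val) : String = v match {
  case Empty        => ""
  case Chr(c)       => c.toString
  case Left(v)      => flatten(v)
  case Right(v)     => flatten(v)
  case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
  case Stars(vs)    => vs.map(flatten).mkString
  case Rec(_, v)    => flatten(v)
}

// all assignments (name, matched string) recorded in a value
def env(v: Val) : List[(String, String)] = v match {
  case Empty        => Nil
  case Chr(_)       => Nil
  case Left(v)      => env(v)
  case Right(v)     => env(v)
  case Sequ(v1, v2) => env(v1) ::: env(v2)
  case Stars(vs)    => vs.flatMap(env)
  case Rec(x, v)    => (x, flatten(v)) :: env(v)
}
\end{lstlisting}

\noindent
The token sequence is then obtained by applying \pcode{env} to the
value your lexer returns and reading off the resulting name--string
pairs.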
       
   170 
       
   171 \noindent
       
Finally, using your regular expressions from Q1, give the tokens for the
string
       
   174 
       
   175 \begin{center}
       
   176 \code{"read n;"}
       
   177 \end{center} 
       
   178 
       
   179 \noindent
       
   180 and use your \pcode{env} function to give the token sequence.
       
   181 
       
   182 
       
   183 \subsection*{Question 3}
       
   184 
       
Extend your lexer from Q2 to also simplify regular expressions after
each derivation step and to rectify the computed values after each
injection. Use this lexer to tokenise the programs in
Figures~\ref{fib} -- \ref{collatz}. You can also find these programs on
KEATS. Give the tokens of these programs with whitespaces
filtered out. Make sure you can tokenise \textbf{exactly} these
programs.\bigskip
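\noindent
One way to organise this, following the lecture code, is to let the
simplification function return not just the simplified regular
expression but also a rectification function that repairs the value
computed for the simplified expression. The sketch below spells out
only the $+$-case (the sequence case is analogous) and assumes the
datatypes and functions from Q1 and Q2:

\begin{lstlisting}[language=Scala,numbers=none]
// rectification functions: repair a value computed for the simplified
// regular expression into a value for the original one
def F_ID = (v: Val) => v
def F_LEFT(f: Val => Val)  = (v: Val) => Left(f(v))
def F_RIGHT(f: Val => Val) = (v: Val) => Right(f(v))
def F_ALT(f1: Val => Val, f2: Val => Val) = (v: Val) => v match {
  case Left(v)  => Left(f1(v))
  case Right(v) => Right(f2(v))
}

// simplification returning the simplified regular expression together
// with the corresponding rectification function
def simp(r: Rexp) : (Rexp, Val => Val) = r match {
  case ALT(r1, r2) => {
    val (r1s, f1s) = simp(r1)
    val (r2s, f2s) = simp(r2)
    (r1s, r2s) match {
      case (ZERO, _) => (r2s, F_RIGHT(f2s))
      case (_, ZERO) => (r1s, F_LEFT(f1s))
      case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
                else (ALT(r1s, r2s), F_ALT(f1s, f2s))
    }
  }
  // add a corresponding SEQ case that removes ZERO and ONE components
  case r => (r, F_ID)
}

// lexer that simplifies after each derivative and rectifies the value
// before injecting the character back
def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
  case Nil => if (nullable(r)) mkeps(r) else throw new Exception("lexing error")
  case c::cs => {
    val (r_simp, f_simp) = simp(der(c, r))
    inj(r, c, f_simp(lex_simp(r_simp, cs)))
  }
}
\end{lstlisting}

\noindent
The rectification step is what makes simplification compatible with
$inj$: the injection expects a value of the shape of the
\emph{unsimplified} derivative, so the value computed for the
simplified derivative has to be repaired first.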
       
   192 
       
   193 
       
   194 \begin{figure}[h]
       
   195 \mbox{\lstinputlisting[language=While,xleftmargin=10mm]{../progs/while-tests/fib.while}}
       
   196 \caption{Fibonacci program in the WHILE language.\label{fib}}
       
   197 \end{figure}
       
   198 
       
   199 \begin{figure}[h]
       
   200 \mbox{\lstinputlisting[language=While,xleftmargin=10mm]{../progs/while-tests/loops.while}}
       
   201 \caption{The three-nested-loops program in the WHILE language. 
       
   202 (Usually used for timing measurements.)\label{loop}}
       
   203 \end{figure}
       
   204 
       
   205 \begin{figure}[h]
       
   206 \mbox{\lstinputlisting[language=While,xleftmargin=10mm]{../progs/while-tests/factors.while}}
       
   207 \caption{A program that calculates factors for numbers in the WHILE
       
   208   language.\label{factors}}
       
   209 \end{figure}
       
   210 
       
   211 \begin{figure}[h]
       
   212 \mbox{\lstinputlisting[language=While,xleftmargin=10mm]{../progs/while-tests/collatz2.while}}
       
   213 \caption{A program that calculates the Collatz series for numbers
       
   214   between 1 and 100.\label{collatz}}
       
   215 \end{figure}
       
   216 
       
   217 \end{document}
       
   218 
       
   219 %%% Local Variables: 
       
   220 %%% mode: latex
       
   221 %%% TeX-master: t
       
   222 %%% End: