# HG changeset patch # User Christian Urban <urbanc@in.tum.de> # Date 1537831626 -3600 # Node ID db5cb071644deaf85d1a5c06a418436a3d0df689 # Parent 447ed6c7cdad781a0b8fda70da9c92649a040a25 update diff -r 447ed6c7cdad -r db5cb071644d progs/app0.scala --- a/progs/app0.scala Mon Sep 24 11:05:39 2018 +0100 +++ b/progs/app0.scala Tue Sep 25 00:27:06 2018 +0100 @@ -1,7 +1,7 @@ import io.Source def get_page(url: String) : String = { - Source.fromURL(url).take(10000).mkString + Source.fromURL(url)("ISO-8859-1").take(10000).mkString } diff -r 447ed6c7cdad -r db5cb071644d progs/app1.scala --- a/progs/app1.scala Mon Sep 24 11:05:39 2018 +0100 +++ b/progs/app1.scala Tue Sep 25 00:27:06 2018 +0100 @@ -1,4 +1,4 @@ def get_page(url: String) : String = { - Try(Source.fromURL(url).take(10000).mkString). - getOrElse { println(s" Problem with: $url"); ""} + Try(Source.fromURL(url)("ISO-8859-1").take(10000).mkString). + getOrElse { println(s" Problem with: $url"); ""} } diff -r 447ed6c7cdad -r db5cb071644d progs/app3.scala --- a/progs/app3.scala Mon Sep 24 11:05:39 2018 +0100 +++ b/progs/app3.scala Tue Sep 25 00:27:06 2018 +0100 @@ -1,4 +1,4 @@ -val my_urls = """urbanc""".r +val my_urls = """urban""".r def crawl(url: String, n: Int) : Unit = { if (n == 0) () diff -r 447ed6c7cdad -r db5cb071644d slides.sty --- a/slides.sty Mon Sep 24 11:05:39 2018 +0100 +++ b/slides.sty Tue Sep 25 00:27:06 2018 +0100 @@ -15,6 +15,7 @@ \hypersetup{linkcolor=darkblue} \hypersetup{urlcolor=darkblue} + \newcommand{\tttext}[1]{{\consolas{#1}}} \newcommand{\ZERO}{\mbox{\bf 0}} diff -r 447ed6c7cdad -r db5cb071644d slides/slides01.pdf Binary file slides/slides01.pdf has changed diff -r 447ed6c7cdad -r db5cb071644d slides/slides01.tex --- a/slides/slides01.tex Mon Sep 24 11:05:39 2018 +0100 +++ b/slides/slides01.tex Tue Sep 25 00:27:06 2018 +0100 @@ -13,7 +13,7 @@ xleftmargin=0mm} \newcommand{\bl}[1]{\textcolor{blue}{#1}} - + % beamer stuff \renewcommand{\slidecaption}{CFL 01, King's College London} @@ -39,137 +39,58 @@ \begin{center} \begin{tabular}{ll} Email: & christian.urban at kcl.ac.uk\\ - Office: & N7.07 (North Wing, Bush House)\\ + Office: & N\liningnums{7.07} (North Wing, Bush House)\\ Slides: & KEATS \end{tabular} \end{center} \end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] -\frametitle{The Goal of this Course} +\begin{frame}[t] +\frametitle{Why Study Compilers?} -\begin{center} - \begin{tikzpicture}[scale=1, - node/.style={ - rectangle,rounded corners=3mm, - very thick,draw=black!50,minimum height=18mm, minimum width=20mm, - top color=white,bottom color=black!20}] +John Regehr {\small(LLVM compiler hacker)}\smallskip\\ - \node at (3.05, 1.8) {\Large\bf Write A Compiler}; - - \node (0) at (-2.3,0) {}; - - \node (A) at (0,0) [node] {}; - \node [below right] at (A.north west) {lexer}; - - \node (B) at (3,0) [node] {}; - \node [below right=1mm] at (B.north west) {\mbox{}\hspace{-1mm}parser}; +\begin{bubble}[10.5cm] + \bf ``\ldots{}It’s effectively a perpetual + employment act for solid compiler hackers.'' +\end{bubble} - \node (C) at (6,0) [node] {}; - \node [below right] at (C.north west) {\mbox{}\hspace{-1mm}code gen}; - - \node (1) at (8.4,0) {}; - - \draw [->,line width=4mm] (0) -- (A); - \draw [->,line width=4mm] (A) -- (B); - \draw [->,line width=4mm] (B) -- (C); - \draw [->,line width=4mm] (C) -- (1); - \end{tikzpicture} - \end{center} +\onslide<1->{ +\only<2>{ +\begin{itemize} +\item {\bf Hardware is getting weirder + rather than getting clocked faster} -\only<2,3>{ -\begin{textblock}{1}(1,2) -\begin{bubble}[9.8cm] -\normalsize -lexer input: a string\smallskip\\ -\hspace{5mm}\code{"read(n);"}\medskip\\ -lexer output: a sequence of tokens\smallskip\\ -\hspace{5mm}\code{key(read); lpar; id(n); rpar; semi} -\end{bubble} -\end{textblock}} - +\begin{itemize} +\item Almost all processors are + multicores nowadays and it looks like there is increasing asymmetry in + resources across cores. Processors come with vector units, crypto + accelerators etc. We have DSPs, GPUs, + ARM big.little, and Xeon Phi. This is only scratching the + surface. +\end{itemize} +\end{itemize}} \only<3>{ -\begin{textblock}{1}(6,7.8) -\begin{tabular}{c} -\includegraphics[scale=0.2]{../pics/rosetta.jpg}\\[-2mm] -\footnotesize lexing $\Rightarrow$ recognising words (Stone of Rosetta) -\end{tabular} -\end{textblock}} - -\only<4>{ -\begin{textblock}{1}(1,1.5) -\begin{bubble}[8.5cm] -\normalsize -parser input: a sequence of token\smallskip\\ -parser output: an abstract syntax tree\smallskip\\ -\footnotesize -\hspace{2cm}\begin{tikzpicture} - \node {\code{read}} - child {node {\code{lpar}}} - child {node {\code{n}}} - child {node {\code{rpar}}}; -\end{tikzpicture} -\end{bubble} -\end{textblock}} - -\only<5,6>{ -\begin{textblock}{1}(1,1.5) -\begin{bubble}[4cm] -\normalsize -code generator:\smallskip\\ -\hspace{5mm}\code{istore 2}\\ -\hspace{5mm}\code{iload 2}\\ -\hspace{5mm}\code{ldc 10}\\ -\hspace{5mm}\code{isub}\\ -\hspace{5mm}\code{ifeq Label2}\\ -\hspace{5mm}\code{iload 2}\\ -\hspace{5mm}\code{...}\\ -\end{bubble} -\end{textblock}} - -\only<6>{ -\begin{textblock}{6}(8.4,7) -\begin{bubble}[5cm] -\mbox{\begin{tikzpicture}[scale=0.58,rounded corners=0mm] -\begin{axis}[axis x line=bottom, axis y line=left, ylabel=secs, - xlabel=n, - enlargelimits=0.05, - ybar interval=0.7, legend style=small] -\addplot file {interpreted2.data}; -\addplot file {compiled2.data}; -%\legend{interpreted, compiled} -\end{axis} -\end{tikzpicture}} -\end{bubble} -\end{textblock}} +\begin{itemize} +\item {\bf We’re getting tired of low-level languages and + their associated security disasters} + +\begin{itemize} +\item + We want to write new code, to + whatever extent possible, in safer, higher-level + languages. Compilers are caught right in the middle of these + opposing trends: one of their main jobs is to help bridge the large + and growing gap between increasingly high-level languages and + increasingly wacky platforms. +\end{itemize} +\end{itemize}}} \end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] -\frametitle{The subject is quite old} - -\begin{itemize} -\item Turing Machines, 1936 -\item Regular Expressions, 1956\\ -\item The first compiler for COBOL, 1957\\ (Grace Hopper) -\item But surprisingly research papers are still published nowadays -\end{itemize} - -\begin{flushright} -\includegraphics[scale=0.3]{pics/hopper.jpg}\\ -\footnotesize\textcolor{gray}{Grace Hopper} -\end{flushright} - -\mbox{}\\[-10mm] -{\footnotesize\textcolor{gray}{(she made it to David Letterman's Tonight Show, \url{http://www.youtube.com/watch?v=aZOxtURhfEU})}} - -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] @@ -177,7 +98,7 @@ \begin{columns}[t] \begin{column}{.5\textwidth} -Ruby, Python, Java\medskip\\ +Ruby, Python, Java 8\medskip\\ \begin{tikzpicture}\footnotesize \begin{axis}[ xlabel={$n$}, @@ -195,8 +116,8 @@ legend entries={Python,Ruby}, legend pos=north west, legend cell align=left] -\addplot[blue,mark=*] table {re-python.data}; -\addplot[brown,mark=triangle*] table {re-ruby.data}; +\addplot[blue,mark=*, mark options={fill=white}] table {re-python.data}; +\addplot[brown,mark=triangle*, mark options={fill=white}] table {re-ruby.data}; \end{axis} \end{tikzpicture} \begin{tikzpicture}\footnotesize @@ -213,9 +134,10 @@ axis lines=left, width=5.5cm, height=4cm, - legend entries={Java}, + legend entries={Python, Java 8}, legend pos=north west, legend cell align=left] +\addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data}; \addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data}; \end{axis} \end{tikzpicture} @@ -260,7 +182,7 @@ \end{columns}\bigskip \small\centering -matching \texttt{[a?]\{n\}[a]\{n\}} and \texttt{[a*]*b} +matching \texttt{[a?]\{n\}[a]\{n\}} and \texttt{(a*)*b} against $\underbrace{\texttt{a}...\texttt{a}}_n$ \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -268,6 +190,148 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] +\frametitle{The Goal of this Module} + +\begin{center} + \begin{tikzpicture}[scale=1, + node/.style={ + rectangle,rounded corners=3mm, + very thick,draw=black!50,minimum height=18mm, minimum width=20mm, + top color=white,bottom color=black!20}] + + \node at (3.05, 1.8) {\Large\bf Write a compiler}; + + \node (0) at (-2.3,0) {}; + + \node (A) at (0,0) [node] {}; + \node [below right] at (A.north west) {lexer}; + + \node (B) at (3,0) [node] {}; + \node [below right=1mm] at (B.north west) {\mbox{}\hspace{-1mm}parser}; + + \node (C) at (6,0) [node] {}; + \node [below right] at (C.north west) {\mbox{}\hspace{-1mm}code gen}; + + \node (1) at (8.4,0) {}; + + \draw [->,line width=4mm] (0) -- (A); + \draw [->,line width=4mm] (A) -- (B); + \draw [->,line width=4mm] (B) -- (C); + \draw [->,line width=4mm] (C) -- (1); + \end{tikzpicture} + \end{center} + +\only<2,3,4>{ +\begin{textblock}{1}(1,2.1) +\begin{bubble}[9.8cm] +\normalsize +lexer input: a string\smallskip\\ +\hspace{5mm}\code{"read(n);"}\medskip\\ +lexer output: a sequence of tokens\smallskip\\ +\hspace{5mm}\code{key(read) lpar id(n) rpar semi} +\end{bubble} +\end{textblock}} + +\only<3,4>{ +\begin{textblock}{1}(6,7.8) +\begin{tabular}{c} +\includegraphics[scale=0.2]{../pics/rosetta.jpg}\\[-2mm] +\footnotesize lexing $\Rightarrow$ recognising words (Stone of Rosetta) +\end{tabular} +\end{textblock}} + +\only<4>{ +\begin{textblock}{1}(0.5,12)\small +\begin{tabular}{l@{}c@{}l} + \pcode{if} & $\;\Rightarrow\;$ & keyword\\ + \pcode{iffoo} & $\;\Rightarrow\;$ & identifier\\ +\end{tabular} +\end{textblock}} + +\only<5>{ +\begin{textblock}{1}(1,1.5) +\begin{bubble}[8.5cm] +\normalsize +parser input: a sequence of tokens\smallskip\\ + +{\small\hspace{5mm}\code{key(read) lpar id(n) rpar semi}}\smallskip\\ + +parser output: an abstract syntax tree\smallskip\\ +\footnotesize +\hspace{2cm}\begin{tikzpicture} + \node {\code{read}} + child {node {\code{lpar}}} + child {node {\code{n}}} + child {node {\code{rpar}}}; +\end{tikzpicture} +\end{bubble} +\end{textblock}} + +\only<6,7>{ +\begin{textblock}{1}(1,1.5) +\begin{bubble}[4cm] +\normalsize +code generator:\smallskip\\ +\hspace{5mm}\code{istore 2}\\ +\hspace{5mm}\code{iload 2}\\ +\hspace{5mm}\code{ldc 10}\\ +\hspace{5mm}\code{isub}\\ +\hspace{5mm}\code{ifeq Label2}\\ +\hspace{5mm}\code{iload 2}\\ +\hspace{5mm}\code{...}\\ +\end{bubble} +\end{textblock}} + +\only<7>{ +\begin{textblock}{6}(8.4,7) +\begin{bubble}[5cm] +\mbox{\begin{tikzpicture}[scale=0.58,rounded corners=0mm] +\begin{axis}[axis x line=bottom, axis y line=left, ylabel=secs, + xlabel=n, + enlargelimits=0.05, + ybar interval=0.7, legend style=small] +\addplot file {interpreted2.data}; +\addplot file {compiled2.data}; +%\legend{interpreted, compiled} +\end{axis} +\end{tikzpicture}} +\end{bubble} +\end{textblock}} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] +\frametitle{The Acad.~Subject is Mature} + +\begin{itemize} +\item Turing Machines, 1936 +\item Regular Expressions, 1956\\ +\item The first compiler for COBOL, 1957\\ (Grace Hopper) +\item But surprisingly research papers are still published nowadays\\ +\item ``Parsing: The Solved Problem That Isn't'' +\end{itemize} + +\begin{flushright} +\includegraphics[scale=0.3]{pics/hopper.jpg}\\ +\footnotesize\textcolor{gray}{Grace Hopper} +\end{flushright} + + +\begin{flushright} +\mbox{}\\[-6mm] +{\footnotesize\textcolor{gray}{(she made it to David Letterman's Tonight Show,\\[-2mm] + \url{http://www.youtube.com/watch?v=aZOxtURhfEU})}} +\end{flushright} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] \frametitle{Lectures 1 - 5} transforming strings into structured data\\[10mm] @@ -332,7 +396,7 @@ \begin{itemize} \item a web-crawler \item an email harvester -\item \textcolor{gray}{(a web-scraper)} +%\item \textcolor{gray}{(a web-scraper)} \end{itemize} \end{frame} @@ -354,7 +418,6 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode<presentation>{ \begin{frame}[t] \frametitle{A Web-Crawler} @@ -371,7 +434,7 @@ \small (we need a bound for the number of recursive calls) \small (the purpose is to check all links on my own webpage) -\end{frame}} +\end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -419,13 +482,13 @@ \frametitle{Scala} \small A simple Scala function for reading webpages: -\smallskip +\bigskip \footnotesize \lstinputlisting{../progs/app0.scala} \medskip\pause -\lstinline{get_page("""http://www.inf.kcl.ac.uk/staff/urbanc/""")} +\lstinline{get_page("""https://nms.kcl.ac.uk/christian.urban/""")} \bigskip\medskip\pause @@ -433,7 +496,7 @@ \smallskip \footnotesize -\lstinputlisting{../progs/app1.scala} +\lstinputlisting[xleftmargin=-4mm]{../progs/app1.scala} \end{frame} @@ -456,14 +519,17 @@ matches for example\smallskip\\ \hspace{2mm}\code{"http://www.foobar.com"}\\ -\hspace{2mm}\code{"https://www.tls.org"}\\ +\hspace{2mm}\code{"https://www.tls.org"}\smallskip\\ + +but not\smallskip\\ +\hspace{2mm}\code{"http://www."foo"bar.com"}\\ \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[t] -\frametitle{Finding Operations} +\begin{frame}[c] +\frametitle{Finding Operations in Scala} {\bf\code{rexp.findAllIn(string)}}\medskip @@ -497,7 +563,7 @@ \begin{frame}[c] \small -A version that only crawls links in ``my'' domain: +A version that only crawls links in ``my'' domain:\bigskip \footnotesize \lstinputlisting{../progs/app3.scala} @@ -650,7 +716,7 @@ \frametitle{The Meaning of Matching} \begin{bubble}[10cm] -\large +\large\bf A regular expression \bl{$r$} matches a string~\bl{$s$} provided @@ -674,7 +740,7 @@ \begin{itemize} \item Accounts for 80\%.\bigskip -\item You will understand the question ``\textit{Is this relevant for +\item The question ``\textit{Is this relevant for the exam?}'' is very demotivating for the lecturer!\bigskip\\ \item Deal: Whatever is in the homework (and is not marked @@ -701,11 +767,12 @@ \begin{itemize} \item four programming tasks: \begin{itemize} -\item matcher (4\%, 19.10.) -\item lexer (5\%, 03.11.) +\item matcher (4\%, 12.10.) +\item lexer (5\%, 02.11.) \item parser (5\%, 23.11.) -\item compiler (6\%, 7.12.) +\item compiler (6\%, 14.12.) \end{itemize} +\item in any lang.~you like,\\ but I want to see the code \end{itemize} \end{column} @@ -713,8 +780,8 @@ \begin{column}{.5\textwidth} \underline{\bf Strand 2}\smallskip\begin{itemize} \item one task: prove the correctness of a regular expression matcher in -the Isabelle theorem prover -\item 20\%, submission 7.12. +the \underline{Isabelle} theorem prover +\item 20\%, submission on~14.12.\hspace{-5mm}\mbox{} \end{itemize} \end{column} \end{columns}\medskip @@ -734,8 +801,8 @@ \frametitle{Lecture Capture} \begin{itemize} -\item Hope it works\ldots\medskip\pause -\item It is important to use lecture capture wisely: +\item Hope it works\ldots\pause actually no, it does not!\medskip\pause +\item It is important to use lecture capture wisely\\ (it is only the ``baseline''): \begin{itemize} \item Lecture recordings are a study and revision aid. \item Statistically, there is a clear and direct link between attendance and